# Source code for verde.vector

"""
Classes for dealing with vector data.
"""
import numpy as np
from sklearn.utils.validation import check_is_fitted

from .base import check_fit_input, least_squares, BaseGridder
from .spline import warn_weighted_exact_solution
from .utils import n_1d_arrays, parse_engine
from .coordinates import get_region

try:
    import numba
    from numba import jit
except ImportError:
    numba = None
    from .utils import dummy_jit as jit


# Default arguments for numba.jit
JIT_ARGS = dict(nopython=True, target="cpu", fastmath=True, parallel=True)


class Vector(BaseGridder):
    """
    Fit an estimator to each component of multi-component vector data.

    Provides a convenient way of fitting and gridding vector data using scalar
    gridders and estimators.

    Each data component provided to :meth:`~verde.Vector.fit` is fitted to a
    separate estimator. Methods like :meth:`~verde.Vector.grid` and
    :meth:`~verde.Vector.predict` will operate on the multiple components
    simultaneously.

    .. warning::

        Never pass code like this as input to this class: ``[vd.Trend(1)]*3``.
        This creates 3 references to the **same instance** of ``Trend``, which
        means that they will all get the same coefficients after fitting. Use a
        list comprehension instead: ``[vd.Trend(1) for i in range(3)]``.

    Parameters
    ----------
    components : tuple or list
        A tuple or list of the estimator/gridder instances used for each
        component. The estimators will be applied for each data component in
        the same order that they are given here.

    Attributes
    ----------
    components : tuple
        Tuple of the fitted estimators on each component of the data.
    region_ : tuple
        The boundaries (``[W, E, S, N]``) of the data used to fit the
        interpolator. Used as the default region for the
        :meth:`~verde.Vector.grid` and :meth:`~verde.Vector.scatter` methods.

    See also
    --------
    verde.Chain : Chain filtering operations to fit on each subsequent output.

    """

    def __init__(self, components):
        self.components = components

    def fit(self, coordinates, data, weights=None):
        """
        Fit the estimators to the given multi-component data.

        The data region is captured and used as default for the
        :meth:`~verde.Vector.grid` and :meth:`~verde.Vector.scatter` methods.

        All input arrays must have the same shape. If weights are given, there
        must be a separate array for each component of the data.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.
        data : tuple of array
            The data values of each component at each data point. Must be a
            tuple.
        weights : None or tuple of array
            If not None, then the weights assigned to each data point of each
            data component. Typically, this should be 1 over the data
            uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.

        """
        if not isinstance(data, tuple):
            raise ValueError(
                "Data must be a tuple of arrays. {} given.".format(type(data))
            )
        if weights is not None and not isinstance(weights, tuple):
            raise ValueError(
                "Weights must be a tuple of arrays. {} given.".format(type(weights))
            )
        coordinates, data, weights = check_fit_input(coordinates, data, weights)
        self.region_ = get_region(coordinates[:2])
        for estimator, data_comp, weight_comp in zip(self.components, data, weights):
            estimator.fit(coordinates, data_comp, weight_comp)
        return self

    def predict(self, coordinates):
        """
        Evaluate each data component on a set of points.

        Requires a fitted estimator (see :meth:`~verde.Vector.fit`).

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.

        Returns
        -------
        data : tuple of array
            The values for each vector component evaluated on the given
            points. The order of components will be the same as was provided
            to :meth:`~verde.Vector.fit`.

        """
        check_is_fitted(self, ["region_"])
        return tuple(comp.predict(coordinates) for comp in self.components)
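
# A minimal usage sketch for ``Vector`` (not part of the original module; the
# synthetic data, trend degree, and spacing below are illustrative assumptions).
# Note the list comprehension, which avoids the shared-instance pitfall warned
# about in the class docstring:
#
#     import numpy as np
#     import verde as vd
#
#     east, north = np.meshgrid(np.linspace(0, 10, 50), np.linspace(-5, 5, 50))
#     data = (2 * east - 3 * north, east + 0.5 * north)  # two synthetic components
#     gridder = vd.Vector([vd.Trend(degree=1) for i in range(len(data))])
#     gridder.fit((east, north), data)
#     predicted = gridder.predict((east, north))  # tuple, same order as *data*
#     grid = gridder.grid(spacing=0.5)  # xarray.Dataset with one variable per component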


class VectorSpline2D(BaseGridder):
    r"""
    Elastically coupled interpolation of 2-component vector data.

    This gridder assumes Cartesian coordinates.

    Uses the Green's functions based on elastic deformation from
    [SandwellWessel2016]_. The interpolation is done by estimating point
    forces that generate an elastic deformation that fits the observed vector
    data. The deformation equations are based on a 2D elastic sheet with a
    constant Poisson's ratio. The data can then be predicted at any desired
    location.

    The east and north data components are coupled through the elastic
    deformation equations. This coupling is controlled by the Poisson's ratio,
    which is usually between -1 and 1. The special case of Poisson's ratio -1
    leads to an uncoupled interpolation, meaning that the east and north
    components don't interfere with each other.

    The point forces are traditionally placed under each data point. The force
    locations are set the first time :meth:`~verde.VectorSpline2D.fit` is
    called. Subsequent calls will fit using the same force locations as the
    first call. This configuration results in an exact prediction at the data
    points but can be unstable.

    [SandwellWessel2016]_ stabilize the solution using Singular Value
    Decomposition but we use ridge regression instead. The regularization can
    be controlled using the *damping* argument. Alternatively, you can specify
    the position of the forces manually using the *force_coords* argument.
    Regularization or forces not coinciding with data points will result in a
    least-squares estimate, not an exact solution. Note that the least-squares
    solution is required for data weights to have any effect.

    Before fitting, the Jacobian (design, sensitivity, feature, etc) matrix
    for the spline is normalized using
    :class:`sklearn.preprocessing.StandardScaler` without centering the mean
    so that the transformation can be undone in the estimated forces.

    Parameters
    ----------
    poisson : float
        The Poisson's ratio for the elastic deformation Green's functions.
        Default is 0.5. A value of -1 will lead to uncoupled interpolation of
        the east and north data components.
    mindist : float
        A minimum distance between the point forces and data points. Needed
        because the Green's functions are singular when forces and data points
        coincide. Acts as a fudge factor. A good rule of thumb is to use the
        average spacing between data points.
    damping : None or float
        The positive damping regularization parameter. Controls how much
        smoothness is imposed on the estimated forces. If None, no
        regularization is used.
    force_coords : None or tuple of arrays
        The easting and northing coordinates of the point forces. If None
        (default), then will be set to the data coordinates the first time
        :meth:`~verde.VectorSpline2D.fit` is called.
    engine : str
        Computation engine for the Jacobian matrix and predictions. Can be
        ``'auto'``, ``'numba'``, or ``'numpy'``. If ``'auto'``, will use numba
        if it is installed or numpy otherwise. The numba version is
        multi-threaded and usually faster, which makes fitting and predicting
        faster.

    Attributes
    ----------
    force_ : array
        The estimated forces that fit the observed data.
    region_ : tuple
        The boundaries (``[W, E, S, N]``) of the data used to fit the
        interpolator. Used as the default region for the
        :meth:`~verde.VectorSpline2D.grid` and
        :meth:`~verde.VectorSpline2D.scatter` methods.

    """

    def __init__(
        self, poisson=0.5, mindist=10e3, damping=None, force_coords=None, engine="auto"
    ):
        self.poisson = poisson
        self.mindist = mindist
        self.damping = damping
        self.force_coords = force_coords
        self.engine = engine

    def fit(self, coordinates, data, weights=None):
        """
        Fit the gridder to the given 2-component vector data.

        The data region is captured and used as default for the
        :meth:`~verde.VectorSpline2D.grid` and
        :meth:`~verde.VectorSpline2D.scatter` methods.

        All input arrays must have the same shape.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.
        data : tuple of array
            A tuple ``(east_component, north_component)`` of arrays with the
            vector data values at each point.
        weights : None or tuple of array
            If not None, then the weights assigned to each data point. Must be
            one array per data component. Typically, this should be 1 over the
            data uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.

        """
        coordinates, data, weights = check_fit_input(
            coordinates, data, weights, unpack=False
        )
        if len(data) != 2:
            raise ValueError(
                "Need two data components. Only {} given.".format(len(data))
            )
        # Capture the data region to use as a default when gridding.
        self.region_ = get_region(coordinates[:2])
        if any(w is not None for w in weights):
            weights = np.concatenate([i.ravel() for i in weights])
        else:
            weights = None
        warn_weighted_exact_solution(self, weights)
        data = np.concatenate([i.ravel() for i in data])
        if self.force_coords is None:
            self.force_coords = tuple(i.copy() for i in n_1d_arrays(coordinates, n=2))
        jacobian = self.jacobian(coordinates[:2], self.force_coords)
        self.force_ = least_squares(jacobian, data, weights, self.damping)
        return self

    def predict(self, coordinates):
        """
        Evaluate the fitted gridder on the given set of points.

        Requires a fitted estimator (see :meth:`~verde.VectorSpline2D.fit`).

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.

        Returns
        -------
        data : tuple of arrays
            A tuple ``(east_component, north_component)`` of arrays with the
            predicted vector data values at each point.

        """
        check_is_fitted(self, ["force_"])
        force_east, force_north = self.force_coords
        east, north = n_1d_arrays(coordinates, n=2)
        cast = np.broadcast(*coordinates[:2])
        npoints = cast.size
        components = (
            np.empty(npoints, dtype=east.dtype),
            np.empty(npoints, dtype=east.dtype),
        )
        if parse_engine(self.engine) == "numba":
            components = predict_2d_numba(
                east,
                north,
                force_east,
                force_north,
                self.mindist,
                self.poisson,
                self.force_,
                components[0],
                components[1],
            )
        else:
            components = predict_2d_numpy(
                east,
                north,
                force_east,
                force_north,
                self.mindist,
                self.poisson,
                self.force_,
                components[0],
                components[1],
            )
        return tuple(comp.reshape(cast.shape) for comp in components)
[docs] def jacobian(self, coordinates, force_coords, dtype="float64"): """ Make the Jacobian matrix for the 2D coupled elastic deformation. The Jacobian is segmented into 4 parts, each relating a force component to a data component [SandwellWessel2016]_:: | J_ee J_ne |*|f_e| = |d_e| | J_ne J_nn | |f_n| |d_n| The forces and data are assumed to be stacked into 1D arrays with the east component on top of the north component. Parameters ---------- coordinates : tuple of arrays Arrays with the coordinates of each data point. Should be in the following order: (easting, northing, vertical, ...). Only easting and northing will be used, all subsequent coordinates will be ignored. force_coords : tuple of arrays Arrays with the coordinates for the forces. Should be in the same order as the coordinate arrays. dtype : str or numpy dtype The type of the Jacobian array. Returns ------- jacobian : 2D array The (n_data*2, n_forces*2) Jacobian matrix. """ force_east, force_north = n_1d_arrays(force_coords, n=2) east, north = n_1d_arrays(coordinates, n=2) jac = np.empty((east.size * 2, force_east.size * 2), dtype=dtype) if parse_engine(self.engine) == "numba": jac = jacobian_2d_numba( east, north, force_east, force_north, self.mindist, self.poisson, jac ) else: jac = jacobian_2d_numpy( east, north, force_east, force_north, self.mindist, self.poisson, jac ) return jac


def greens_func_2d(east, north, mindist, poisson):
    "Calculate the Green's functions for the 2D elastic case."
    distance = np.sqrt(east ** 2 + north ** 2)
    # The mindist factor helps avoid singular matrices when the force and
    # computation point are too close
    distance += mindist
    # Pre-compute common terms for the Green's functions of each component
    ln_r = (3 - poisson) * np.log(distance)
    over_r2 = (1 + poisson) / distance ** 2
    green_ee = ln_r + over_r2 * north ** 2
    green_nn = ln_r + over_r2 * east ** 2
    green_ne = -over_r2 * east * north
    return green_ee, green_nn, green_ne


def predict_2d_numpy(
    east, north, force_east, force_north, mindist, poisson, forces, vec_east, vec_north
):
    "Calculate the predicted data using numpy."
    vec_east[:] = 0
    vec_north[:] = 0
    nforces = forces.size // 2
    for j in range(nforces):
        green_ee, green_nn, green_ne = greens_func_2d(
            east - force_east[j], north - force_north[j], mindist, poisson
        )
        vec_east += green_ee * forces[j] + green_ne * forces[j + nforces]
        vec_north += green_ne * forces[j] + green_nn * forces[j + nforces]
    return vec_east, vec_north


def jacobian_2d_numpy(east, north, force_east, force_north, mindist, poisson, jac):
    "Calculate the Jacobian matrix using numpy broadcasting."
    npoints = east.size
    nforces = force_east.size
    # Reshaping the data coordinates to a column vector will automatically
    # build a Green's functions matrix between each data point and force.
    green_ee, green_nn, green_ne = greens_func_2d(
        east.reshape((npoints, 1)) - force_east,
        north.reshape((npoints, 1)) - force_north,
        mindist,
        poisson,
    )
    jac[:npoints, :nforces] = green_ee
    jac[npoints:, nforces:] = green_nn
    jac[:npoints, nforces:] = green_ne
    jac[npoints:, :nforces] = green_ne  # J is symmetric
    return jac


@jit(**JIT_ARGS)
def predict_2d_numba(
    east, north, force_east, force_north, mindist, poisson, forces, vec_east, vec_north
):
    "Calculate the predicted data using numba to speed things up."
    nforces = forces.size // 2
    for i in numba.prange(east.size):  # pylint: disable=not-an-iterable
        vec_east[i] = 0
        vec_north[i] = 0
        for j in range(nforces):
            green_ee, green_nn, green_ne = GREENS_FUNC_2D_JIT(
                east[i] - force_east[j], north[i] - force_north[j], mindist, poisson
            )
            vec_east[i] += green_ee * forces[j] + green_ne * forces[j + nforces]
            vec_north[i] += green_ne * forces[j] + green_nn * forces[j + nforces]
    return vec_east, vec_north


@jit(**JIT_ARGS)
def jacobian_2d_numba(east, north, force_east, force_north, mindist, poisson, jac):
    "Calculate the Jacobian matrix using numba to speed things up."
    nforces = force_east.size
    npoints = east.size
    for i in numba.prange(npoints):  # pylint: disable=not-an-iterable
        for j in range(nforces):
            green_ee, green_nn, green_ne = GREENS_FUNC_2D_JIT(
                east[i] - force_east[j], north[i] - force_north[j], mindist, poisson
            )
            jac[i, j] = green_ee
            jac[i + npoints, j + nforces] = green_nn
            jac[i, j + nforces] = green_ne
            jac[i + npoints, j] = green_ne  # J is symmetric
    return jac


# JIT compile the Green's functions for use in numba functions
GREENS_FUNC_2D_JIT = jit(**JIT_ARGS)(greens_func_2d)
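

# Quick sanity-check sketches for the Green's functions and Jacobian blocks
# (illustrative additions; the point layout and mindist value are assumptions):
#
#     import numpy as np
#
#     # Poisson's ratio of -1 zeroes the coupling term, as the class docstring states.
#     _, _, green_ne = greens_func_2d(np.array([1.0]), np.array([2.0]), 1.0, -1)
#     assert np.allclose(green_ne, 0)
#
#     # The off-diagonal Jacobian blocks carry the same east-north coupling terms.
#     east = np.array([0.0, 1.0, 2.0])
#     north = np.array([0.0, -1.0, 1.0])
#     jac = jacobian_2d_numpy(
#         east, north, east, north, 1.0, 0.5, np.empty((east.size * 2, east.size * 2))
#     )
#     npoints = east.size
#     assert np.allclose(jac[:npoints, npoints:], jac[npoints:, :npoints])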