"""
Classes for dealing with vector data.
"""
import numpy as np
from sklearn.utils.validation import check_is_fitted
from .base import check_fit_input, least_squares, BaseGridder
from .spline import warn_weighted_exact_solution
from .utils import n_1d_arrays, parse_engine
from .coordinates import get_region
try:
import numba
from numba import jit
except ImportError:
numba = None
from .utils import dummy_jit as jit
# Default arguments for numba.jit
JIT_ARGS = dict(nopython=True, target="cpu", fastmath=True, parallel=True)
[docs]class Vector(BaseGridder):
"""
Fit an estimator to each component of multi-component vector data.
Provides a convenient way of fitting and gridding vector data using scalar gridders
and estimators.
Each data component provided to :meth:`~verde.Vector.fit` is fitted to a separated
estimator. Methods like :meth:`~verde.Vector.grid` and :meth:`~verde.Vector.predict`
will operate on the multiple components simultaneously.
.. warning::
Never pass code like this as input to this class: ``[vd.Trend(1)]*3``. This
creates 3 references to the **same instance** of ``Trend``, which means that
they will all get the same coefficients after fitting. Use a list comprehension
instead: ``[vd.Trend(1) for i in range(3)]``.
Parameters
----------
components : tuple or list
A tuple or list of the estimator/gridder instances used for each component. The
estimators will be applied for each data component in the same order that they
are given here.
Attributes
----------
components : tuple
Tuple of the fitted estimators on each component of the data.
region_ : tuple
The boundaries (``[W, E, S, N]``) of the data used to fit the interpolator. Used
as the default region for the :meth:`~verde.Vector.grid` and
:meth:`~verde.Vector.scatter` methods.
See also
--------
verde.Chain : Chain filtering operations to fit on each subsequent output.
"""
def __init__(self, components):
self.components = components
[docs] def fit(self, coordinates, data, weights=None):
"""
Fit the estimators to the given multi-component data.
The data region is captured and used as default for the
:meth:`~verde.Vector.grid` and :meth:`~verde.Vector.scatter` methods.
All input arrays must have the same shape. If weights are given, there
must be a separate array for each component of the data.
Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...). Only easting
and northing will be used, all subsequent coordinates will be
ignored.
data : tuple of array
The data values of each component at each data point. Must be a
tuple.
weights : None or tuple of array
If not None, then the weights assigned to each data point of each
data component. Typically, this should be 1 over the data
uncertainty squared.
Returns
-------
self
Returns this estimator instance for chaining operations.
"""
if not isinstance(data, tuple):
raise ValueError(
"Data must be a tuple of arrays. {} given.".format(type(data))
)
if weights is not None and not isinstance(weights, tuple):
raise ValueError(
"Weights must be a tuple of arrays. {} given.".format(type(weights))
)
coordinates, data, weights = check_fit_input(coordinates, data, weights)
self.region_ = get_region(coordinates[:2])
for estimator, data_comp, weight_comp in zip(self.components, data, weights):
estimator.fit(coordinates, data_comp, weight_comp)
return self
[docs] def predict(self, coordinates):
"""
Evaluate each data component on a set of points.
Requires a fitted estimator (see :meth:`~verde.Vector.fit`).
Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...). Only easting
and northing will be used, all subsequent coordinates will be
ignored.
Returns
-------
data : tuple of array
The values for each vector component evaluated on the given points. The
order of components will be the same as was provided to
:meth:`~verde.Vector.fit`.
"""
check_is_fitted(self, ["region_"])
return tuple(comp.predict(coordinates) for comp in self.components)
[docs]class VectorSpline2D(BaseGridder):
r"""
Elastically coupled interpolation of 2-component vector data.
This gridder assumes Cartesian coordinates.
Uses the Green's functions based on elastic deformation from [SandwellWessel2016]_.
The interpolation is done by estimating point forces that generate an elastic
deformation that fits the observed vector data. The deformation equations are based
on a 2D elastic sheet with a constant Poisson's ratio. The data can then be
predicted at any desired location.
The east and north data components are coupled through the elastic deformation
equations. This coupling is controlled by the Poisson's ratio, which is usually
between -1 and 1. The special case of Poisson's ratio -1 leads to an uncoupled
interpolation, meaning that the east and north components don't interfere with each
other.
The point forces are traditionally placed under each data point. The force locations
are set the first time :meth:`~verde.VectorSpline2D.fit` is called. Subsequent calls
will fit using the same force locations as the first call. This configuration
results in an exact prediction at the data points but can be unstable.
[SandwellWessel2016]_ stabilize the solution using Singular Value Decomposition but
we use ridge regression instead. The regularization can be controlled using the
*damping* argument. Alternatively, you can specify the position of the forces
manually using the *force_coords* argument. Regularization or forces not coinciding
with data points will result in a least-squares estimate, not an exact solution.
Note that the least-squares solution is required for data weights to have any
effect.
Before fitting, the Jacobian (design, sensitivity, feature, etc) matrix for the
spline is normalized using :class:`sklearn.preprocessing.StandardScaler` without
centering the mean so that the transformation can be undone in the estimated forces.
Parameters
----------
poisson : float
The Poisson's ratio for the elastic deformation Green's functions. Default is
0.5. A value of -1 will lead to uncoupled interpolation of the east and north
data components.
mindist : float
A minimum distance between the point forces and data points. Needed because the
Green's functions are singular when forces and data points coincide. Acts as a
fudge factor. A good rule of thumb is to use the average spacing between data
points.
damping : None or float
The positive damping regularization parameter. Controls how much smoothness is
imposed on the estimated forces. If None, no regularization is used.
force_coords : None or tuple of arrays
The easting and northing coordinates of the point forces. If None (default),
then will be set to the data coordinates the first time
:meth:`~verde.VectorSpline2D.fit` is called.
engine : str
Computation engine for the Jacobian matrix and predictions. Can be ``'auto'``,
``'numba'``, or ``'numpy'``. If ``'auto'``, will use numba if it is installed or
numpy otherwise. The numba version is multi-threaded and usually faster, which
makes fitting and predicting faster.
Attributes
----------
force_ : array
The estimated forces that fit the observed data.
region_ : tuple
The boundaries (``[W, E, S, N]``) of the data used to fit the
interpolator. Used as the default region for the
:meth:`~verde.VectorSpline2D.grid` and :meth:`~verde.VectorSpline2D.scatter`
methods.
"""
def __init__(
self, poisson=0.5, mindist=10e3, damping=None, force_coords=None, engine="auto"
):
self.poisson = poisson
self.mindist = mindist
self.damping = damping
self.force_coords = force_coords
self.engine = engine
[docs] def fit(self, coordinates, data, weights=None):
"""
Fit the gridder to the given 2-component vector data.
The data region is captured and used as default for the
:meth:`~verde.VectorSpline2D.grid` and :meth:`~verde.VectorSpline2D.scatter`
methods.
All input arrays must have the same shape.
Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...). Only easting
and northing will be used, all subsequent coordinates will be
ignored.
data : tuple of array
A tuple ``(east_component, north_component)`` of arrays with the
vector data values at each point.
weights : None or tuple array
If not None, then the weights assigned to each data point. Must be
one array per data component. Typically, this should be 1 over the
data uncertainty squared.
Returns
-------
self
Returns this estimator instance for chaining operations.
"""
coordinates, data, weights = check_fit_input(
coordinates, data, weights, unpack=False
)
if len(data) != 2:
raise ValueError(
"Need two data components. Only {} given.".format(len(data))
)
# Capture the data region to use as a default when gridding.
self.region_ = get_region(coordinates[:2])
if any(w is not None for w in weights):
weights = np.concatenate([i.ravel() for i in weights])
else:
weights = None
warn_weighted_exact_solution(self, weights)
data = np.concatenate([i.ravel() for i in data])
if self.force_coords is None:
self.force_coords = tuple(i.copy() for i in n_1d_arrays(coordinates, n=2))
jacobian = self.jacobian(coordinates[:2], self.force_coords)
self.force_ = least_squares(jacobian, data, weights, self.damping)
return self
[docs] def predict(self, coordinates):
"""
Evaluate the fitted gridder on the given set of points.
Requires a fitted estimator (see :meth:`~verde.VectorSpline2D.fit`).
Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...). Only easting
and northing will be used, all subsequent coordinates will be
ignored.
Returns
-------
data : tuple of arrays
A tuple ``(east_component, north_component)`` of arrays with the
predicted vector data values at each point.
"""
check_is_fitted(self, ["force_"])
force_east, force_north = self.force_coords
east, north = n_1d_arrays(coordinates, n=2)
cast = np.broadcast(*coordinates[:2])
npoints = cast.size
components = (
np.empty(npoints, dtype=east.dtype),
np.empty(npoints, dtype=east.dtype),
)
if parse_engine(self.engine) == "numba":
components = predict_2d_numba(
east,
north,
force_east,
force_north,
self.mindist,
self.poisson,
self.force_,
components[0],
components[1],
)
else:
components = predict_2d_numpy(
east,
north,
force_east,
force_north,
self.mindist,
self.poisson,
self.force_,
components[0],
components[1],
)
return tuple(comp.reshape(cast.shape) for comp in components)
[docs] def jacobian(self, coordinates, force_coords, dtype="float64"):
"""
Make the Jacobian matrix for the 2D coupled elastic deformation.
The Jacobian is segmented into 4 parts, each relating a force component to a
data component [SandwellWessel2016]_::
| J_ee J_ne |*|f_e| = |d_e|
| J_ne J_nn | |f_n| |d_n|
The forces and data are assumed to be stacked into 1D arrays with the east
component on top of the north component.
Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...). Only easting and
northing will be used, all subsequent coordinates will be ignored.
force_coords : tuple of arrays
Arrays with the coordinates for the forces. Should be in the same order as
the coordinate arrays.
dtype : str or numpy dtype
The type of the Jacobian array.
Returns
-------
jacobian : 2D array
The (n_data*2, n_forces*2) Jacobian matrix.
"""
force_east, force_north = n_1d_arrays(force_coords, n=2)
east, north = n_1d_arrays(coordinates, n=2)
jac = np.empty((east.size * 2, force_east.size * 2), dtype=dtype)
if parse_engine(self.engine) == "numba":
jac = jacobian_2d_numba(
east, north, force_east, force_north, self.mindist, self.poisson, jac
)
else:
jac = jacobian_2d_numpy(
east, north, force_east, force_north, self.mindist, self.poisson, jac
)
return jac
def greens_func_2d(east, north, mindist, poisson):
"Calculate the Green's functions for the 2D elastic case."
distance = np.sqrt(east ** 2 + north ** 2)
# The mindist factor helps avoid singular matrices when the force and
# computation point are too close
distance += mindist
# Pre-compute common terms for the Green's functions of each component
ln_r = (3 - poisson) * np.log(distance)
over_r2 = (1 + poisson) / distance ** 2
green_ee = ln_r + over_r2 * north ** 2
green_nn = ln_r + over_r2 * east ** 2
green_ne = -over_r2 * east * north
return green_ee, green_nn, green_ne
def predict_2d_numpy(
east, north, force_east, force_north, mindist, poisson, forces, vec_east, vec_north
):
"Calculate the predicted data using numpy."
vec_east[:] = 0
vec_north[:] = 0
nforces = forces.size // 2
for j in range(nforces):
green_ee, green_nn, green_ne = greens_func_2d(
east - force_east[j], north - force_north[j], mindist, poisson
)
vec_east += green_ee * forces[j] + green_ne * forces[j + nforces]
vec_north += green_ne * forces[j] + green_nn * forces[j + nforces]
return vec_east, vec_north
def jacobian_2d_numpy(east, north, force_east, force_north, mindist, poisson, jac):
"Calculate the Jacobian matrix using numpy broadcasting."
npoints = east.size
nforces = force_east.size
# Reshaping the data coordinates to a column vector will automatically build a
# Green's functions matrix between each data point and force.
green_ee, green_nn, green_ne = greens_func_2d(
east.reshape((npoints, 1)) - force_east,
north.reshape((npoints, 1)) - force_north,
mindist,
poisson,
)
jac[:npoints, :nforces] = green_ee
jac[npoints:, nforces:] = green_nn
jac[:npoints, nforces:] = green_ne
jac[npoints:, :nforces] = green_ne # J is symmetric
return jac
@jit(**JIT_ARGS)
def predict_2d_numba(
east, north, force_east, force_north, mindist, poisson, forces, vec_east, vec_north
):
"Calculate the predicted data using numba to speed things up."
nforces = forces.size // 2
for i in numba.prange(east.size): # pylint: disable=not-an-iterable
vec_east[i] = 0
vec_north[i] = 0
for j in range(nforces):
green_ee, green_nn, green_ne = GREENS_FUNC_2D_JIT(
east[i] - force_east[j], north[i] - force_north[j], mindist, poisson
)
vec_east[i] += green_ee * forces[j] + green_ne * forces[j + nforces]
vec_north[i] += green_ne * forces[j] + green_nn * forces[j + nforces]
return vec_east, vec_north
@jit(**JIT_ARGS)
def jacobian_2d_numba(east, north, force_east, force_north, mindist, poisson, jac):
"Calculate the Jacobian matrix using numba to speed things up."
nforces = force_east.size
npoints = east.size
for i in numba.prange(npoints): # pylint: disable=not-an-iterable
for j in range(nforces):
green_ee, green_nn, green_ne = GREENS_FUNC_2D_JIT(
east[i] - force_east[j], north[i] - force_north[j], mindist, poisson
)
jac[i, j] = green_ee
jac[i + npoints, j + nforces] = green_nn
jac[i, j + nforces] = green_ne
jac[i + npoints, j] = green_ne # J is symmetric
return jac
# JIT compile the Greens functions for use in numba functions
GREENS_FUNC_2D_JIT = jit(**JIT_ARGS)(greens_func_2d)