# Source code for verde.vector

"""
Classes for dealing with vector data.
"""
import warnings

import numpy as np
from sklearn.utils.validation import check_is_fitted

from .base import n_1d_arrays, check_fit_input, least_squares, BaseGridder
from .spline import warn_weighted_exact_solution
from .utils import parse_engine
from .coordinates import get_region

try:
    import numba
    from numba import jit
except ImportError:
    numba = None
    from .utils import dummy_jit as jit


# Python ignores DeprecationWarning by default unless it is triggered by code
# running in ``__main__``. Without this filter the DeprecationWarning issued
# in this module would usually not be shown, kind of defeating the purpose.
# NOTE(review): ``simplefilter`` changes the warning filters of the whole
# process as a side effect of importing this module, not only the filters for
# warnings raised here -- confirm that this is intended.
warnings.simplefilter("default")


class Vector(BaseGridder):
    """
    Fit an estimator to each component of multi-component vector data.

    Provides a convenient way of fitting and gridding vector data using scalar
    gridders and estimators.

    Each data component provided to :meth:`~verde.Vector.fit` is fitted to a
    separate estimator. Methods like :meth:`~verde.Vector.grid` and
    :meth:`~verde.Vector.predict` will operate on the multiple components
    simultaneously.

    .. warning::

        Never pass code like this as input to this class: ``[vd.Trend(1)]*3``.
        This creates 3 references to the **same instance** of ``Trend``, which
        means that they will all get the same coefficients after fitting. Use a
        list comprehension instead: ``[vd.Trend(1) for i in range(3)]``.

    Parameters
    ----------
    components : tuple or list
        A tuple or list of the estimator/gridder instances used for each
        component. The estimators will be applied for each data component in
        the same order that they are given here.

    Attributes
    ----------
    components : tuple
        Tuple of the fitted estimators on each component of the data.
    region_ : tuple
        The boundaries (``[W, E, S, N]``) of the data used to fit the
        interpolator. Used as the default region for the
        :meth:`~verde.Vector.grid` and :meth:`~verde.Vector.scatter` methods.

    See also
    --------
    verde.Chain : Chain filtering operations to fit on each subsequent output.

    """

    def __init__(self, components):
        super().__init__()
        self.components = components

    def fit(self, coordinates, data, weights=None):
        """
        Fit the estimators to the given multi-component data.

        The data region is captured and used as default for the
        :meth:`~verde.Vector.grid` and :meth:`~verde.Vector.scatter` methods.

        All input arrays must have the same shape. If weights are given, there
        must be a separate array for each component of the data.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.
        data : tuple of array
            The data values of each component at each data point. Must be a
            tuple with one array per estimator in *components*.
        weights : None or tuple of array
            If not None, then the weights assigned to each data point of each
            data component. Typically, this should be 1 over the data
            uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.

        Raises
        ------
        ValueError
            If *data* is not a tuple, if *weights* is neither None nor a
            tuple, or if the number of data components is different from the
            number of estimators.

        """
        if not isinstance(data, tuple):
            raise ValueError(
                "Data must be a tuple of arrays. {} given.".format(type(data))
            )
        if weights is not None and not isinstance(weights, tuple):
            raise ValueError(
                "Weights must be a tuple of arrays. {} given.".format(type(weights))
            )
        # The loop below pairs estimators and data with zip, which would
        # silently drop the unmatched items if the lengths were different
        # (leaving estimators unfitted or data components unused).
        if len(data) != len(self.components):
            raise ValueError(
                "Number of data components ({}) must match the number of "
                "estimators ({}).".format(len(data), len(self.components))
            )
        coordinates, data, weights = check_fit_input(coordinates, data, weights)
        self.region_ = get_region(coordinates[:2])
        for estimator, data_comp, weight_comp in zip(self.components, data, weights):
            estimator.fit(coordinates, data_comp, weight_comp)
        return self

    def predict(self, coordinates):
        """
        Evaluate each data component on a set of points.

        Requires a fitted estimator (see :meth:`~verde.Vector.fit`).

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.

        Returns
        -------
        data : tuple of array
            The values for each vector component evaluated on the given points.
            The order of components will be the same as was provided to
            :meth:`~verde.Vector.fit`.

        """
        # region_ is only set by fit, so it doubles as the "is fitted" marker.
        check_is_fitted(self, ["region_"])
        return tuple(comp.predict(coordinates) for comp in self.components)


class VectorSpline2D(BaseGridder):
    r"""
    Elastically coupled interpolation of 2-component vector data.

    .. warning::

        The :class:`~verde.VectorSpline2D` class is deprecated and will be
        removed in Verde v2.0.0. Its usage is restricted to GPS/GNSS data and
        not in the general scope of Verde. Please use the implementation in the
        `Erizo <https://github.com/fatiando/erizo>`__ package instead.

    This gridder assumes Cartesian coordinates.

    Uses the Green's functions based on elastic deformation from
    [SandwellWessel2016]_. The interpolation is done by estimating point forces
    that generate an elastic deformation that fits the observed vector data.
    The deformation equations are based on a 2D elastic sheet with a constant
    Poisson's ratio. The data can then be predicted at any desired location.

    The east and north data components are coupled through the elastic
    deformation equations. This coupling is controlled by the Poisson's ratio,
    which is usually between -1 and 1. The special case of Poisson's ratio -1
    leads to an uncoupled interpolation, meaning that the east and north
    components don't interfere with each other.

    The point forces are traditionally placed under each data point. The force
    locations are set the first time :meth:`~verde.VectorSpline2D.fit` is
    called. Subsequent calls will fit using the same force locations as the
    first call. This configuration results in an exact prediction at the data
    points but can be unstable.

    [SandwellWessel2016]_ stabilize the solution using Singular Value
    Decomposition but we use ridge regression instead. The regularization can
    be controlled using the *damping* argument. Alternatively, you can specify
    the position of the forces manually using the *force_coords* argument.
    Regularization or forces not coinciding with data points will result in a
    least-squares estimate, not an exact solution. Note that the least-squares
    solution is required for data weights to have any effect.

    Before fitting, the Jacobian (design, sensitivity, feature, etc) matrix for
    the spline is normalized using
    :class:`sklearn.preprocessing.StandardScaler` without centering the mean so
    that the transformation can be undone in the estimated forces.

    Parameters
    ----------
    poisson : float
        The Poisson's ratio for the elastic deformation Green's functions.
        Default is 0.5. A value of -1 will lead to uncoupled interpolation of
        the east and north data components.
    mindist : float
        A minimum distance between the point forces and data points. Needed
        because the Green's functions are singular when forces and data points
        coincide. Acts as a fudge factor. A good rule of thumb is to use the
        average spacing between data points. Default is 10e3.
    damping : None or float
        The positive damping regularization parameter. Controls how much
        smoothness is imposed on the estimated forces. If None, no
        regularization is used.
    force_coords : None or tuple of arrays
        The easting and northing coordinates of the point forces. If None
        (default), then will be set to the data coordinates the first time
        :meth:`~verde.VectorSpline2D.fit` is called.
    engine : str
        Computation engine for the Jacobian matrix and predictions. Can be
        ``'auto'``, ``'numba'``, or ``'numpy'``. If ``'auto'``, will use numba
        if it is installed or numpy otherwise. The numba version is
        multi-threaded and usually faster, which makes fitting and predicting
        faster.

    Attributes
    ----------
    force_ : array
        The estimated forces that fit the observed data.
    region_ : tuple
        The boundaries (``[W, E, S, N]``) of the data used to fit the
        interpolator. Used as the default region for the
        :meth:`~verde.VectorSpline2D.grid` and
        :meth:`~verde.VectorSpline2D.scatter` methods.

    """

    def __init__(
        self, poisson=0.5, mindist=10e3, damping=None, force_coords=None, engine="auto"
    ):
        super().__init__()
        self.poisson = poisson
        self.mindist = mindist
        self.damping = damping
        self.force_coords = force_coords
        self.engine = engine
        # stacklevel=2 attributes the warning to the code that instantiates
        # the class instead of to this constructor.
        warnings.warn(
            "VectorSpline2D is deprecated and will be removed in Verde v2.0.0."
            " Please use the implementation in the Erizo package instead "
            "(https://github.com/fatiando/erizo).",
            DeprecationWarning,
            stacklevel=2,
        )

    def fit(self, coordinates, data, weights=None):
        """
        Fit the gridder to the given 2-component vector data.

        The data region is captured and used as default for the
        :meth:`~verde.VectorSpline2D.grid` and
        :meth:`~verde.VectorSpline2D.scatter` methods.

        All input arrays must have the same shape.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.
        data : tuple of array
            A tuple ``(east_component, north_component)`` of arrays with the
            vector data values at each point.
        weights : None or tuple array
            If not None, then the weights assigned to each data point. Must be
            one array per data component. Typically, this should be 1 over the
            data uncertainty squared.

        Returns
        -------
        self
            Returns this estimator instance for chaining operations.

        Raises
        ------
        ValueError
            If *data* does not have exactly two components.

        """
        coordinates, data, weights = check_fit_input(
            coordinates, data, weights, unpack=False
        )
        if len(data) != 2:
            raise ValueError(
                "Need two data components. Only {} given.".format(len(data))
            )
        # Capture the data region to use as a default when gridding.
        self.region_ = get_region(coordinates[:2])
        # Stack the weights east-on-top-of-north to match the stacked data
        # vector built below.
        if any(w is not None for w in weights):
            weights = np.concatenate([i.ravel() for i in weights])
        else:
            weights = None
        warn_weighted_exact_solution(self, weights)
        data = np.concatenate([i.ravel() for i in data])
        # The force locations are only set on the first call. Later calls
        # reuse them, even if the data coordinates are different.
        if self.force_coords is None:
            self.force_coords = tuple(i.copy() for i in n_1d_arrays(coordinates, n=2))
        jacobian = self.jacobian(coordinates[:2], self.force_coords)
        self.force_ = least_squares(jacobian, data, weights, self.damping)
        return self

    def predict(self, coordinates):
        """
        Evaluate the fitted gridder on the given set of points.

        Requires a fitted estimator (see :meth:`~verde.VectorSpline2D.fit`).

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.

        Returns
        -------
        data : tuple of arrays
            A tuple ``(east_component, north_component)`` of arrays with the
            predicted vector data values at each point. The arrays have the
            coordinates' dtype if it is a floating point type and float64
            otherwise.

        """
        check_is_fitted(self, ["force_"])
        # Flatten the force coordinates the same way that jacobian() does so
        # that fit and predict index the forces consistently, even for
        # user-supplied force_coords that are not 1D.
        force_east, force_north = n_1d_arrays(self.force_coords, n=2)
        east, north = n_1d_arrays(coordinates, n=2)
        cast = np.broadcast(*coordinates[:2])
        npoints = cast.size
        # The predictions are real numbers. Integer output buffers (from
        # integer coordinates) would make the in-place accumulation fail or
        # truncate the results, so fall back to float64 in that case.
        if np.issubdtype(east.dtype, np.floating):
            dtype = east.dtype
        else:
            dtype = np.float64
        vec_east = np.empty(npoints, dtype=dtype)
        vec_north = np.empty(npoints, dtype=dtype)
        if parse_engine(self.engine) == "numba":
            predictor = predict_2d_numba
        else:
            predictor = predict_2d_numpy
        components = predictor(
            east,
            north,
            force_east,
            force_north,
            self.mindist,
            self.poisson,
            self.force_,
            vec_east,
            vec_north,
        )
        return tuple(comp.reshape(cast.shape) for comp in components)

    def jacobian(self, coordinates, force_coords, dtype="float64"):
        """
        Make the Jacobian matrix for the 2D coupled elastic deformation.

        The Jacobian is segmented into 4 parts, each relating a force component
        to a data component [SandwellWessel2016]_::

            | J_ee  J_ne |*|f_e| = |d_e|
            | J_ne  J_nn | |f_n|   |d_n|

        The forces and data are assumed to be stacked into 1D arrays with the
        east component on top of the north component.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (easting, northing, vertical, ...). Only easting
            and northing will be used, all subsequent coordinates will be
            ignored.
        force_coords : tuple of arrays
            Arrays with the coordinates for the forces. Should be in the same
            order as the coordinate arrays.
        dtype : str or numpy dtype
            The type of the Jacobian array.

        Returns
        -------
        jacobian : 2D array
            The (n_data*2, n_forces*2) Jacobian matrix.

        """
        force_east, force_north = n_1d_arrays(force_coords, n=2)
        east, north = n_1d_arrays(coordinates, n=2)
        # Allocated here and filled in place by the engine-specific function.
        jac = np.empty((east.size * 2, force_east.size * 2), dtype=dtype)
        if parse_engine(self.engine) == "numba":
            fill_jacobian = jacobian_2d_numba
        else:
            fill_jacobian = jacobian_2d_numpy
        return fill_jacobian(
            east, north, force_east, force_north, self.mindist, self.poisson, jac
        )


def greens_func_2d(east, north, mindist, poisson):
    """
    Evaluate the Green's functions for the 2D elastic case.

    Parameters
    ----------
    east, north : float or array
        Easting and northing offsets at which the functions are evaluated.
    mindist : float
        Value added to the distance before it is used.
    poisson : float
        The Poisson's ratio.

    Returns
    -------
    green_ee, green_nn, green_ne
        The east-east, north-north, and north-east Green's functions.

    """
    radius = np.sqrt(east ** 2 + north ** 2)
    # Padding the distance with mindist helps avoid singular matrices when the
    # force and computation point are too close.
    radius += mindist
    # Terms shared by the Green's functions of every component
    log_term = (3 - poisson) * np.log(radius)
    inverse_square = (1 + poisson) / radius ** 2
    cross_term = -inverse_square * east * north
    north_north = log_term + inverse_square * east ** 2
    east_east = log_term + inverse_square * north ** 2
    return east_east, north_north, cross_term


def predict_2d_numpy(
    east, north, force_east, force_north, mindist, poisson, forces, vec_east, vec_north
):
    """
    Calculate the predicted data using numpy.

    The effect of each force is accumulated in place into *vec_east* and
    *vec_north*, which are zeroed first and then returned. The first half of
    *forces* multiplies the east-east and north-east Green's functions in the
    east component; the second half does the same for the north component.
    """
    half = forces.size // 2
    # The output buffers may hold garbage (e.g. from np.empty) so reset them.
    vec_north[:] = 0
    vec_east[:] = 0
    for index in range(half):
        offset_east = east - force_east[index]
        offset_north = north - force_north[index]
        g_ee, g_nn, g_ne = greens_func_2d(offset_east, offset_north, mindist, poisson)
        first = forces[index]
        second = forces[index + half]
        vec_east += g_ee * first + g_ne * second
        vec_north += g_ne * first + g_nn * second
    return vec_east, vec_north


def jacobian_2d_numpy(east, north, force_east, force_north, mindist, poisson, jac):
    """
    Calculate the Jacobian matrix using numpy broadcasting.

    Fills the four quadrants of *jac* in place and returns it. The rows are
    split at the number of data points and the columns at the number of
    forces.
    """
    ndata, nsources = east.size, force_east.size
    # Subtracting the force coordinates from a column vector of data
    # coordinates broadcasts into one row per data point and one column per
    # force, which is the layout of each quadrant.
    column = (ndata, 1)
    offset_east = east.reshape(column) - force_east
    offset_north = north.reshape(column) - force_north
    g_ee, g_nn, g_ne = greens_func_2d(offset_east, offset_north, mindist, poisson)
    top, bottom = slice(None, ndata), slice(ndata, None)
    left, right = slice(None, nsources), slice(nsources, None)
    jac[top, left] = g_ee
    jac[top, right] = g_ne
    jac[bottom, left] = g_ne  # same block as the top right: J is symmetric
    jac[bottom, right] = g_nn
    return jac


@jit(nopython=True, fastmath=True, parallel=True)
def predict_2d_numba(
    east, north, force_east, force_north, mindist, poisson, forces, vec_east, vec_north
):
    """
    Calculate the predicted data using numba to speed things up.

    Loops over the computation points with ``numba.prange`` and, for each one,
    accumulates the effect of every force into *vec_east* and *vec_north*.
    Both output arrays are overwritten and returned.
    """
    half = forces.size // 2
    for point in numba.prange(east.size):  # pylint: disable=not-an-iterable
        # Each point owns its output entries, so start them from zero here.
        vec_north[point] = 0
        vec_east[point] = 0
        for source in range(half):
            g_ee, g_nn, g_ne = GREENS_FUNC_2D_JIT(
                east[point] - force_east[source],
                north[point] - force_north[source],
                mindist,
                poisson,
            )
            vec_east[point] += g_ee * forces[source] + g_ne * forces[source + half]
            vec_north[point] += g_ne * forces[source] + g_nn * forces[source + half]
    return vec_east, vec_north


@jit(nopython=True, fastmath=True, parallel=True)
def jacobian_2d_numba(east, north, force_east, force_north, mindist, poisson, jac):
    """
    Calculate the Jacobian matrix using numba to speed things up.

    Loops over the data points with ``numba.prange`` and writes the four
    quadrant entries for every (data point, force) pair into *jac*, which is
    filled in place and returned.
    """
    ndata = east.size
    nsources = force_east.size
    for row in numba.prange(ndata):  # pylint: disable=not-an-iterable
        for col in range(nsources):
            g_ee, g_nn, g_ne = GREENS_FUNC_2D_JIT(
                east[row] - force_east[col],
                north[row] - force_north[col],
                mindist,
                poisson,
            )
            # Diagonal quadrants
            jac[row, col] = g_ee
            jac[row + ndata, col + nsources] = g_nn
            # Off-diagonal quadrants share the same value: J is symmetric
            jac[row + ndata, col] = g_ne
            jac[row, col + nsources] = g_ne
    return jac


# JIT compile the Green's functions for use in the numba functions above
# (predict_2d_numba and jacobian_2d_numba). Those functions refer to this name
# as a module-level global, which is why it can be assigned after they are
# defined. The plain greens_func_2d stays available for the numpy versions.
# If numba is not installed, ``jit`` is the ``dummy_jit`` imported from
# ``.utils`` at the top of the module (presumably a placeholder that doesn't
# compile anything -- TODO confirm).
GREENS_FUNC_2D_JIT = jit(nopython=True, fastmath=True)(greens_func_2d)