# Copyright (c) 2017 The Verde Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
2D polynomial trends.
"""
import numpy as np
from sklearn.utils.validation import check_is_fitted
from .base import BaseGridder, check_fit_input, least_squares, n_1d_arrays
from .coordinates import get_region
[docs]
class Trend(BaseGridder):
r"""
Fit a 2D polynomial trend to spatial data.
The polynomial of degree :math:`N` is defined as:
.. math::
f(e, n) = \sum\limits_{l=0}^{N}\sum\limits_{m=0}^{N - l} e^l n^m
in which :math:`e` and :math:`n` are the easting and northing coordinates,
respectively.
The trend is estimated through weighted least-squares regression. The
Jacobian (design, sensitivity, feature, etc) matrix for the regression is
normalized using :class:`sklearn.preprocessing.StandardScaler` without
centering the mean so that the transformation can be undone in the
estimated coefficients.
Parameters
----------
degree : int
The degree of the polynomial. Must be >= 0 (a degree of zero would
estimate the mean of the data).
Attributes
----------
coef_ : array
The estimated polynomial coefficients that fit the observed data.
region_ : tuple
The boundaries (``[W, E, S, N]``) of the data used to fit the
interpolator. Used as the default region for the
:meth:`~verde.Trend.grid` and :meth:`~verde.Trend.scatter` methods.
Examples
--------
>>> from verde import grid_coordinates
>>> import numpy as np
>>> coordinates = grid_coordinates((1, 5, -5, -1), shape=(5, 5))
>>> data = 10 + 2*coordinates[0] - 0.4*coordinates[1]
>>> trend = Trend(degree=1).fit(coordinates, data)
>>> print(
... "Coefficients:",
... ', '.join(['{:.1f}'.format(i) for i in trend.coef_])
... )
Coefficients: 10.0, 2.0, -0.4
>>> np.allclose(trend.predict(coordinates), data)
True
A zero degree polynomial estimates the mean of the data:
>>> mean = Trend(degree=0).fit(coordinates, data)
>>> np.allclose(mean.predict(coordinates), data.mean())
True
>>> print("Data mean:", '{:.2f}'.format(data.mean()))
Data mean: 17.20
>>> print("Coefficient:", '{:.2f}'.format(mean.coef_[0]))
Coefficient: 17.20
We can use weights to account for outliers or data points with variable
uncertainties (see :func:`verde.variance_to_weights`):
>>> data_out = data.copy()
>>> data_out[2, 2] += 500
>>> weights = np.ones_like(data)
>>> weights[2, 2] = 1e-10
>>> trend_out = Trend(degree=1).fit(coordinates, data_out, weights)
>>> # Still recover the coefficients even with the added outlier
>>> print(
... "Coefficients:",
... ', '.join(['{:.1f}'.format(i) for i in trend_out.coef_])
... )
Coefficients: 10.0, 2.0, -0.4
>>> # The residual at the outlier location should be values we added to
>>> # that point
>>> residual = data_out - trend_out.predict(coordinates)
>>> print('{:.2f}'.format(residual[2, 2]))
500.00
"""
def __init__(self, degree):
super().__init__()
self.degree = degree
[docs]
def fit(self, coordinates, data, weights=None):
"""
Fit the trend to the given data.
The data region is captured and used as default for the
:meth:`~verde.Trend.grid` and :meth:`~verde.Trend.scatter` methods.
All input arrays must have the same shape.
Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...). Only easting
and northing will be used, all subsequent coordinates will be
ignored.
data : array
The data values of each data point.
weights : None or array
If not None, then the weights assigned to each data point.
Typically, this should be 1 over the data uncertainty squared.
Returns
-------
self
Returns this estimator instance for chaining operations.
"""
coordinates, data, weights = check_fit_input(coordinates, data, weights)
easting, northing = n_1d_arrays(coordinates, 2)
self.region_ = get_region((easting, northing))
jac = self.jacobian((easting, northing), dtype=data.dtype)
self.coef_ = least_squares(jac, data, weights, damping=None)
return self
[docs]
def predict(self, coordinates):
"""
Evaluate the polynomial trend on the given set of points.
Requires a fitted estimator (see :meth:`~verde.Trend.fit`).
Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...). Only easting
and northing will be used, all subsequent coordinates will be
ignored.
Returns
-------
data : array
The trend values evaluated on the given points.
"""
check_is_fitted(self, ["coef_"])
easting, northing = n_1d_arrays(coordinates, 2)
shape = np.broadcast(*coordinates[:2]).shape
data = np.zeros(easting.size, dtype=easting.dtype)
combinations = polynomial_power_combinations(self.degree)
for coef, (i, j) in zip(self.coef_, combinations):
data += (easting**i) * (northing**j) * coef
return data.reshape(shape)
[docs]
def jacobian(self, coordinates, dtype="float64"):
"""
Make the Jacobian matrix for a 2D polynomial.
Each column of the Jacobian is ``easting**i * northing**j`` for each
(i, j) pair in the polynomial.
Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...). Only easting
and northing will be used, all subsequent coordinates will be
ignored.
dtype : str or numpy dtype
The type of the output Jacobian numpy array.
Returns
-------
jacobian : 2D array
The (n_data, n_coefficients) Jacobian matrix.
Examples
--------
>>> import numpy as np
>>> east = np.linspace(0, 4, 5)
>>> north = np.linspace(-5, -1, 5)
>>> print(Trend(degree=1).jacobian((east, north), dtype=int))
[[ 1 0 -5]
[ 1 1 -4]
[ 1 2 -3]
[ 1 3 -2]
[ 1 4 -1]]
>>> print(Trend(degree=2).jacobian((east, north), dtype=int))
[[ 1 0 -5 0 0 25]
[ 1 1 -4 1 -4 16]
[ 1 2 -3 4 -6 9]
[ 1 3 -2 9 -6 4]
[ 1 4 -1 16 -4 1]]
"""
easting, northing = n_1d_arrays(coordinates, 2)
if easting.shape != northing.shape:
raise ValueError("Coordinate arrays must have the same shape.")
combinations = polynomial_power_combinations(self.degree)
ndata = easting.size
nparams = len(combinations)
out = np.empty((ndata, nparams), dtype=dtype)
for col, (i, j) in enumerate(combinations):
out[:, col] = (easting**i) * (northing**j)
return out
def polynomial_power_combinations(degree):
"""
Combinations of powers for a 2D polynomial of a given degree.
Produces the (i, j) pairs to evaluate the polynomial with ``x**i*y**j``.
Parameters
----------
degree : int
The degree of the 2D polynomial. Must be >= 1.
Returns
-------
combinations : tuple
A tuple with ``(i, j)`` pairs.
Examples
--------
>>> print(polynomial_power_combinations(1))
((0, 0), (1, 0), (0, 1))
>>> print(polynomial_power_combinations(2))
((0, 0), (1, 0), (0, 1), (2, 0), (1, 1), (0, 2))
>>> # This is a long polynomial so split it in two lines
>>> print(" ".join([str(c) for c in polynomial_power_combinations(3)]))
(0, 0) (1, 0) (0, 1) (2, 0) (1, 1) (0, 2) (3, 0) (2, 1) (1, 2) (0, 3)
>>> # A degree zero polynomial would be just the mean
>>> print(polynomial_power_combinations(0))
((0, 0),)
"""
if degree < 0:
raise ValueError("Invalid polynomial degree '{}'. Must be >= 0.".format(degree))
combinations = ((i, j) for j in range(degree + 1) for i in range(degree + 1 - j))
return tuple(sorted(combinations, key=sum))