# Source code for verde.utils

"""
General utilities.
"""
import functools

import numpy as np
import pandas as pd

def parse_engine(engine):
    """
    Validate the requested engine and resolve "auto" to a concrete engine.

    Parameters
    ----------
    engine : str
        The name of the requested engine: "auto", "numba", or "numpy". With
        "auto", numba is selected if it can be imported and numpy otherwise.

    Returns
    -------
    engine : str
        The name of the engine that should be used. "numba" and "numpy" are
        returned as given without checking that the package is installed.

    Raises
    ------
    ValueError
        If *engine* is not one of the supported names.

    """
    engines = {"auto", "numba", "numpy"}
    if engine not in engines:
        message = "Invalid engine '{}'. Must be in {}.".format(engine, engines)
        raise ValueError(message)
    if engine != "auto":
        # An explicit choice is passed through untouched.
        return engine
    # Only probe for numba when asked to pick the engine automatically.
    try:
        import numba  # pylint: disable=unused-variable
    except ImportError:
        return "numpy"
    return "numba"

def dummy_jit(**kwargs):  # pylint: disable=unused-argument
    """
    Stand-in for :func:`numba.jit` to be used when numba is not installed.

    Use as a decorator, called with keyword arguments (all of which are ignored).
    The decorated function is replaced by one that raises :class:`RuntimeError`
    whenever it is called.

    Parameters
    ----------
    **kwargs
        Any keyword arguments. They are accepted and ignored.

    Returns
    -------
    decorator : function
        A decorator that swaps the function it receives for a function that raises
        :class:`RuntimeError` warning that numba could not be found. The replacement
        keeps the name and docstring of the decorated function.

    """

    def dummy_decorator(function):
        "Swap *function* for a stub that always raises."

        def dummy_function(*args, **kwargs):  # pylint: disable=unused-argument
            "Raise instead of running the decorated function."
            raise RuntimeError("Could not find numba.")

        # Copy the metadata (name, docstring, ...) of the decorated function onto
        # the stub so that it still looks like the original.
        return functools.update_wrapper(dummy_function, function)

    return dummy_decorator

def n_1d_arrays(arrays, n):
    """
    Take the first *n* items of a sequence and return them as raveled arrays.

    Parameters
    ----------
    arrays : tuple of arrays
        The input arrays. Each item can be a list or anything else that can be
        converted to a numpy array (including numpy arrays).
    n : int
        How many arrays to return, counted from the start of *arrays*.

    Returns
    -------
    1darrays : tuple of arrays
        The selected items converted to 1D numpy arrays.

    Examples
    --------

    >>> import numpy as np
    >>> arrays = [np.arange(4).reshape(2, 2)]*3
    >>> n_1d_arrays(arrays, n=2)
    (array([0, 1, 2, 3]), array([0, 1, 2, 3]))

    """
    selected = arrays[:n]
    # atleast_1d promotes scalars to arrays so that ravel is always available.
    flattened = [np.atleast_1d(item).ravel() for item in selected]
    return tuple(flattened)

def check_data(data):
    """
    Make sure the *data* argument is a tuple.

    A tuple is returned unchanged. Anything else (for example, a single array) is
    wrapped in a tuple with a single element.

    This is the default format accepted and used by all gridders and processing
    functions.

    Examples
    --------

    >>> check_data([1, 2, 3])
    ([1, 2, 3],)
    >>> check_data(([1, 2], [3, 4]))
    ([1, 2], [3, 4])
    """
    return data if isinstance(data, tuple) else (data,)

def variance_to_weights(variance, tol=1e-15, dtype="float64"):
    """
    Converts data variances to weights for gridding.

    Weights are defined as the inverse of the variance, scaled to the range
    [0, 1], i.e. ``variance.min()/variance``.

    Any variance that is smaller than *tol* will automatically receive a weight
    of 1 to avoid zero division or blown up weights.

    The input arrays are never modified: NaNs are replaced on an internal copy.

    Parameters
    ----------
    variance : array or tuple of arrays
        An array with the variance of each point. If there are multiple arrays
        in a tuple, will calculate weights for each of them separately. Can
        have NaNs but they will be treated as zeros and therefore receive a
        weight of 1.
    tol : float
        The tolerance, or cutoff threshold, for small variances.
    dtype : str or numpy dtype
        The type of the output weights array.

    Returns
    -------
    weights : array or tuple of arrays
        Data weights in the range [0, 1] with the same shape as *variance*. If
        more than one variance array was provided, then this will be a tuple
        with the weights corresponding to each variance array.

    Examples
    --------

    >>> print(variance_to_weights([0, 2, 0.2, 1e-16]))
    [1.  0.1 1.  1. ]
    >>> print(variance_to_weights([0, 0, 0, 0]))
    [1. 1. 1. 1.]
    >>> for w  in variance_to_weights(([0, 1, 10], [2, 4.0, 8])):
    ...     print(w)
    [1.  1.  0.1]
    [1.   0.5  0.25]

    """
    variance = check_data(variance)
    weights = []
    for var in variance:
        # Let nan_to_num make its default copy. With copy=False the NaNs of a
        # float array passed in by the caller would be overwritten in place,
        # because atleast_1d hands back the very same array object.
        var = np.nan_to_num(np.atleast_1d(var))
        w = np.ones_like(var, dtype=dtype)
        # Only variances above the tolerance are scaled; the rest keep weight 1.
        nonzero = var > tol
        if np.any(nonzero):
            nonzero_var = var[nonzero]
            w[nonzero] = nonzero_var.min() / nonzero_var
        weights.append(w)
    # A single input array gets a single array back instead of a 1-tuple.
    if len(weights) == 1:
        return weights[0]
    return tuple(weights)

def maxabs(*args):
    """
    Calculate the maximum absolute value of the given array(s).

    Use this to set the limits of your colorbars and center them on zero.

    Parameters
    ----------
    args
        One or more arrays (or anything that can be converted to one). If more
        than one is given, a single maximum is calculated across all of them.

    Returns
    -------
    maxabs : float
        The maximum absolute value across all arrays.

    Examples
    --------

    >>> maxabs((1, -10, 25, 2, 3))
    25
    >>> maxabs((1, -10.5, 25, 2), (0.1, 100, -500), (-200, -300, -0.1, -499))
    500.0

    """
    # Convert every input first so that scalars and lists also have min/max.
    converted = list(map(np.atleast_1d, args))
    peaks = []
    for values in converted:
        # The largest absolute value has to be at one of the two extremes.
        extremes = [values.min(), values.max()]
        peaks.append(np.abs(extremes).max())
    return np.max(peaks)

def grid_to_table(grid):
    """
    Convert a grid to a table with the values and coordinates of each point.

    Takes a 2D grid as input, extracts the coordinates and runs them through
    :func:`numpy.meshgrid` to create a 2D table. Works for 2D grids with any number
    of variables. Use cases include passing gridded data to functions that expect
    data in XYZ format, such as :class:`verde.BlockReduce`.

    Parameters
    ----------
    grid : :class:`xarray.Dataset`
        A 2D grid with one or more data variables.

    Returns
    -------
    table : :class:`pandas.DataFrame`
        Table with coordinates and variable values for each point in the grid.

    Examples
    --------

    >>> import xarray as xr
    >>> import numpy as np
    >>> # Create a sample grid with a single data variable
    >>> temperature = xr.DataArray(
    ...     np.arange(20).reshape((4, 5)),
    ...     coords=(np.arange(4), np.arange(5, 10)),
    ...     dims=['northing', 'easting']
    ... )
    >>> grid = xr.Dataset({"temperature": temperature})
    >>> table  = grid_to_table(grid)
    >>> list(sorted(table.columns))
    ['easting', 'northing', 'temperature']
    >>> print(table.northing.values)
    [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3]
    >>> print(table.easting.values)
    [5 6 7 8 9 5 6 7 8 9 5 6 7 8 9 5 6 7 8 9]
    >>> print(table.temperature.values)
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
    >>> # Grids with multiple data variables will have more columns.
    >>> wind_speed = xr.DataArray(
    ...     np.arange(20, 40).reshape((4, 5)),
    ...     coords=(np.arange(4), np.arange(5, 10)),
    ...     dims=['northing', 'easting']
    ... )
    >>> grid['wind_speed'] = wind_speed
    >>> table = grid_to_table(grid)
    >>> list(sorted(table.columns))
    ['easting', 'northing', 'temperature', 'wind_speed']
    >>> print(table.northing.values)
    [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3]
    >>> print(table.easting.values)
    [5 6 7 8 9 5 6 7 8 9 5 6 7 8 9 5 6 7 8 9]
    >>> print(table.temperature.values)
    [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
    >>> print(table.wind_speed.values)
    [20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39]

    """
    # The first coordinate is used as the northing (rows) and the second as the
    # easting (columns).
    # NOTE(review): assumes the coordinates are listed in the same order as the
    # grid dimensions -- confirm against callers.
    names = list(grid.coords.keys())
    north_name, east_name = names[0], names[1]
    north = grid.coords[north_name].values
    east = grid.coords[east_name].values
    # meshgrid varies its first argument along the columns, hence easting first.
    east_mesh, north_mesh = np.meshgrid(east, north)
    columns = {north_name: north_mesh.ravel(), east_name: east_mesh.ravel()}
    # Stack all data variables and flatten each one into a single table column.
    data_names = list(grid.data_vars.keys())
    stacked = grid.to_array().values
    points_per_variable = stacked.size // len(data_names)
    flat_variables = stacked.reshape(len(data_names), points_per_variable)
    for name, values in zip(data_names, flat_variables):
        columns[name] = values
    return pd.DataFrame(columns)