diff --git a/CHANGES.rst b/CHANGES.rst index f6ad34277..4e706f662 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,12 +4,20 @@ Changelog v0.49.0 (unreleased) -------------------- -Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Juliette Lavoie (:user:`juliettelavoie`). +Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Juliette Lavoie (:user:`juliettelavoie`), Éric Dupuis (:user:`coxipi`). Announcements ^^^^^^^^^^^^^ * `xclim` has migrated its development branch name from `master` to `main`. (:issue:`1667`, :pull:`1669`). +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* Distributions with negative values are directly fitted without need for an offset for distributions such as `gamma, fisk` in ``xclim.indices.standardized_precipitation_evapotranspiration_index``. (:issue:`1477` :pull:`1653`). + +Breaking changes +^^^^^^^^^^^^^^^^ +* Argument `offset` is removed from ``xclim.indices.standardized_precipitation_evapotranspiration_index``. This will slightly change results when using SPEI with default values (:issue:`1477` :pull:`1653`). + Bug fixes ^^^^^^^^^ * Fixed an bug in sdba's ``map_groups`` that prevented passing DataArrays with cftime coordinates if the ``sdba_encode_cf`` option was True. (:issue:`1673`, :pull:`1674`). @@ -21,7 +29,6 @@ Internal changes ^^^^^^^^^^^^^^^^ * Added "doymin" and "doymax" to the possible operations of ``generic.stats``. Fixed a warning issue when ``op`` was "integral". (:pull:`1672`). - v0.48.2 (2024-02-26) -------------------- Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`). diff --git a/tests/test_indices.py b/tests/test_indices.py index e815b1766..931fe6f53 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -695,6 +695,8 @@ def test_standardized_precipitation_evapotranspiration_index( tas = tasmax - 2.5 tasmin = tasmax - 5 wb = xci.water_budget(pr, None, tasmin, tasmax, tas) + # offset like in climate indices library + wb = wb + convert_units_to("1 mm/d", wb, context="hydro") params = xci.stats.standardized_index_fit_params( wb.sel(time=slice("1950", "1980")), @@ -702,7 +704,6 @@ def test_standardized_precipitation_evapotranspiration_index( window=window, dist=dist, method=method, - offset="1 mm/d", ) spei = xci.standardized_precipitation_evapotranspiration_index( wb.sel(time=slice("1998", "2000")), params=params diff --git a/xclim/indices/_agro.py b/xclim/indices/_agro.py index cdfff68e7..5653aaabd 100644 --- a/xclim/indices/_agro.py +++ b/xclim/indices/_agro.py @@ -24,11 +24,7 @@ ) from xclim.indices.generic import aggregate_between_dates, get_zones from xclim.indices.helpers import _gather_lat, day_lengths -from xclim.indices.stats import ( - preprocess_standardized_index, - standardized_index, - standardized_index_fit_params, -) +from xclim.indices.stats import standardized_index # Frequencies : YS: year start, QS-DEC: seasons starting in december, MS: month start # See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html @@ -1094,16 +1090,15 @@ def _get_rain_season(pram): @declare_units( pr="[precipitation]", - pr_cal="[precipitation]", params="[]", ) def standardized_precipitation_index( pr: xarray.DataArray, - pr_cal: Quantified | None = None, freq: str | None = "MS", window: int = 1, dist: str = "gamma", method: str = "APP", + fitkwargs: dict = {}, cal_start: DateStr | None = None, cal_end: DateStr | None = None, params: Quantified | None = None, @@ -1115,9 +1110,6 @@ def standardized_precipitation_index( ---------- pr : xarray.DataArray Daily precipitation. - pr_cal : xarray.DataArray, optional - Daily precipitation used for calibration. Usually this is a temporal subset of `pr` over some reference period. - This option will be removed in xclim >=0.47.0. Two behaviour will be possible (see below) freq : str, optional Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling has already been applied input dataset and will skip the resampling step. @@ -1129,6 +1121,8 @@ def standardized_precipitation_index( method : {'APP', 'ML'} Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method uses a deterministic function that doesn't involve any optimization. + fitkwargs : dict + Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). cal_start : DateStr, optional Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. Default option `None` means that the calibration period begins at the start of the input dataset. @@ -1189,75 +1183,33 @@ def standardized_precipitation_index( ---------- :cite:cts:`mckee_relationship_1993` """ - if params is not None and pr_cal is None: - freq, window, indexer = ( - params.attrs[s] for s in ["freq", "window", "time_indexer"] - ) - # Unpack attrs to None and {} if needed - freq = None if freq == "" else freq - indexer = {} if indexer[0] == "" else {indexer[0]: indexer[1:]} - if cal_start or cal_end: - warnings.warn( - "Expected either `cal_{start|end}` or `params`, got both. The `params` input overrides other inputs." - "If `cal_start`, `cal_end`, `freq`, `window`, and/or `dist` were given as input, they will be ignored." + dist_methods = {"gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} + if dist in dist_methods.keys(): + if method not in dist_methods[dist]: + raise NotImplementedError( + f"{method} method is not implemented for {dist} distribution" ) - - if pr_cal is not None: - warnings.warn( - "Inputting a calibration array will be deprecated in xclim>=0.47.0. " - "For example, if `pr_cal` is a subset of `pr`, then instead of say:\n" - "`standardized_precipitation_index(pr=pr,pr_cal=pr.sel(time=slice(t0,t1)),...)`,\n" - "one can call:\n" - "`standardized_precipitation_index(pr=pr,cal_range=(t0,t1),...).\n" - "If for some reason `pr_cal` is not a subset of `pr`, then the following approach will still be possible:\n" - "`params = standardized_index_fit_params(da=pr_cal, freq=freq, window=window, dist=dist, method=method)`.\n" - "`spi = standardized_precipitation_index(pr=pr, params=params)`.\n" - "This approach can be used in both scenarios to break up the computations in two," - "i.e. get params, then compute standardized indices." - ) - params = standardized_index_fit_params( - pr_cal, freq=freq, window=window, dist=dist, method=method, **indexer - ) - - pr, _ = preprocess_standardized_index(pr, freq=freq, window=window, **indexer) - if params is None: - params = standardized_index_fit_params( - pr.sel(time=slice(cal_start, cal_end)), - freq=None, - window=1, - dist=dist, - method=method, - ) - - # If params only contains a subset of main dataset time grouping - # (e.g. 8/12 months, etc.), it needs to be broadcasted - template = pr.groupby(params.attrs["group"]).first() - paramsd = {k: v for k, v in params.sizes.items() if k != "dparams"} - if paramsd != template.sizes: - params = params.broadcast_like(template) - - spi = standardized_index(pr, params) - spi.attrs = params.attrs - spi.attrs["freq"] = (freq or xarray.infer_freq(spi.time)) or "undefined" - spi.attrs["window"] = window - spi.attrs["units"] = "" + else: + raise NotImplementedError(f"{dist} distribution is not implemented yet") + spi = standardized_index( + pr, freq, window, dist, method, fitkwargs, cal_start, cal_end, params, **indexer + ) return spi @declare_units( wb="[precipitation]", - wb_cal="[precipitation]", offset="[precipitation]", params="[]", ) def standardized_precipitation_evapotranspiration_index( wb: xarray.DataArray, - wb_cal: Quantified | None = None, freq: str | None = "MS", window: int = 1, dist: str = "gamma", method: str = "APP", - offset: Quantified = "1.000 mm/d", + fitkwargs: dict = {}, + offset: Quantified = "", cal_start: DateStr | None = None, cal_end: DateStr | None = None, params: Quantified | None = None, @@ -1273,9 +1225,6 @@ def standardized_precipitation_evapotranspiration_index( ---------- wb : xarray.DataArray Daily water budget (pr - pet). - wb_cal : xarray.DataArray, optional - Daily water budget used for calibration. Usually this is a temporal subset of `wb` over some reference period. - This option will be removed in xclim >=0.47.0. Two behaviours will be possible (see below). freq : str, optional Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling has already been applied input dataset and will skip the resampling step. @@ -1289,6 +1238,12 @@ def standardized_precipitation_evapotranspiration_index( `PWM` (probability weighted moments). The approximate method uses a deterministic function that doesn't involve any optimization. Available methods vary with the distribution: 'gamma':{'APP', 'ML', 'PWM'}, 'fisk':{'APP', 'ML'} + fitkwargs : dict + Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). + offset : Quantified + For distributions bounded by zero (e.g. "gamma", "fisk"), the two-parameters distributions only accept positive + values. An offset can be added to make sure this is the case. This option will be removed in xclim >=0.49.0, ``xclim`` + will rely on three-parameters distributions instead. cal_start : DateStr, optional Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. Default option `None` means that the calibration period begins at the start of the input dataset. @@ -1299,11 +1254,6 @@ def standardized_precipitation_evapotranspiration_index( Fit parameters. The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. The output can be given here as input, and it overrides other options. - offset : Quantified - For distributions bounded by zero (e.g. "gamma", "fisk"), an offset must be added to the water budget - to make sure there are no negative values. - Keep the offset as small as possible to minimize its influence on the results. - This can be given as a precipitation flux or a rate. \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. @@ -1323,7 +1273,9 @@ def standardized_precipitation_evapotranspiration_index( See Standardized Precipitation Index (SPI) for more details on usage. """ - uses_default_offset = offset == "1.000 mm/d" + uses_default_offset = offset != "" + if uses_default_offset is False: + warnings.warn("Inputting an offset will be deprecated in xclim>=0.49.0. ") if params is not None: params_offset = params.attrs["offset"] if uses_default_offset is False and offset != params_offset: @@ -1337,20 +1289,26 @@ def standardized_precipitation_evapotranspiration_index( if dist in ["gamma", "fisk"] and offset <= 0: raise ValueError( "The water budget must be shifted towards positive values to be used with `gamma` and `fisk` " - "distributions which are bounded by zero. A positive offset is required. Current value: " - f"{offset}{wb.attrs['units']}." + "distributions which are bounded by zero (only when using two-parameters distributions: in xclim>=0.49.0," + "three-parameters distributions are used to accommodate negative values). Only positive offsets are accepted." ) + # Note that the default behaviour would imply an offset for any distribution, even those distributions # that can accommodate negative values of the water budget. This needs to be changed in future versions # of the index. if offset != 0: with xarray.set_options(keep_attrs=True): wb = wb + offset - if wb_cal is not None: - wb_cal = wb_cal + offset - - spei = standardized_precipitation_index( - wb, wb_cal, freq, window, dist, method, cal_start, cal_end, params, **indexer + dist_methods = {"gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} + if dist in dist_methods.keys(): + if method not in dist_methods[dist]: + raise NotImplementedError( + f"{method} method is not implemented for {dist} distribution" + ) + else: + raise NotImplementedError(f"{dist} distribution is not implemented yet") + spei = standardized_index( + wb, freq, window, dist, method, fitkwargs, cal_start, cal_end, params, **indexer ) return spei diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py index 8c5a559a6..b9a7f3123 100644 --- a/xclim/indices/stats.py +++ b/xclim/indices/stats.py @@ -13,7 +13,7 @@ from xclim.core.calendar import compare_offsets, resample_doy, select_time from xclim.core.formatting import prefix_attrs, unprefix_attrs, update_history from xclim.core.units import convert_units_to -from xclim.core.utils import Quantified, uses_dask +from xclim.core.utils import DateStr, Quantified, uses_dask from . import generic @@ -490,27 +490,49 @@ def _fit_start(x, dist: str, **fitkwargs: Any) -> tuple[tuple, dict]: return (chat,), {"loc": loc, "scale": scale} if dist in ["gamma"]: - x_pos = x[x > 0] + if "floc" in fitkwargs: + loc0 = fitkwargs["floc"] + else: + xs = sorted(x) + x1, x2, xn = xs[0], xs[1], xs[-1] + n = len(x) + cv = x.std() / x.mean() + p = 100 * (0.48265 + 0.32967 * cv) * n ** (-0.2984 * cv) + xp = np.percentile(x, p) + loc0 = (x1 * xn - xp**2) / (x1 + xn - 2 * xp) + loc0 = loc0 if loc0 < x1 else 0.9999 * x1 + x_pos = x - loc0 m = x_pos.mean() log_of_mean = np.log(m) mean_of_logs = np.log(x_pos).mean() - a = log_of_mean - mean_of_logs - alpha = (1 + np.sqrt(1 + 4 * a / 3)) / (4 * a) - beta = m / alpha - return (alpha,), {"scale": beta} + A = log_of_mean - mean_of_logs + a0 = (1 + np.sqrt(1 + 4 * A / 3)) / (4 * A) + scale0 = m / a0 + kwargs = {"scale": scale0, "loc": loc0} + return (a0,), kwargs if dist in ["fisk"]: - x_pos = x[x > 0] + if "floc" in fitkwargs: + loc0 = fitkwargs["floc"] + else: + xs = sorted(x) + x1, x2, xn = xs[0], xs[1], xs[-1] + loc0 = (x1 * xn - x2**2) / (x1 + xn - 2 * x2) + loc0 = loc0 if loc0 < x1 else 0.9999 * x1 + x_pos = x - loc0 m = x_pos.mean() - v = x_pos.var() - # pdf = (beta/alpha) (x/alpha)^{beta-1}/ (1+(x/alpha)^{beta})^2 - # Compute f_1 and f_2 which only depend on beta: - # f_1 := mean/alpha = /alpha - # f_2 := variance/alpha^2 = ( - ^2)/alpha^2 - # In the large beta limit, f_1 -> 1 and f_1/sqrt(f_2) -> 0.56*beta - 0.25 - # Solve for alpha and beta below: - alpha, beta = m, (1 / 0.56) * (m / np.sqrt(v) + 1 / 4) - return (beta,), {"scale": alpha} + m2 = (x_pos**2).mean() + # method of moments: + # LHS is computed analytically with the two-parameters log-logistic distribution + # and depends on alpha,beta + # RHS is from the sample + # = m + # / ^2 = m2/m**2 + # solving these equations yields + scale0 = 2 * m**3 / (m2 + m**2) + c0 = np.pi * m / np.sqrt(3) / np.sqrt(m2 - m**2) + kwargs = {"scale": scale0, "loc": loc0} + return (c0,), kwargs return (), {} @@ -538,6 +560,7 @@ def _dist_method_1D( array_like """ dist = get_dist(dist) + return getattr(dist, function)(*args, **kwargs) @@ -660,6 +683,7 @@ def standardized_index_fit_params( window: int, dist: str | scipy.stats.rv_continuous, method: str, + fitkwargs: dict = {}, offset: Quantified | None = None, **indexer, ) -> xr.DataArray: @@ -685,9 +709,8 @@ def standardized_index_fit_params( method : {'ML', 'APP', 'PWM'} Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method uses a deterministic function that doesn't involve any optimization. - offset: Quantified - Distributions bounded by zero (e.g. "gamma", "fisk") can be used for datasets with negative values - by using an offset: `da + offset`. + fitkwargs : dict + Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. @@ -702,7 +725,15 @@ def standardized_index_fit_params( Supported combinations of `dist` and `method` are: * Gamma ("gamma") : "ML", "APP", "PWM" * Log-logistic ("fisk") : "ML", "APP" + + "APP" method only supports two-parameter distributions. Parameter `loc` will be set to 0 (setting `floc=0` in `fitkwargs`). """ + if offset: + warnings.warn("Inputting an offset will be deprecated in xclim>=0.49.0. ") + if offset is not None: + with xr.set_options(keep_attrs=True): + da = da + convert_units_to(offset, da, context="hydro") + # "WPM" method doesn't seem to work for gamma or pearson3 dist_and_methods = {"gamma": ["ML", "APP", "PWM"], "fisk": ["ML", "APP"]} dist = get_dist(dist) @@ -712,13 +743,38 @@ def standardized_index_fit_params( raise NotImplementedError( f"The method `{method}` is not supported for distribution `{dist.name}`." ) - - if offset is not None: - with xr.set_options(keep_attrs=True): - da = da + convert_units_to(offset, da, context="hydro") - + if method == "APP": + if "floc" in fitkwargs.keys(): + if fitkwargs["floc"] != 0: + raise ValueError( + "The APP method is only supported for two-parameter distributions with `gamma` or `fisk`. `loc` parameter must be set 0." + "Pass `floc=0` in `fitkwargs`." + ) + else: + fitkwargs["floc"] = 0 + warnings.warn( + "The APP method is only supported for two-parameter distributions with `gamma` or `fisk`." + "Location parameter `loc` will be set to 0. To avoid this warning, set `floc=0` manually in `fitkwargs`." + ) da, group = preprocess_standardized_index(da, freq, window, **indexer) - params = da.groupby(group).map(fit, dist=dist, method=method) + + # convert floc units if needed + # this should be passed to scipy eventually, so it should not support strings + # with units. Perhaps using `fitkwargs` in this context is misleading? + for fpar in ["floc", "fscale"]: + if fpar in fitkwargs.keys(): + if np.isscalar(fitkwargs[fpar]) is False: + fitkwargs[fpar] = convert_units_to(fitkwargs[fpar], da, context="hydro") + + # Use zero inflated distributions + # Idea: Perhaps having specific zero-inflated distributions or an option passed + # to the standardized index (SPI/SPEI) to state we want to use zero-inflated + # would be a better way to organize this + if da.min() == 0 and dist.name in ["gamma", "fisk"]: + da0 = da.where(da > 0).copy() + else: + da0 = da + params = da0.groupby(group).map(fit, dist=dist, method=method, **fitkwargs) cal_range = ( da.time.min().dt.strftime("%Y-%m-%d").item(), da.time.max().dt.strftime("%Y-%m-%d").item(), @@ -731,20 +787,27 @@ def standardized_index_fit_params( "method": method, "group": group, "units": "", - "offset": offset or "", } method, args = ("", []) if indexer == {} else indexer.popitem() params.attrs["time_indexer"] = (method, *args) + params.attrs["offset"] = offset or "" return params def standardized_index( da: xr.DataArray, - params: xr.DataArray, - dist: str | scipy.stats.rv_continuous | None = None, -): - """Compute standardized index for given fit parameters. + freq: str | None, + window: int, + dist: str | scipy.stats.rv_continuous | None, + method: str, + fitkwargs: dict, + cal_start: DateStr | None, + cal_end: DateStr | None, + params: Quantified | None, + **indexer, +) -> xr.DataArray: + r"""Standardized Index (SI). This computes standardized indices which measure the deviation of variables in the dataset compared to a reference distribution. The reference is a statistical distribution computed with fitting parameters `params` @@ -754,15 +817,81 @@ def standardized_index( Parameters ---------- da : xarray.DataArray - Input array. - Resampling and rolling operations should have already been applied using - ``xclim.indices.preprocess_standardized_index``. + Daily input data. + freq : str, optional + Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling + has already been applied input dataset and will skip the resampling step. + window : int + Averaging window length relative to the resampling frequency. For example, if `freq="MS"`, + i.e. a monthly resampling, the window is an integer number of months. + dist : str or rv_continuous + Name of the univariate distribution. (see :py:mod:`scipy.stats`). + method : str + Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method + uses a deterministic function that doesn't involve any optimization. + cal_start : DateStr, optional + Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period begins at the start of the input dataset. + cal_end : DateStr, optional + End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period finishes at the end of the input dataset. params : xarray.DataArray - Fit parameters computed using ``xclim.indices.stats.standardized_index_fit_params``. - dist : str or rv_continuous, optional - Name of distribution or instance. Defaults to the "scipy_dist" attribute of `params`. + Fit parameters. + The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. + The output can be given here as input, and it overrides other options. + \*\*indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + + Returns + ------- + xarray.DataArray, [unitless] + Standardized Precipitation Index. + + Notes + ----- + * The length `N` of the N-month SPI is determined by choosing the `window = N`. + * Supported statistical distributions are: ["gamma", "fisk"], where "fisk" is scipy's implementation of + a log-logistic distribution + * If `params` is given as input, it overrides the `cal_start`, `cal_end`, `freq` and `window`, `dist` and `method` options. + * The standardized index is bounded by ±8.21. 8.21 is the largest standardized index as constrained by the float64 precision in + the inversion to the normal distribution. + + References + ---------- + :cite:cts:`mckee_relationship_1993` """ + if params is not None: + freq, window, indexer = ( + params.attrs[s] for s in ["freq", "window", "time_indexer"] + ) + # Unpack attrs to None and {} if needed + freq = None if freq == "" else freq + indexer = {} if indexer[0] == "" else {indexer[0]: indexer[1:]} + if cal_start or cal_end: + warnings.warn( + "Expected either `cal_{start|end}` or `params`, got both. The `params` input overrides other inputs." + "If `cal_start`, `cal_end`, `freq`, `window`, and/or `dist` were given as input, they will be ignored." + ) + + da, _ = preprocess_standardized_index(da, freq=freq, window=window, **indexer) + if params is None: + params = standardized_index_fit_params( + da.sel(time=slice(cal_start, cal_end)), + freq=None, + window=1, + dist=dist, + method=method, + fitkwargs=fitkwargs, + ) + + # If params only contains a subset of main dataset time grouping + # (e.g. 8/12 months, etc.), it needs to be broadcasted group = params.attrs["group"] + template = da.groupby(group).first() + paramsd = {k: v for k, v in params.sizes.items() if k != "dparams"} + if paramsd != template.sizes: + params = params.broadcast_like(template) def reindex_time(da, da_ref): if group == "time.dayofyear": @@ -772,20 +901,21 @@ def reindex_time(da, da_ref): da["time"] = da_ref.time return da if not uses_dask(da) else da.chunk({"time": -1}) + # this should be restricted to some distributions / in some context probs_of_zero = da.groupby(group).map( lambda x: (x == 0).sum("time") / x.notnull().sum("time") ) params, probs_of_zero = (reindex_time(dax, da) for dax in [params, probs_of_zero]) + # should `da` below exclude zeros? dist_probs = dist_method("cdf", params, da, dist=dist) probs = probs_of_zero + ((1 - probs_of_zero) * dist_probs) - params_norm = xr.DataArray( [0, 1], dims=["dparams"], coords=dict(dparams=(["loc", "scale"])), attrs=dict(scipy_dist="norm"), ) - std_index = dist_method("ppf", params_norm, probs) + si = dist_method("ppf", params_norm, probs) # A cdf value of 0 or 1 gives ±np.inf when inverted to the normal distribution. # The neighbouring values 0.00...1 and 0.99...9 with a float64 give a standardized index of ± 8.21 # We use this index as reference for maximal/minimal standardized values. @@ -793,5 +923,10 @@ def reindex_time(da, da_ref): # notes that precipitation events (over a month/season) generally occur less than 100 times in the US. # Values of standardized index outside the range [-3.09, 3.09] correspond to probabilities smaller than 0.001 # or greater than 0.999, which could be considered non-statistically relevant for this sample size. - std_index = std_index.clip(-8.21, 8.21) - return std_index + si = si.clip(-8.21, 8.21) + + si.attrs = params.attrs + si.attrs["freq"] = (freq or xr.infer_freq(si.time)) or "undefined" + si.attrs["window"] = window + si.attrs["units"] = "" + return si