Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Confidence intervals #1656

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 40 additions & 3 deletions xclim/indices/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def fit(
dist: str | scipy.stats.rv_continuous = "norm",
method: str = "ML",
dim: str = "time",
confidence: float | None = None,
**fitkwargs: Any,
) -> xr.DataArray:
r"""Fit an array to a univariate distribution along the time dimension.
Expand All @@ -90,13 +91,19 @@ def fit(
The PWM method is usually more robust to outliers.
dim : str
The dimension upon which to perform the indexing (default: "time").
confidence : float, optional
If set, the interval for the corresponding confidence is returned along the parameters.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
If set, the interval for the corresponding confidence is returned along the parameters.
If set, the interval for the corresponding confidence is returned along with the parameters.

Default (None), returns only the parameters.
\*\*fitkwargs
Other arguments passed directly to :py:func:`_fitstart` and to the distribution's `fit`.

Returns
-------
xr.DataArray
An array of fitted distribution parameters.
xr.DataArray
Returned if `confidence` is not None. Bounds of the confidence interval.
`confidence * 100` % are the distribution's possible values are inside this range.

Notes
-----
Expand Down Expand Up @@ -165,6 +172,9 @@ def fit(
),
)
out.attrs.update(attrs)
out = out.rename("params")
if confidence is not None:
return out, dist_method("interval", out, confidence, dist).rename("interval")
return out


Expand Down Expand Up @@ -309,6 +319,7 @@ def fa(
dist: str | scipy.stats.rv_continuous = "norm",
mode: str = "max",
method: str = "ML",
confidence: float | None = None,
) -> xr.DataArray:
"""Return the value corresponding to the given return period.

Expand All @@ -329,18 +340,27 @@ def fa(
Fitting method, either maximum likelihood (ML or MLE), method of moments (MOM) or approximate method (APP).
Also accepts probability weighted moments (PWM), also called L-Moments, if `dist` is an instance from the lmoments3 library.
The PWM method is usually more robust to outliers.
confidence : float, optional
If set, the confidence interval for the fit is returned along with the probability of exceedance.
Default (None), returns only the probability of exceedance.

Returns
-------
xarray.DataArray
An array of values with a 1/t probability of exceedance (if mode=='max').
xr.DataArray
Returned if `confidence` is not None. Bounds of the confidence interval.
`confidence * 100` % are the distribution's possible values are inside this range.

See Also
--------
scipy.stats : For descriptions of univariate distribution types.
"""
# Fit the parameters of the distribution
p = fit(da, dist, method=method)
p = fit(da, dist, method=method, confidence=confidence)
if confidence is not None:
confint = p[-1]
p = [0]
t = np.atleast_1d(t)

if mode in ["max", "high"]:
Expand All @@ -359,6 +379,8 @@ def fa(
.assign_coords(return_period=t)
)
out.attrs["mode"] = mode
if confidence is not None:
return out, confint
return out


Expand All @@ -370,6 +392,7 @@ def frequency_analysis(
window: int = 1,
freq: str | None = None,
method: str = "ML",
confidence: float | None = None,
**indexer: int | float | str,
) -> xr.DataArray:
r"""Return the value corresponding to a return period.
Expand All @@ -395,6 +418,9 @@ def frequency_analysis(
Fitting method, either maximum likelihood (ML or MLE), method of moments (MOM) or approximate method (APP).
Also accepts probability weighted moments (PWM), also called L-Moments, if `dist` is an instance from the lmoments3 library.
The PWM method is usually more robust to outliers.
confidence : float, optional
If set, the interval for the corresponding confidence is returned along the probability of exceedance.
Default (None), returns only the probability of exceedance.
\*\*indexer
Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values,
month=1 to select January, or month=[6,7,8] to select summer months. If indexer is not provided, all values are
Expand All @@ -404,6 +430,9 @@ def frequency_analysis(
-------
xarray.DataArray
An array of values with a 1/t probability of exceedance or non-exceedance when mode is high or low respectively.
xr.DataArray
Returned if `confidence` is not None. Bounds of the confidence interval.
`confidence * 100` % are the distribution's possible values are inside this range.

See Also
--------
Expand All @@ -424,7 +453,7 @@ def frequency_analysis(
if uses_dask(sel):
sel = sel.chunk({"time": -1})
# Frequency analysis
return fa(sel, t, dist=dist, mode=mode, method=method)
return fa(sel, t, dist=dist, mode=mode, method=method, confidence=confidence)


def get_dist(dist: str | scipy.stats.rv_continuous):
Expand Down Expand Up @@ -538,7 +567,10 @@ def _dist_method_1D(
array_like
"""
dist = get_dist(dist)
return getattr(dist, function)(*args, **kwargs)
out = getattr(dist, function)(*args, **kwargs)
if isinstance(out, tuple) and len(out) > 1:
return np.stack(out, axis=-1)
return out


def dist_method(
Expand Down Expand Up @@ -578,6 +610,10 @@ def dist_method(
# Typically the data to be transformed
arg = [arg] if arg is not None else []
args = arg + [fit_params.sel(dparams=dp) for dp in fit_params.dparams.values]
if function == "interval":
extra = {"output_core_dims": [["bounds"]]}
else:
extra = {}
return xr.apply_ufunc(
_dist_method_1D,
*args,
Expand All @@ -588,6 +624,7 @@ def dist_method(
},
output_dtypes=[float],
dask="parallelized",
**extra,
)


Expand Down