Skip to content

Commit

Permalink
Merge branch 'master' into fix-cssa
Browse files Browse the repository at this point in the history
  • Loading branch information
Zeitsperre committed Jun 20, 2023
2 parents 28856ec + 3b2bff6 commit c0a8244
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 17 deletions.
3 changes: 2 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ New features and enhancements
* New variables: Snowfall rate ``prsnd`` and surface maximum wind speed ``sfcWindmax``. (:issue:`1352`, :pull:`1358`).
* Docstring for `freq` links to pandas offset aliases documentation. (:issue:`1310`, :pull:`1392`).
* New function ``xclim.indces.run_length.extract_events`` for determining runs whose starting and stopping points are defined through run length conditions. (:pull:`1256`).
* Stats functions `frequency_analysis` now takes `method` parameter to select other fitting methods such as PWM or MOM. (:issue:`1168`, :pull:`1398`)

Bug fixes
^^^^^^^^^
Expand All @@ -49,7 +50,7 @@ Internal changes
* Two new GitHub CI Actions have been added to the existing Workflows (:pull:`1390`):
* `actions/add-to-project`: Automatically adds issues to the `xclim` project.
* `saadmk11/github-actions-version-updater`: Updates GitHub Action versions in all Workflows (triggered monthly).

* Added `method` parameter to `frequency_analysis` and `fa`. (:issue:`1168`, :pull:`1398`).

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion docs/notebooks/frequency_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The next step is to fit the statistical distribution on these maxima. This is done by the `.fit` method, which takes as argument the sample series, the distribution's name and the parameter estimation method. The fit is done by default using the Maximum Likelihood algorithm. For some extreme value distributions, however, the maximum likelihood is not always robust, and `xclim` offers the possibility to use Probability Weighted Moments (PWM) to estimate parameters. Note that the `lmoments3` package which is used by `xclim` to compute the PWM only supports `expon`, `gamma`, `genextreme`, `genpareto`, `gumbel_r`, `pearson3` and `weibull_min`."
"The next step is to fit the statistical distribution on these maxima. This is done by the `.fit` method, which takes as argument the sample series, the distribution's name and the parameter estimation `method`. The fit is done by default using the Maximum Likelihood algorithm (`method=\"ML\"`). For some extreme value distributions, however, the maximum likelihood is not always robust, and `xclim` offers the possibility to use Probability Weighted Moments (`method=\"PWM\"`) to estimate parameters. Note that the `lmoments3` package which is used by `xclim` to compute the PWM only supports `expon`, `gamma`, `genextreme`, `genpareto`, `gumbel_r`, `pearson3` and `weibull_min`. Parameters can also be estimated using the method of moments (`method=\"MM\"`)."
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.43.15-beta
current_version = 0.43.16-beta
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+).(?P<patch>\d+)(\-(?P<release>[a-z]+))?
Expand Down
32 changes: 29 additions & 3 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@

@pytest.fixture(params=[True, False])
def fitda(request):
nx, ny, nt = 2, 3, 50
nx, ny, nt = 2, 3, 100
x = np.arange(nx)
y = np.arange(ny)

time = xr.cftime_range("2045-02-02", periods=nt, freq="D")

da = xr.DataArray(
np.random.lognormal(10, 1, (nt, nx, ny)),
np.random.lognormal(2, 1, (nt, nx, ny)),
dims=("time", "x", "y"),
coords={"time": time, "x": x, "y": y},
)
Expand Down Expand Up @@ -122,7 +122,11 @@ def genextreme(request):

class TestFit:
def test_fit(self, fitda):
p = stats.fit(fitda, "lognorm")
p = stats.fit(fitda, "lognorm", method="ML")

# Test with explicit MLE (vs ML should be synonyms)
p2 = stats.fit(fitda, "lognorm", method="MLE")
np.testing.assert_array_almost_equal(p.values, p2.values)

assert p.dims[0] == "dparams"
assert p.get_axis_num("dparams") == 0
Expand All @@ -134,6 +138,11 @@ def test_fit(self, fitda):
assert cdf.shape == (fitda.x.size, fitda.y.size)
assert p.attrs["estimator"] == "Maximum likelihood"

# Test with MM
pm = stats.fit(fitda, "lognorm", method="MM")
mm, mv = lognorm(*pm.values).stats()
np.testing.assert_allclose(np.exp(2 + 1 / 2), mm, rtol=0.5)


def test_weibull_min_fit(weibull_min):
"""Check ML fit with a series that leads to poor values without good initial conditions."""
Expand All @@ -156,6 +165,13 @@ def test_fa(fitda):
np.testing.assert_array_equal(q[0, 0, 0], q0)


def test_fa_gamma(fitda):
T = 10
q = stats.fa(fitda, T, "lognorm", method="MM")
q1 = stats.fa(fitda, T, "gamma", method="PWM")
np.testing.assert_allclose(q1, q, rtol=0.2)


def test_fit_nan(fitda):
da = fitda.where((fitda.x > 0) & (fitda.y > 0))
out_nan = stats.fit(da, "lognorm")
Expand Down Expand Up @@ -252,6 +268,16 @@ def test_frequency_analysis(ndq_series, use_dask):
q.transpose(), mode="max", t=2, dist="genextreme", window=6, freq="YS"
)

# Test with PWM fitting method
out1 = stats.frequency_analysis(
q, mode="max", t=2, dist="genextreme", window=6, freq="YS", method="PWM"
)
np.testing.assert_allclose(
out1,
out,
rtol=0.5,
)


@pytest.mark.parametrize("use_dask", [True, False])
def test_parametric_quantile(use_dask):
Expand Down
2 changes: 1 addition & 1 deletion xclim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

__author__ = """Travis Logan"""
__email__ = "logan.travis@ouranos.ca"
__version__ = "0.43.15-beta"
__version__ = "0.43.16-beta"


# Load official locales
Expand Down
41 changes: 31 additions & 10 deletions xclim/indices/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,11 @@ def _fitfunc_1d(arr, *, dist, nparams, method, **fitkwargs):
return np.asarray([np.nan] * nparams)

# Estimate parameters
if method == "ML":
if method in ["ML", "MLE"]:
args, kwargs = _fit_start(x, dist.name, **fitkwargs)
params = dist.fit(x, *args, **kwargs, **fitkwargs)
params = dist.fit(x, *args, method="mle", **kwargs, **fitkwargs)
elif method == "MM":
params = dist.fit(x, method="mm", **fitkwargs)
elif method == "PWM":
params = list(dist.lmom_fit(x).values())
elif method == "APP":
Expand Down Expand Up @@ -91,9 +93,9 @@ def fit(
Name of the univariate distribution, such as beta, expon, genextreme, gamma, gumbel_r, lognorm, norm
(see :py:mod:scipy.stats for full list). If the PWM method is used, only the following distributions are
currently supported: 'expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'.
method : {"ML", "PWM", "APP"}
Fitting method, either maximum likelihood (ML), probability weighted moments (PWM),
also called L-Moments, or approximate method (APP).
method : {"ML" or "MLE", "MM", "PWM", "APP"}
Fitting method, either maximum likelihood (ML or MLE), method of moments (MM),
probability weighted moments (PWM), also called L-Moments, or approximate method (APP).
The PWM method is usually more robust to outliers.
dim : str
The dimension upon which to perform the indexing (default: "time").
Expand All @@ -110,11 +112,16 @@ def fit(
Coordinates for which all values are NaNs will be dropped before fitting the distribution. If the array still
contains NaNs, the distribution parameters will be returned as NaNs.
"""
method = method.upper()
method_name = {
"ML": "maximum likelihood",
"MM": "method of moments",
"MLE": "maximum likelihood",
"PWM": "probability weighted moments",
"APP": "approximative method",
}
if method not in method_name:
raise ValueError(f"Fitting method not recognized: {method}")

# Get the distribution
dc = get_dist(dist)
Expand All @@ -135,7 +142,7 @@ def fit(
keep_attrs=True,
kwargs=dict(
# Don't know how APP should be included, this works for now
dist=dc if method in ["ML", "APP"] else lm3dc,
dist=dc if method in ["ML", "MLE", "MM", "APP"] else lm3dc,
nparams=len(dist_params),
method=method,
**fitkwargs,
Expand Down Expand Up @@ -295,7 +302,11 @@ def func(x):


def fa(
da: xr.DataArray, t: int | Sequence, dist: str = "norm", mode: str = "max"
da: xr.DataArray,
t: int | Sequence,
dist: str = "norm",
mode: str = "max",
method: str = "ML",
) -> xr.DataArray:
"""Return the value corresponding to the given return period.
Expand All @@ -310,6 +321,10 @@ def fa(
Name of the univariate distribution, such as `beta`, `expon`, `genextreme`, `gamma`, `gumbel_r`, `lognorm`, `norm`
mode : {'min', 'max}
Whether we are looking for a probability of exceedance (max) or a probability of non-exceedance (min).
method : {"ML" or "MLE", "MOM", "PWM", "APP"}
Fitting method, either maximum likelihood (ML or MLE), method of moments (MOM),
probability weighted moments (PWM), also called L-Moments, or approximate method (APP).
The PWM method is usually more robust to outliers.
Returns
-------
Expand All @@ -321,7 +336,7 @@ def fa(
scipy.stats : For descriptions of univariate distribution types.
"""
# Fit the parameters of the distribution
p = fit(da, dist)
p = fit(da, dist, method=method)
t = np.atleast_1d(t)

if mode in ["max", "high"]:
Expand Down Expand Up @@ -350,6 +365,7 @@ def frequency_analysis(
dist: str,
window: int = 1,
freq: str | None = None,
method: str = "ML",
**indexer,
) -> xr.DataArray:
"""Return the value corresponding to a return period.
Expand All @@ -370,6 +386,10 @@ def frequency_analysis(
freq : str
Resampling frequency. If None, the frequency is assumed to be 'YS' unless the indexer is season='DJF',
in which case `freq` would be set to `AS-DEC`.
method : {"ML" or "MLE", "MOM", "PWM", "APP"}
Fitting method, either maximum likelihood (ML or MLE), method of moments (MOM),
probability weighted moments (PWM), also called L-Moments, or approximate method (APP).
The PWM method is usually more robust to outliers.
indexer : {dim: indexer, }, optional
Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values,
month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, all values are
Expand Down Expand Up @@ -399,7 +419,7 @@ def frequency_analysis(
if uses_dask(sel):
sel = sel.chunk({"time": -1})
# Frequency analysis
return fa(sel, t, dist, mode)
return fa(sel, t, dist=dist, mode=mode, method=method)


def get_dist(dist):
Expand All @@ -417,7 +437,8 @@ def get_lm3_dist(dist):
"""Return a distribution object from `lmoments3.distr`."""
if dist not in _lm3_dist_map:
raise ValueError(
f"The {dist} distribution is not supported by `lmoments3` or `xclim`."
f"The PWM fitting method cannot be used with the {dist} distribution, as it is not supported "
f"by `lmoments3`."
)

return getattr(lmoments3.distr, _lm3_dist_map[dist])
Expand Down

0 comments on commit c0a8244

Please sign in to comment.