Merge branch 'master' into fix-cssa

Ouranosinc · Jun 20, 2023 · c0a8244 · c0a8244
2 parents 28856ec + 3b2bff6
commit c0a8244
Show file tree

Hide file tree

Showing 6 changed files with 65 additions and 17 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -28,6 +28,7 @@ New features and enhancements
 * New variables: Snowfall rate ``prsnd`` and surface maximum wind speed ``sfcWindmax``. (:issue:`1352`, :pull:`1358`).
 * Docstring for `freq` links to pandas offset aliases documentation. (:issue:`1310`, :pull:`1392`).
 * New function ``xclim.indces.run_length.extract_events`` for determining runs whose starting and stopping points are defined through run length conditions. (:pull:`1256`).
+* Stats functions `frequency_analysis` now takes `method` parameter to select other fitting methods such as PWM or MOM. (:issue:`1168`, :pull:`1398`)
 
 Bug fixes
 ^^^^^^^^^
@@ -49,7 +50,7 @@ Internal changes
 * Two new GitHub CI Actions have been added to the existing Workflows (:pull:`1390`):
     * `actions/add-to-project`: Automatically adds issues to the `xclim` project.
     * `saadmk11/github-actions-version-updater`: Updates GitHub Action versions in all Workflows (triggered monthly).
-
+* Added `method` parameter to `frequency_analysis` and `fa`. (:issue:`1168`, :pull:`1398`).
 
 Breaking changes
 ^^^^^^^^^^^^^^^^

diff --git a/docs/notebooks/frequency_analysis.ipynb b/docs/notebooks/frequency_analysis.ipynb
@@ -102,7 +102,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The next step is to fit the statistical distribution on these maxima. This is done by the `.fit` method, which takes as argument the sample series, the distribution's name and the parameter estimation method. The fit is done by default using the Maximum Likelihood algorithm. For some extreme value distributions, however, the maximum likelihood is not always robust, and `xclim` offers the possibility to use Probability Weighted Moments (PWM) to estimate parameters. Note that the `lmoments3` package which is used by `xclim` to compute the PWM only supports `expon`, `gamma`, `genextreme`, `genpareto`, `gumbel_r`, `pearson3` and `weibull_min`."
+    "The next step is to fit the statistical distribution on these maxima. This is done by the `.fit` method, which takes as argument the sample series, the distribution's name and the parameter estimation `method`. The fit is done by default using the Maximum Likelihood algorithm (`method=\"ML\"`). For some extreme value distributions, however, the maximum likelihood is not always robust, and `xclim` offers the possibility to use Probability Weighted Moments (`method=\"PWM\"`) to estimate parameters. Note that the `lmoments3` package which is used by `xclim` to compute the PWM only supports `expon`, `gamma`, `genextreme`, `genpareto`, `gumbel_r`, `pearson3` and `weibull_min`. Parameters can also be estimated using the method of moments (`method=\"MM\"`)."
    ]
   },
   {

diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.43.15-beta
+current_version = 0.43.16-beta
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+).(?P<patch>\d+)(\-(?P<release>[a-z]+))?

diff --git a/tests/test_stats.py b/tests/test_stats.py
@@ -11,14 +11,14 @@
 
 @pytest.fixture(params=[True, False])
 def fitda(request):
-    nx, ny, nt = 2, 3, 50
+    nx, ny, nt = 2, 3, 100
     x = np.arange(nx)
     y = np.arange(ny)
 
     time = xr.cftime_range("2045-02-02", periods=nt, freq="D")
 
     da = xr.DataArray(
-        np.random.lognormal(10, 1, (nt, nx, ny)),
+        np.random.lognormal(2, 1, (nt, nx, ny)),
         dims=("time", "x", "y"),
         coords={"time": time, "x": x, "y": y},
     )
@@ -122,7 +122,11 @@ def genextreme(request):
 
 class TestFit:
     def test_fit(self, fitda):
-        p = stats.fit(fitda, "lognorm")
+        p = stats.fit(fitda, "lognorm", method="ML")
+
+        # Test with explicit MLE (vs ML should be synonyms)
+        p2 = stats.fit(fitda, "lognorm", method="MLE")
+        np.testing.assert_array_almost_equal(p.values, p2.values)
 
         assert p.dims[0] == "dparams"
         assert p.get_axis_num("dparams") == 0
@@ -134,6 +138,11 @@ def test_fit(self, fitda):
         assert cdf.shape == (fitda.x.size, fitda.y.size)
         assert p.attrs["estimator"] == "Maximum likelihood"
 
+        # Test with MM
+        pm = stats.fit(fitda, "lognorm", method="MM")
+        mm, mv = lognorm(*pm.values).stats()
+        np.testing.assert_allclose(np.exp(2 + 1 / 2), mm, rtol=0.5)
+
 
 def test_weibull_min_fit(weibull_min):
     """Check ML fit with a series that leads to poor values without good initial conditions."""
@@ -156,6 +165,13 @@ def test_fa(fitda):
     np.testing.assert_array_equal(q[0, 0, 0], q0)
 
 
+def test_fa_gamma(fitda):
+    T = 10
+    q = stats.fa(fitda, T, "lognorm", method="MM")
+    q1 = stats.fa(fitda, T, "gamma", method="PWM")
+    np.testing.assert_allclose(q1, q, rtol=0.2)
+
+
 def test_fit_nan(fitda):
     da = fitda.where((fitda.x > 0) & (fitda.y > 0))
     out_nan = stats.fit(da, "lognorm")
@@ -252,6 +268,16 @@ def test_frequency_analysis(ndq_series, use_dask):
         q.transpose(), mode="max", t=2, dist="genextreme", window=6, freq="YS"
     )
 
+    # Test with PWM fitting method
+    out1 = stats.frequency_analysis(
+        q, mode="max", t=2, dist="genextreme", window=6, freq="YS", method="PWM"
+    )
+    np.testing.assert_allclose(
+        out1,
+        out,
+        rtol=0.5,
+    )
+
 
 @pytest.mark.parametrize("use_dask", [True, False])
 def test_parametric_quantile(use_dask):

diff --git a/xclim/__init__.py b/xclim/__init__.py
@@ -11,7 +11,7 @@
 
 __author__ = """Travis Logan"""
 __email__ = "logan.travis@ouranos.ca"
-__version__ = "0.43.15-beta"
+__version__ = "0.43.16-beta"
 
 
 # Load official locales

diff --git a/xclim/indices/stats.py b/xclim/indices/stats.py
@@ -53,9 +53,11 @@ def _fitfunc_1d(arr, *, dist, nparams, method, **fitkwargs):
         return np.asarray([np.nan] * nparams)
 
     # Estimate parameters
-    if method == "ML":
+    if method in ["ML", "MLE"]:
         args, kwargs = _fit_start(x, dist.name, **fitkwargs)
-        params = dist.fit(x, *args, **kwargs, **fitkwargs)
+        params = dist.fit(x, *args, method="mle", **kwargs, **fitkwargs)
+    elif method == "MM":
+        params = dist.fit(x, method="mm", **fitkwargs)
     elif method == "PWM":
         params = list(dist.lmom_fit(x).values())
     elif method == "APP":
@@ -91,9 +93,9 @@ def fit(
         Name of the univariate distribution, such as beta, expon, genextreme, gamma, gumbel_r, lognorm, norm
         (see :py:mod:scipy.stats for full list). If the PWM method is used, only the following distributions are
         currently supported: 'expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'.
-    method : {"ML", "PWM", "APP"}
-        Fitting method, either maximum likelihood (ML), probability weighted moments (PWM),
-        also called L-Moments, or approximate method (APP).
+    method : {"ML" or "MLE", "MM", "PWM", "APP"}
+        Fitting method, either maximum likelihood (ML or MLE), method of moments (MM),
+        probability weighted moments (PWM), also called L-Moments, or approximate method (APP).
         The PWM method is usually more robust to outliers.
     dim : str
         The dimension upon which to perform the indexing (default: "time").
@@ -110,11 +112,16 @@ def fit(
     Coordinates for which all values are NaNs will be dropped before fitting the distribution. If the array still
     contains NaNs, the distribution parameters will be returned as NaNs.
     """
+    method = method.upper()
     method_name = {
         "ML": "maximum likelihood",
+        "MM": "method of moments",
+        "MLE": "maximum likelihood",
         "PWM": "probability weighted moments",
         "APP": "approximative method",
     }
+    if method not in method_name:
+        raise ValueError(f"Fitting method not recognized: {method}")
 
     # Get the distribution
     dc = get_dist(dist)
@@ -135,7 +142,7 @@ def fit(
         keep_attrs=True,
         kwargs=dict(
             # Don't know how APP should be included, this works for now
-            dist=dc if method in ["ML", "APP"] else lm3dc,
+            dist=dc if method in ["ML", "MLE", "MM", "APP"] else lm3dc,
             nparams=len(dist_params),
             method=method,
             **fitkwargs,
@@ -295,7 +302,11 @@ def func(x):
 
 
 def fa(
-    da: xr.DataArray, t: int | Sequence, dist: str = "norm", mode: str = "max"
+    da: xr.DataArray,
+    t: int | Sequence,
+    dist: str = "norm",
+    mode: str = "max",
+    method: str = "ML",
 ) -> xr.DataArray:
     """Return the value corresponding to the given return period.
 
@@ -310,6 +321,10 @@ def fa(
         Name of the univariate distribution, such as `beta`, `expon`, `genextreme`, `gamma`, `gumbel_r`, `lognorm`, `norm`
     mode : {'min', 'max}
         Whether we are looking for a probability of exceedance (max) or a probability of non-exceedance (min).
+    method : {"ML" or "MLE", "MOM", "PWM", "APP"}
+        Fitting method, either maximum likelihood (ML or MLE), method of moments (MOM),
+        probability weighted moments (PWM), also called L-Moments, or approximate method (APP).
+        The PWM method is usually more robust to outliers.
 
     Returns
     -------
@@ -321,7 +336,7 @@ def fa(
     scipy.stats : For descriptions of univariate distribution types.
     """
     # Fit the parameters of the distribution
-    p = fit(da, dist)
+    p = fit(da, dist, method=method)
     t = np.atleast_1d(t)
 
     if mode in ["max", "high"]:
@@ -350,6 +365,7 @@ def frequency_analysis(
     dist: str,
     window: int = 1,
     freq: str | None = None,
+    method: str = "ML",
     **indexer,
 ) -> xr.DataArray:
     """Return the value corresponding to a return period.
@@ -370,6 +386,10 @@ def frequency_analysis(
     freq : str
         Resampling frequency. If None, the frequency is assumed to be 'YS' unless the indexer is season='DJF',
         in which case `freq` would be set to `AS-DEC`.
+    method : {"ML" or "MLE", "MOM", "PWM", "APP"}
+        Fitting method, either maximum likelihood (ML or MLE), method of moments (MOM),
+        probability weighted moments (PWM), also called L-Moments, or approximate method (APP).
+        The PWM method is usually more robust to outliers.
     indexer : {dim: indexer, }, optional
         Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values,
         month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, all values are
@@ -399,7 +419,7 @@ def frequency_analysis(
     if uses_dask(sel):
         sel = sel.chunk({"time": -1})
     # Frequency analysis
-    return fa(sel, t, dist, mode)
+    return fa(sel, t, dist=dist, mode=mode, method=method)
 
 
 def get_dist(dist):
@@ -417,7 +437,8 @@ def get_lm3_dist(dist):
     """Return a distribution object from `lmoments3.distr`."""
     if dist not in _lm3_dist_map:
         raise ValueError(
-            f"The {dist} distribution is not supported by `lmoments3` or `xclim`."
+            f"The PWM fitting method cannot be used with the {dist} distribution, as it is not supported "
+            f"by `lmoments3`."
         )
 
     return getattr(lmoments3.distr, _lm3_dist_map[dist])