diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..dcd2fa80 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,73 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- given-names: Tennessee + family-names: Leeuwenburg + orcid: "https://orcid.org/0009-0008-2024-1967" +- given-names: Nicholas + family-names: Loveday + orcid: "https://orcid.org/0009-0000-5796-7069" +- given-names: Elizabeth E. + family-names: Ebert +- given-names: Harrison + family-names: Cook + orcid: "https://orcid.org/0009-0009-3207-4876" +- given-names: Mohammadreza + family-names: Khanarmuei + orcid: "https://orcid.org/0000-0002-5017-9622" +- given-names: Robert J. + family-names: Taggart + orcid: "https://orcid.org/0000-0002-0067-5687" +- given-names: Nikeeth + family-names: Ramanathan + orcid: "https://orcid.org/0009-0002-7406-7438" +- given-names: Maree + family-names: Carroll + orcid: "https://orcid.org/0009-0008-6830-8251" +- given-names: Stephanie + family-names: Chong + orcid: "https://orcid.org/0009-0007-0796-4127" +- given-names: Aidan + family-names: Griffiths +- given-names: John + family-names: Sharples + +title: "scores: A Python package for verifying and evaluating models and predictions with xarray and pandas" + +preferred-citation: + type: article + doi: "10.48550/arXiv.2406.07817" + journal: "arXiv" + title: "scores: A Python package for verifying and evaluating models and predictions with xarray and pandas" + authors: + - given-names: Tennessee + family-names: Leeuwenburg + orcid: "https://orcid.org/0009-0008-2024-1967" + - given-names: Nicholas + family-names: Loveday + orcid: "https://orcid.org/0009-0000-5796-7069" + - given-names: Elizabeth E. + family-names: Ebert + - given-names: Harrison + family-names: Cook + orcid: "https://orcid.org/0009-0009-3207-4876" + - given-names: Mohammadreza + family-names: Khanarmuei + orcid: "https://orcid.org/0000-0002-5017-9622" + - given-names: Robert J. 
+ family-names: Taggart + orcid: "https://orcid.org/0000-0002-0067-5687" + - given-names: Nikeeth + family-names: Ramanathan + orcid: "https://orcid.org/0009-0002-7406-7438" + - given-names: Maree + family-names: Carroll + orcid: "https://orcid.org/0009-0008-6830-8251" + - given-names: Stephanie + family-names: Chong + orcid: "https://orcid.org/0009-0007-0796-4127" + - given-names: Aidan + family-names: Griffiths + - given-names: John + family-names: Sharples + year: 2024 \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 0d761666..c4371d83 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = "scores" copyright = "Licensed under Apache 2.0 - https://www.apache.org/licenses/LICENSE-2.0" -release = "0.9.0" +release = "0.9.1" version = __version__ diff --git a/docs/data.md b/docs/data.md index a448a045..a5a85efa 100644 --- a/docs/data.md +++ b/docs/data.md @@ -2,9 +2,9 @@ Overview of Some Relevant Data Sources -This section suggests how to obtain some 'getting started' weather and climate data for examining the use of the scores and metrics contained in this package. Data referred to here is available under various licenses, and the onus is on the user to understand the conditions of those licenses. The tutorials and walkthroughs in the 'tutorials' directory contain more information and explore the data in more depth. +This section suggests how to obtain sample data for 'getting started' with weather and climate information. Data referred to here is available under various licenses, and the onus is on the user to understand the conditions of those licenses. The tutorials and walkthroughs in the 'tutorials' directory contain more information and explore the data in more depth. -This page will be improved to provide more specific instructions on downloading and preparing the data in accordance with the scores roadmap. For the moment, it notes a few key datasets which have global coverage and are easily accessible. 
+This page will be improved to provide more specific instructions on downloading and preparing the data. For the moment, it notes a few key datasets which have global coverage and are easily accessible. ## Gridded global numerical weather prediction data Global weather prediction models are used to generate medium range forecasts and provide the initial and boundary conditions for higher-resolution regional models. Their global coverage makes them a good starting point for demonstrating the application of scoring methods in any region of interest. diff --git a/docs/maintainer.md b/docs/maintainer.md index 81bef723..61c2736d 100644 --- a/docs/maintainer.md +++ b/docs/maintainer.md @@ -73,7 +73,7 @@ Information relevant for package maintenance | what | when | why | | ------------ | ----------- | ------------ | -| README | a new is score added | in case it deserves a mention +| README | a new score is added | in case it deserves a mention | api.md | a new function is added | each function must be added individually | included.md | a new function is added | each function (and each variation of the function name) must be added individually | Tutorial_Gallery.ipynb | a new tutorial is added | navigation throughout the docs diff --git a/src/scores/__init__.py b/src/scores/__init__.py index b3b9d0e2..e401652b 100644 --- a/src/scores/__init__.py +++ b/src/scores/__init__.py @@ -13,7 +13,7 @@ import scores.sample_data import scores.stats.statistical_tests # noqa: F401 -__version__ = "0.9.0" +__version__ = "0.9.1" __all__ = [ "scores.categorical", diff --git a/src/scores/categorical/contingency_impl.py b/src/scores/categorical/contingency_impl.py index 20a89fee..aab6790a 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -18,6 +18,8 @@ Users can supply their own event operators to the top-level module functions. 
""" +# pylint: disable=too-many-lines + import operator from abc import ABC, abstractmethod from typing import Optional @@ -33,14 +35,25 @@ class BasicContingencyManager: # pylint: disable=too-many-public-methods """ - A BasicContingencyManager is produced when a BinaryContingencyManager is transformed. + See https://scores.readthedocs.io/en/stable/tutorials/Binary_Contingency_Scores.html for + a detailed walkthrough showing the use of this class in practice. + + A BasicContingencyManager object provides the scoring functions which are calculated + from a contingency table. A BasicContingencyManager can be efficiently and repeatedly + queried for a wide variety of scores. - A basic contingency table is built only from the event counts, losing the connection - to the actual event tables in their full dimensionality. + A BasicContingencyManager is produced when a :py:class:`BinaryContingencyManager` is + transformed. It is also possible to create a BasicContingencyManager from event counts or + a contingency table, although this not a common user requirement. - The event count data is much smaller than the full event tables, particularly when - considering very large data sets like Numerical Weather Prediction (NWP) data, which - could be terabytes to petabytes in size. + A contingency table is built only from event counts, losing the connection + to the actual event tables in their full dimensionality. The event count data is much + smaller than the full event tables, particularly when considering very large data sets + like Numerical Weather Prediction (NWP) data, which could be terabytes to petabytes in + size. + + By contrast, A :py:class:`BinaryContingencyManager` retains the full event data, which provides + some more flexbility but may reduce efficiency. 
""" def __init__(self, counts: dict): @@ -80,13 +93,63 @@ def __str__(self): def get_counts(self) -> dict: """ - Return the contingency table counts (tp, fp, tn, fn) + + Returns: + dict: A dictionary of the contingency table counts. Values are xr.DataArray instances. + The keys are: + + - tp_count for true positive count + - tn_count for true negative count, + - fp_count for false positive count + - fn_count for false negative count + - total_count for the total number of events. + + Here is an example of what may be returned: + + .. code-block :: python + + {'tp_count': Size: 8B array(5.), + 'tn_count': Size: 8B array(11.), + 'fp_count': Size: 8B array(1.), + 'fn_count': Size: 8B array(1.), + 'total_count': Size: 8B array(18.)} + """ return self.counts def get_table(self) -> xr.DataArray: """ - Return the contingency table as an xarray object + Returns: + xr.DataArray: The contingency table as an xarray object. Contains + a coordinate dimension called 'contingency'. Valid coordinates + for this dimenstion are: + + - tp_count for true positive count + - tn_count for true negative count + - fp_count for false positive count + - fn_count for false negative count + - total_count for the total number of events. + + Here is an example of what may be returned: + + .. code-block :: python + + array([ 5., 11., 1., 1., 18.]) + + Coordinates: + + contingency + (contingency) + xr.DataArray: xr.DataArray: An xarray object containing the probability of false detection .. math:: - \\text{probability of false detection} = \\frac{\\text{false positives}}{\\text{true negatives} + \\text{false positives}} + \\text{probability of false detection} = \\frac{\\text{false positives}}{\\text{true negatives} + + \\text{false positives}} Notes: - Range: 0 to 1. Perfect score: 0. @@ -390,7 +454,8 @@ def success_ratio(self) -> xr.DataArray: xr.DataArray: An xarray object containing the success ratio .. 
math:: - \\text{success ratio} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives}} + \\text{success ratio} = \\frac{\\text{true positives}}{\\text{true positives} + + \\text{false positives}} Notes: - Range: 0 to 1. Perfect score: 1. @@ -413,7 +478,8 @@ def threat_score(self) -> xr.DataArray: xr.DataArray: An xarray object containing the threat score .. math:: - \\text{threat score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives} + \\text{false negatives}} + \\text{threat score} = \\frac{\\text{true positives}}{\\text{true positives} + + \\text{false positives} + \\text{false negatives}} Notes: - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. @@ -438,7 +504,8 @@ def critical_success_index(self) -> xr.DataArray: xr.DataArray: An xarray object containing the critical success index .. math:: - \\text{threat score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives} + \\text{false negatives}} + \\text{threat score} = \\frac{\\text{true positives}}{\\text{true positives} + + \\text{false positives} + \\text{false negatives}} Notes: - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. @@ -462,7 +529,9 @@ def peirce_skill_score(self) -> xr.DataArray: xr.DataArray: An xarray object containing the Peirce Skill Score .. math:: - \\text{Peirce skill score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + \\text{true negatives}} + \\text{Peirce skill score} = \\frac{\\text{true positives}}{\\text{true positives} + + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + + \\text{true negatives}} Notes: - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. @@ -492,7 +561,9 @@ def true_skill_statistic(self) -> xr.DataArray: xr.DataArray: An xarray object containing the true skill statistic .. 
math:: - \\text{true skill statistic} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + \\text{true negatives}} + \\text{true skill statistic} = \\frac{\\text{true positives}}{\\text{true positives} + + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + + \\text{true negatives}} Notes: - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. @@ -516,7 +587,9 @@ def hanssen_and_kuipers_discriminant(self) -> xr.DataArray: xr.DataArray: An xarray object containing Hanssen and Kuipers' Discriminant .. math:: - \\text{HK} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + \\text{true negatives}} + \\text{HK} = \\frac{\\text{true positives}}{\\text{true positives} + + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + + \\text{true negatives}} where :math:`\\text{HK}` is Hansen and Kuipers Discriminant @@ -655,7 +728,8 @@ def f1_score(self) -> xr.DataArray: xr.DataArray: An xarray object containing the F1 score .. math:: - \\text{F1} = \\frac{2 \\cdot \\text{true positives}}{(2 \\cdot \\text{true positives}) + \\text{false positives} + \\text{false negatives}} + \\text{F1} = \\frac{2 \\cdot \\text{true positives}}{(2 \\cdot \\text{true positives}) + + \\text{false positives} + \\text{false negatives}} Notes: - "True positives" is the same as "hits". @@ -683,12 +757,14 @@ def equitable_threat_score(self) -> xr.DataArray: .. math:: \\text{ETS} = \\frac{\\text{true positives} - \\text{true positives} _\\text{random}}\ - {\\text{true positives} + \\text{false negatives} + \\text{false positives} - \\text{true positives} _\\text{random}} + {\\text{true positives} + \\text{false negatives} + \\text{false positives} - + \\text{true positives} _\\text{random}} where .. 
math:: - \\text{true_positives}_{\\text{random}} = \\frac{(\\text{true positives} + \\text{false negatives}) (\\text{true positives} + \\text{false positives})}{\\text{total count}} + \\text{true_positives}_{\\text{random}} = \\frac{(\\text{true positives} + + \\text{false negatives}) (\\text{true positives} + \\text{false positives})}{\\text{total count}} Notes: - Range: -1/3 to 1, 0 indicates no skill. Perfect score: 1. @@ -722,12 +798,14 @@ def gilberts_skill_score(self) -> xr.DataArray: .. math:: \\text{GSS} = \\frac{\\text{true positives} - \\text{true positives} _\\text{random}}\ - {\\text{true positives} + \\text{false negatives} + \\text{false positives} - \\text{true positivies} _\\text{random}} + {\\text{true positives} + \\text{false negatives} + \\text{false positives} - + \\text{true positives} _\\text{random}} where .. math:: - \\text{true_positives}_{\\text{random}} = \\frac{(\\text{true positives} + \\text{false negatives}) (\\text{true positives} + \\text{false positives})}{\\text{total count}} + \\text{true_positives}_{\\text{random}} = \\frac{(\\text{true positives} + + \\text{false negatives}) (\\text{true positives} + \\text{false positives})}{\\text{total count}} Notes: - Range: -1/3 to 1, 0 indicates no skill. Perfect score: 1. @@ -1014,49 +1092,60 @@ def symmetric_extremal_dependence_index(self) -> xr.DataArray: class BinaryContingencyManager(BasicContingencyManager): """ + See https://scores.readthedocs.io/en/stable/tutorials/Binary_Contingency_Scores.html for + a detailed walkthrough showing the use in practice. - At each location, the value will either be: + A BinaryContingencyManager holds the underlying binary forecast and observed event data, + from which it builds a contingency table and provides scoring functionality based on that table. 
- - A true positive (hit) - - A false positive (false alarm) - - A true negative (correct negative) - - A false negative (miss) + A BinaryContingencyManager is typically created by an :py:class:`EventOperator`, such as a + :py:class:`ThresholdOperator`, but can also be created directly from binary data if the user + wishes. + Supported operations include: + - "Transforming" the data in various ways, such as dimensional reduction + - Producing contingency tables + - Calculating scores and metrics based on contingency tables - It will be common to want to operate on masks of these values, - such as: + The full data comprises several n-dimensional binary arrays which can be considered maps of: + - True positives (hits) + - False positives (false alarms) + - True negatives (correct negatives) + - False negatives (misses) + + These masks, in addition to supporting score calculations, can be accessed and used for: - Plotting these attributes on a map - - Calculating the total number of these attributes - - Calculating various ratios of these attributes, potentially masked by geographical area (e.g. accuracy in a region) + - Masking these values by a geographical region prior to score calculation - As such, the per-pixel information is useful as well as the overall - ratios involved. + As such, the per-pixel information is useful as well as the overall ratios involved. - BinaryContingencyManager utilises the BasicContingencyManager class to provide - most functionality. + BinaryContingencyManager inherits from (uses) the :py:class:`BasicContingencyManager` class to + provide score calculations on the final contingency table. Documentation for the available scores + is found in the :py:class:`BasicContingencyManager` API entry but the calls can be made directly + against instances of BinaryContingencyManager where performance or transformation are not a concern. 
""" def __init__( - self, forecast_events: FlexibleArrayType, observed_events: FlexibleArrayType + self, fcst_events: FlexibleArrayType, obs_events: FlexibleArrayType ): # pylint: disable=super-init-not-called - self.forecast_events = forecast_events - self.observed_events = observed_events + self.fcst_events = fcst_events + self.obs_events = obs_events - self.tp = (self.forecast_events == 1) & (self.observed_events == 1) # true positives - self.tn = (self.forecast_events == 0) & (self.observed_events == 0) # true negatives - self.fp = (self.forecast_events == 1) & (self.observed_events == 0) # false positives - self.fn = (self.forecast_events == 0) & (self.observed_events == 1) # false negatives + self.tp = (self.fcst_events == 1) & (self.obs_events == 1) # true positives + self.tn = (self.fcst_events == 0) & (self.obs_events == 0) # true negatives + self.fp = (self.fcst_events == 1) & (self.obs_events == 0) # false positives + self.fn = (self.fcst_events == 0) & (self.obs_events == 1) # false negatives # Bring back NaNs where there is either a forecast or observed event nan - self.tp = self.tp.where(~np.isnan(forecast_events)) - self.tp = self.tp.where(~np.isnan(observed_events)) - self.tn = self.tn.where(~np.isnan(forecast_events)) - self.tn = self.tn.where(~np.isnan(observed_events)) - self.fp = self.fp.where(~np.isnan(forecast_events)) - self.fp = self.fp.where(~np.isnan(observed_events)) - self.fn = self.fn.where(~np.isnan(forecast_events)) - self.fn = self.fn.where(~np.isnan(observed_events)) + self.tp = self.tp.where(~np.isnan(fcst_events)) + self.tp = self.tp.where(~np.isnan(obs_events)) + self.tn = self.tn.where(~np.isnan(fcst_events)) + self.tn = self.tn.where(~np.isnan(obs_events)) + self.fp = self.fp.where(~np.isnan(fcst_events)) + self.fp = self.fp.where(~np.isnan(obs_events)) + self.fn = self.fn.where(~np.isnan(fcst_events)) + self.fn = self.fn.where(~np.isnan(obs_events)) # Variables for count-based metrics self.counts = self._get_counts() @@ 
-1069,7 +1158,15 @@ def transform( preserve_dims: Optional[FlexibleDimensionTypes] = None, ) -> BasicContingencyManager: """ - Calculate and compute the contingency table according to the specified dimensions + Compute the contingency table, preserving or reducing the specified dimensions. + + Args: + - reduce_dims: Dimensions to reduce. Can be "all" to reduce all dimensions. + - preserve_dims: Dimensions to preserve. Can be "all" to preserve all dimensions. + + Returns: + scores.categorical.BasicContingencyManager: A `scores` class which supports efficient + calculation of contingency metrics. """ cd = self._get_counts(reduce_dims=reduce_dims, preserve_dims=preserve_dims) return BasicContingencyManager(cd) @@ -1085,8 +1182,8 @@ def _get_counts( """ to_reduce = scores.utils.gather_dimensions( - self.forecast_events.dims, - self.observed_events.dims, + self.fcst_events.dims, + self.obs_events.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims, ) @@ -1105,15 +1202,13 @@ def _get_counts( class EventOperator(ABC): """ - Base class for event operators which can be used in deriving contingency - tables. This will be expanded as additional use cases are incorporated - beyond the ThresholdEventOperator. + Abstract Base Class (ABC) for event operators which can be used in deriving contingency + tables. ABCs are not used directly but instead define the requirements for other classes + and may be used for type checking. 
""" @abstractmethod - def make_event_tables( - self, forecast: FlexibleArrayType, observed: FlexibleArrayType, *, event_threshold=None, op_fn=None - ): + def make_event_tables(self, fcst: FlexibleArrayType, obs: FlexibleArrayType, *, event_threshold=None, op_fn=None): """ This method should be over-ridden to return forecast and observed event tables """ @@ -1121,7 +1216,7 @@ def make_event_tables( @abstractmethod def make_contingency_manager( - self, forecast: FlexibleArrayType, observed: FlexibleArrayType, *, event_threshold=None, op_fn=None + self, fcst: FlexibleArrayType, obs: FlexibleArrayType, *, event_threshold=None, op_fn=None ): """ This method should be over-ridden to return a contingency table. @@ -1131,10 +1226,15 @@ def make_contingency_manager( class ThresholdEventOperator(EventOperator): """ - Given a forecast and and an observation, consider an event defined by - particular variables meeting a threshold condition (e.g. rainfall above 1mm). + See https://scores.readthedocs.io/en/stable/tutorials/Binary_Contingency_Scores.html for + a detailed walkthrough showing the use in practice. + + A ThresholdEventOperator is used to produce a :py:class:`BinaryContingencyManager` from + forecast and observed data. It considers an event to be defined when the forecast and + observed variables meet a particular threshold condition (e.g. rainfall above 1mm). - This class abstracts that concept for any event definition. + The class may be used for any variable, for any threshold, and for any comparison + operator (e.g. greater-than, less-than, greater-than-or-equal-to, ... 
) """ def __init__(self, *, precision=DEFAULT_PRECISION, default_event_threshold=0.001, default_op_fn=operator.ge): @@ -1142,9 +1242,7 @@ def __init__(self, *, precision=DEFAULT_PRECISION, default_event_threshold=0.001 self.default_event_threshold = default_event_threshold self.default_op_fn = default_op_fn - def make_event_tables( - self, forecast: FlexibleArrayType, observed: FlexibleArrayType, *, event_threshold=None, op_fn=None - ): + def make_event_tables(self, fcst: FlexibleArrayType, obs: FlexibleArrayType, *, event_threshold=None, op_fn=None): """ Using this function requires a careful understanding of the structure of the data and the use of the operator function. The default operator is a simple greater-than @@ -1160,17 +1258,17 @@ def make_event_tables( if op_fn is None: op_fn = self.default_op_fn - forecast_events = op_fn(forecast, event_threshold) - observed_events = op_fn(observed, event_threshold) + fcst_events = op_fn(fcst, event_threshold) + obs_events = op_fn(obs, event_threshold) # Bring back NaNs - forecast_events = forecast_events.where(~np.isnan(forecast)) - observed_events = observed_events.where(~np.isnan(observed)) + fcst_events = fcst_events.where(~np.isnan(fcst)) + obs_events = obs_events.where(~np.isnan(obs)) - return (forecast_events, observed_events) + return (fcst_events, obs_events) def make_contingency_manager( - self, forecast: FlexibleArrayType, observed: FlexibleArrayType, *, event_threshold=None, op_fn=None + self, fcst: FlexibleArrayType, obs: FlexibleArrayType, *, event_threshold=None, op_fn=None ) -> BinaryContingencyManager: """ Using this function requires a careful understanding of the structure of the data @@ -1187,12 +1285,12 @@ def make_contingency_manager( if op_fn is None: op_fn = self.default_op_fn - forecast_events = op_fn(forecast, event_threshold) - observed_events = op_fn(observed, event_threshold) + fcst_events = op_fn(fcst, event_threshold) + obs_events = op_fn(obs, event_threshold) # Bring back NaNs - 
forecast_events = forecast_events.where(~np.isnan(forecast)) - observed_events = observed_events.where(~np.isnan(observed)) + fcst_events = fcst_events.where(~np.isnan(fcst)) + obs_events = obs_events.where(~np.isnan(obs)) - table = BinaryContingencyManager(forecast_events, observed_events) + table = BinaryContingencyManager(fcst_events, obs_events) return table diff --git a/src/scores/continuous/quantile_loss_impl.py b/src/scores/continuous/quantile_loss_impl.py index 2c333956..c14d7481 100644 --- a/src/scores/continuous/quantile_loss_impl.py +++ b/src/scores/continuous/quantile_loss_impl.py @@ -1,7 +1,7 @@ """ Implementation of quantile loss (score) """ -from typing import Optional, Sequence +from typing import Optional import xarray as xr diff --git a/src/scores/continuous/standard_impl.py b/src/scores/continuous/standard_impl.py index 5218d080..00094d3c 100644 --- a/src/scores/continuous/standard_impl.py +++ b/src/scores/continuous/standard_impl.py @@ -210,7 +210,8 @@ def correlation( Calculates the Pearson's correlation coefficient between two xarray DataArrays .. 
math:: - \\rho = \\frac{\\sum_{i=1}^{n}{(x_i - \\bar{x})(y_i - \\bar{y})}}{\\sqrt{\\sum_{i=1}^{n}{(x_i-\\bar{x})^2}\\sum_{i=1}^{n}{(y_i - \\bar{y})^2}}} + \\rho = \\frac{\\sum_{i=1}^{n}{(x_i - \\bar{x})(y_i - \\bar{y})}} + {\\sqrt{\\sum_{i=1}^{n}{(x_i-\\bar{x})^2}\\sum_{i=1}^{n}{(y_i - \\bar{y})^2}}} where: - :math:`\\rho` = Pearson's correlation coefficient diff --git a/src/scores/probability/crps_impl.py b/src/scores/probability/crps_impl.py index c1265afc..77fbcf07 100644 --- a/src/scores/probability/crps_impl.py +++ b/src/scores/probability/crps_impl.py @@ -176,18 +176,22 @@ def crps_cdf( Given: - a predictive CDF `fcst` indexed at thresholds by variable x - - an observation in CDF form `obs_cdf` (i.e., :math:`\\text{obs_cdf}(x) = 0` if :math:`x < \\text{obs}` and 1 if :math:`x >= \\text{obs}`) + - an observation in CDF form `obs_cdf` (i.e., :math:`\\text{obs_cdf}(x) = 0` if \ + :math:`x < \\text{obs}` and 1 if :math:`x >= \\text{obs}`) - a `threshold_weight` array indexed by variable x The threshold-weighted CRPS is given by: - - :math:`twCRPS = \\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times (\\text{fcst}(x) - \\text{obs_cdf}(x))^2]\\text{d}x}`, over all thresholds x. + - :math:`twCRPS = \\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \ + (\\text{fcst}(x) - \\text{obs_cdf}(x))^2]\\text{d}x}`, over all thresholds x. - The usual CRPS is the threshold-weighted CRPS with :math:`\\text{threshold_weight}(x) = 1` for all x. 
This can be decomposed into an over-forecast penalty: - :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{fcst}(x) - \\text{obs_cdf}(x))^2]\\text{d}x}`, over all thresholds x where x >= obs + :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{fcst}(x) - + \\text{obs_cdf}(x))^2]\\text{d}x}`, over all thresholds x where x >= obs and an under-forecast penalty: - :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{(fcst}(x) - \\text{obs_cdf}(x)^2]\\text{d}x}`, over all thresholds x where x <= obs. + :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{(fcst}(x) - + \\text{obs_cdf}(x)^2]\\text{d}x}`, over all thresholds x where x <= obs. Note that the function `crps_cdf` is designed so that the `obs` argument contains actual observed values. `crps_cdf` will convert `obs` into CDF form in order to diff --git a/tests/categorical/test_contingency.py b/tests/categorical/test_contingency.py index 9c19ce5e..b175073b 100644 --- a/tests/categorical/test_contingency.py +++ b/tests/categorical/test_contingency.py @@ -157,12 +157,12 @@ def test_categorical_table(): fcst_events2, obs_events2 = match.make_event_tables( simple_forecast, simple_obs, event_threshold=1.3, op_fn=operator.gt ) - xr.testing.assert_equal(fcst_events, table.forecast_events) - xr.testing.assert_equal(fcst_events2, table.forecast_events) - xr.testing.assert_equal(obs_events, table.observed_events) - xr.testing.assert_equal(obs_events2, table.observed_events) - xr.testing.assert_equal(table.forecast_events, table2.forecast_events) - xr.testing.assert_equal(table.observed_events, table2.observed_events) + xr.testing.assert_equal(fcst_events, table.fcst_events) + xr.testing.assert_equal(fcst_events2, table.fcst_events) + xr.testing.assert_equal(obs_events, table.obs_events) + xr.testing.assert_equal(obs_events2, table.obs_events) + xr.testing.assert_equal(table.fcst_events, table2.fcst_events) + 
xr.testing.assert_equal(table.obs_events, table2.obs_events) # Confirm values in the contingency table are correct assert counts["tp_count"] == 9 @@ -364,11 +364,11 @@ def test_dask_if_available_categorical(): table = match.make_contingency_manager(fcst, obs, event_threshold=1.3) # Assert things start life as dask types - assert isinstance(table.forecast_events.data, dask.array.Array) + assert isinstance(table.fcst_events.data, dask.array.Array) assert isinstance(table.tp.data, dask.array.Array) # That can be computed to hold numpy data types - computed = table.forecast_events.compute() + computed = table.fcst_events.compute() assert isinstance(computed.data, np.ndarray) # And that transformed tables are built out of computed things diff --git a/tutorials/Binary_Contingency_Scores.ipynb b/tutorials/Binary_Contingency_Scores.ipynb index bada0de6..f41168f8 100644 --- a/tutorials/Binary_Contingency_Scores.ipynb +++ b/tutorials/Binary_Contingency_Scores.ipynb @@ -1477,7 +1477,7 @@ "source": [ "# Further, if it turn out you want them, the event tables are still there \n", "\n", - "contingency_manager.forecast_events" + "contingency_manager.fcst_events" ] }, {