Skip to content

Commit

Permalink
Updated docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
holukas committed Jan 9, 2024
1 parent 2c18ae6 commit 35b1ce7
Show file tree
Hide file tree
Showing 15 changed files with 351 additions and 201 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

![DIIVE](images/logo_diive1_256px.png)

## v0.67.1 | 10 Jan 2024

- Updated: many docstrings.

## v0.67.0 | 9 Jan 2024

### Updates to flux processing chain
Expand Down
4 changes: 2 additions & 2 deletions diive/core/dfun/_BAK_fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
# last update in: v0.23.0
This module is part of DIIVE:
https://gitlab.ethz.ch/holukas/diive
This module is part of the diive library:
https://github.com/holukas/diive
"""

Expand Down
4 changes: 2 additions & 2 deletions diive/core/dfun/fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
DATA FUNCTIONS: FITS
====================
This module is part of DIIVE:
https://gitlab.ethz.ch/holukas/diive
This module is part of the diive library:
https://github.com/holukas/diive
"""

Expand Down
4 changes: 2 additions & 2 deletions diive/core/dfun/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
DATA FUNCTIONS: FRAMES
======================
This module is part of DIIVE:
https://gitlab.ethz.ch/holukas/diive
This module is part of the diive library:
https://github.com/holukas/diive
"""
from pathlib import Path
Expand Down
2 changes: 1 addition & 1 deletion diive/pkgs/formats/fluxnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def remove_erroneous_data(self, var: str, remove_dates: list, showplot: bool):
print(f" REMOVING data for {var} time range between {d} (dates are inclusive)")
series = self.merged_df[var].copy()
mr = ManualRemoval(series=series)
mr.calc(remove_dates=remove_dates, showplot=showplot)
mr._calc(remove_dates=remove_dates, showplot=showplot)
self._merged_df[var] = mr.filteredseries.copy()
print(" Done.")

Expand Down
93 changes: 56 additions & 37 deletions diive/pkgs/outlierdetection/absolutelimits.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
"""
OUTLIER DETECTION: ABSOLUTE LIMITS
==================================
This module is part of the diive library:
https://github.com/holukas/diive
"""
import numpy as np
import pandas as pd
from pandas import Series, DatetimeIndex
Expand All @@ -12,22 +20,10 @@
@ConsoleOutputDecorator()
@repeater # Repeater called for consistency with other methods, absolute limits do not require iterations
class AbsoluteLimitsDaytimeNighttime(FlagBase):
"""
Generate flag that indicates if values in data are outside
"""Generate flag that indicates if values in data are outside
the specified range, defined by providing allowed minimum and
maximum, separately for daytime and nighttime data
Methods:
calc(self, daytime_minmax: float, nighttime_minmax: float): Calculates flag
maximum values, separately for daytime and nighttime data."""

After running calc, results can be accessed with:
flag: Series
Flag series where accepted (ok) values are indicated
with flag=0, rejected values are indicated with flag=2
filteredseries: Series
Data with rejected values set to missing
"""
flagid = 'OUTLIER_ABSLIM_DTNT'

def __init__(self,
Expand All @@ -40,8 +36,27 @@ def __init__(self,
idstr: str = None,
showplot: bool = False,
verbose: bool = False,
repeat: bool = False
):
repeat: bool = False):
"""
Args:
series: Time series in which outliers are identified.
lat: Latitude of location as float, e.g. 46.583056
lon: Longitude of location as float, e.g. 9.790639
utc_offset: UTC offset of *timestamp_index*, e.g. 1 for UTC+01:00
The datetime index of the resulting Series will be in this timezone.
daytime_minmax: Allowed minimum and maximum values in *series* during daytime, e.g. [-50, 50].
nighttime_minmax: Allowed minimum and maximum values in *series* during nighttime, e.g. [-5, 50].
idstr: Identifier, added as suffix to output variable names.
showplot: Show plot with removed data points.
verbose: More text output to console if *True*.
repeat: Repeat until no more outliers can be found.
Returns:
Results dataframe via the @repeater wrapper function. The dataframe contains
the filtered time series and flags from all iterations.
"""
super().__init__(series=series, flagid=self.flagid, idstr=idstr)
self.showplot = False
self.verbose = False
Expand Down Expand Up @@ -69,7 +84,7 @@ def __init__(self,
self.is_nighttime = nighttimeflag == 1 # Convert 0/1 flag to False/True flag
self.is_daytime = daytimeflag == 1 # Convert 0/1 flag to False/True flag

def calc(self):
def _calc(self):
"""Calculate flag"""
self.reset()
ok, rejected = self._flagtests()
Expand Down Expand Up @@ -128,22 +143,10 @@ def _flagtests(self) -> tuple[DatetimeIndex, DatetimeIndex]:
@ConsoleOutputDecorator()
@repeater
class AbsoluteLimits(FlagBase):
"""
Generate flag that indicates if values in data are outside
the specified range, defined by providing min, max in method
...
Methods:
calc(self, min: float, max: float): Calculates flag
After running calc, results can be accessed with:
flag: Series
Flag series where accepted (ok) values are indicated
with flag=0, rejected values are indicated with flag=2
filteredseries: Series
Data with rejected values set to missing
"""
"""Generate flag that indicates if values in data are outside
the specified range, defined by providing the allowed minimum and
maximum for values in *series*."""

flagid = 'OUTLIER_ABSLIM'

def __init__(self,
Expand All @@ -154,6 +157,22 @@ def __init__(self,
showplot: bool = False,
verbose: bool = False,
repeat: bool = False):
"""
Args:
series: Time series in which outliers are identified.
minval: Allowed minimum value in *series*, e.g. -20.
maxval: Allowed maximum value in *series*, e.g. 20.
idstr: Identifier, added as suffix to output variable names.
showplot: Show plot with removed data points.
verbose: More text output to console if *True*.
repeat: Repeat until no more outliers can be found.
Returns:
Results dataframe via the @repeater wrapper function. The dataframe contains
the filtered time series and flags from all iterations.
"""
super().__init__(series=series, flagid=self.flagid, idstr=idstr)
self.showplot = False
self.verbose = False
Expand All @@ -163,7 +182,7 @@ def __init__(self,
self.verbose = verbose
self.repeat = repeat

def calc(self):
def _calc(self):
"""Calculate flag"""
self.reset()
ok, rejected = self._flagtests()
Expand All @@ -178,8 +197,8 @@ def _flagtests(self) -> tuple[DatetimeIndex, DatetimeIndex]:
rejected = rejected[rejected].index
if self.showplot:
self.plot(ok=ok, rejected=rejected,
plottitle=f"Outlier detection based on "
f"absolute limits for {self.series.name}")
plottitle=f"Outlier detection based on "
f"absolute limits for {self.series.name}")
return ok, rejected


Expand All @@ -193,7 +212,7 @@ def example():
series = pd.Series(data, index=tidx, name='TESTDATA')

al = AbsoluteLimits(series=series, idstr='99')
al.calc(min=16, max=84)
al._calc(min=16, max=84)

print(series.describe())
filteredseries = al.filteredseries
Expand Down
42 changes: 28 additions & 14 deletions diive/pkgs/outlierdetection/incremental.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
"""
OUTLIER DETECTION: INCREMENTAL
==============================
This module is part of the diive library:
https://github.com/holukas/diive
"""
from pandas import Series, DatetimeIndex

from diive.core.base.flagbase import FlagBase
Expand All @@ -9,21 +17,8 @@
@ConsoleOutputDecorator()
@repeater
class zScoreIncrements(FlagBase):
"""
Identify outliers based on the z-score of record increments
...
Methods:
calc(threshold: float = 4): Calculates flag
After running calc(), results can be accessed with:
flag: Series
Flag series where accepted (ok) values are indicated
with flag=0, rejected values are indicated with flag=2
filteredseries: Series
Data with rejected values set to missing
"""Identify outliers based on the z-score of record increments."""

"""
flagid = 'OUTLIER_INCRZ'

def __init__(self,
Expand All @@ -33,6 +28,25 @@ def __init__(self,
showplot: bool = False,
verbose: bool = False,
repeat: bool = True):
"""
Args:
series: Time series in which outliers are identified.
idstr: Identifier, added as suffix to output variable names.
thres_zscore: Threshold for the z-score. Scores above this value are
flagged as outliers. NOTE that in this case the z-scores are
calculated from the increments between data records in *series*,
where the increment at a point in time t is calculated as:
increment(t) = value(t) - value(t-1).
showplot: Show plot with results from the outlier detection.
verbose: Print more text output.
repeat: Repeat until no more outliers can be found.
Returns:
Results dataframe via the @repeater wrapper function. The dataframe contains
the filtered time series and flags from all iterations.
"""
super().__init__(series=series, flagid=self.flagid, idstr=idstr)
self.showplot = False
self.verbose = False
Expand Down
43 changes: 28 additions & 15 deletions diive/pkgs/outlierdetection/localsd.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
"""
OUTLIER DETECTION: LOCAL STANDARD DEVIATION
===========================================
This module is part of the diive library:
https://github.com/holukas/diive
"""
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
from pandas import DatetimeIndex, Series
Expand All @@ -12,21 +20,8 @@
@ConsoleOutputDecorator()
@repeater
class LocalSD(FlagBase):
"""
Identify outliers based on the local standard deviation
...
Methods:
calc(): Calculates flag
After running calc, results can be accessed with:
flag: Series
Flag series where accepted (ok) values are indicated
with flag=0, rejected values are indicated with flag=2
filteredseries: Series
Data with rejected values set to missing
"""Identify outliers based on the local standard deviation."""

"""
flagid = 'OUTLIER_LOCALSD'

def __init__(self,
Expand All @@ -37,6 +32,24 @@ def __init__(self,
showplot: bool = False,
verbose: bool = False,
repeat: bool = True):
"""
Args:
series: Time series in which outliers are identified.
idstr: Identifier, added as suffix to output variable names.
winsize: Window size. Is used to calculate the rolling median and
rolling standard deviation in a time window of size *winsize* records.
n_sd: Number of standard deviations. Records that deviate from the rolling
median by more than *n_sd* rolling standard deviations are flagged as outliers.
showplot: Show plot with removed data points.
verbose: More text output to console if *True*.
repeat: Repeat until no more outliers can be found.
Returns:
Results dataframe via the @repeater wrapper function. The dataframe contains
the filtered time series and flags from all iterations.
"""
super().__init__(series=series, flagid=self.flagid, idstr=idstr)
self.showplot = False
self.verbose = False
Expand All @@ -46,7 +59,7 @@ def __init__(self,
self.verbose = verbose
self.repeat = repeat

def calc(self):
def _calc(self):
"""Calculate flag"""
self.reset()
ok, rejected = self._flagtests()
Expand Down
Loading

0 comments on commit 35b1ce7

Please sign in to comment.