fully working dynamic stamps

mever-team · May 18, 2024 · 37c179e · 37c179e
1 parent 70d3277
commit 37c179e
Show file tree

Hide file tree

Showing 10 changed files with 60 additions and 37 deletions.
diff --git a/docs/advanced/modelcards.md b/docs/advanced/modelcards.md
@@ -30,7 +30,7 @@ a stamp per:
 ```python
 import fairbench as fb
 report = ...
-stamp = fb.stamps.four_fifths_rule(report)
+stamp = fb.stamps.four_fifths(report)
 print(stamp)
 # 4/5ths ratio: False
 ```
@@ -57,7 +57,7 @@ for a more thorough explanation (see below).
 stamps = fb.combine(
     fb.stamps.prule(report),
     fb.stamps.accuracy(report),
-    fb.stamps.four_fifths_rule(report)
+    fb.stamps.four_fifths(report)
 )
 print(stamps)
 ```

diff --git a/docs/quickstart.md b/docs/quickstart.md
@@ -81,7 +81,7 @@ You can also export to markdown or yaml formats.
 stamps = fb.combine(
     fb.stamps.prule(report),
     fb.stamps.accuracy(report),
-    fb.stamps.four_fifths_rule(report)
+    fb.stamps.four_fifths(report)
 )
 fb.modelcards.tohtml(stamps, file="output.html", show=True)
 ```

diff --git a/docs/record/stamps.md b/docs/record/stamps.md
@@ -26,17 +26,17 @@ Click on one of the stamps below to get a full description.
 
 ## Classification
 
-<button onclick="toggleCode('four_fifths_rule', this)" class="toggle-reveal">
-four_fifths_rule</button>
+<button onclick="toggleCode('four_fifths', this)" class="toggle-reveal">
+four_fifths</button>
 <button onclick="toggleCode('accuracy', this)" class="toggle-reveal">
 accuracy</button>
 <button onclick="toggleCode('dfpr', this)" class="toggle-reveal">
 dfpr</button>
 <button onclick="toggleCode('dfnr', this)" class="toggle-reveal">
 dfnr</button>
 
-<div id="four_fifths_rule" class="doc" markdown="span" style="display:none;">
-The `four_fifths_rule` stamp refers to the popular 4/5ths rule that infers discrimination
+<div id="four_fifths" class="doc" markdown="span" style="display:none;">
+The `four_fifths` stamp refers to the popular 4/5ths rule that infers discrimination
 if positive rate ratios lie below 80%. We apply this for
 all subgroups, an approach also known as differential fairness.
 </div>

diff --git a/examples/demos/quickstart.ipynb b/examples/demos/quickstart.ipynb
@@ -522,7 +522,7 @@
     "stamps = fb.combine(\n",
     "    fb.stamps.prule(report),\n",
     "    fb.stamps.accuracy(report),\n",
-    "    fb.stamps.four_fifths_rule(report)\n",
+    "    fb.stamps.four_fifths(report)\n",
     ")\n",
     "stamps"
    ]

diff --git a/examples/playground/simple.py b/examples/playground/simple.py
@@ -6,13 +6,10 @@
 report = fb.multireport(predictions=yhat, scores=scores, labels=y, sensitive=sensitive)
 
 stamps = fb.combine(
+    fb.stamps.four_fifths(report),
     fb.stamps.prule(report),
-    fb.stamps.dfpr(report),
-    fb.stamps.dfnr(report),
-    fb.stamps.abroca(report),
-    fb.stamps.accuracy(report),
-    fb.stamps.four_fifths_rule(report),
+    fb.stamps.rbroca(report),
 )
-# print(fb.modelcards.tohtml(stamps, show=False))
+fb.modelcards.tohtml(stamps, show=True)
 
-fb.interactive(report, browser=True)
+# fb.interactive(report, browser=True)
diff --git a/fairbench/export/modelcards/toyaml.py b/fairbench/export/modelcards/toyaml.py
@@ -14,10 +14,16 @@ def toyamlprimitives(report):
         description = stamp.desc
         caveats = stamp.caveats
         value = str(value)
-        if value=="True" and stamp.caveats_accept is not None:
-            caveats += stamp.caveats_reject
-        if value=="False" and stamp.caveats_reject is not None:
-            caveats += stamp.caveats_reject
+        if value == "True" and stamp.caveats_accept is not None:
+            caveats = caveats + stamp.caveats_accept  # accepted stamps get the accept-specific caveats
+        if value == "False" and stamp.caveats_reject is not None:
+            caveats = caveats + stamp.caveats_reject
+        if symbols:
+            for symbol, replace in symbols.items():
+                description = description.replace("{" + symbol + "}", replace)
+                caveats = [
+                    caveat.replace("{" + symbol + "}", replace) for caveat in caveats
+                ]
         metric_dict = {
             "name": metric,
             "description": description,

diff --git a/fairbench/verification.py b/fairbench/verification.py
@@ -29,7 +29,7 @@ def __init__(
         caveats=None,
         caveats_accept=None,
         caveats_reject=None,
-        symbols=None
+        symbols=None,
     ):
         if caveats is None:
             caveats = [
@@ -62,16 +62,36 @@ def __init__(
     def __call__(self, report):
         rets = [self.__call_once(fields, report) for fields in self._fields]
         result = None
-        for ret in rets:
+        for selection, ret in enumerate(rets):
             if not isinstance(ret, ExplainableError):
                 if result is None or isinstance(ret, Explainable):
-                    result = ret
+                    result = (selection, ret)
                 # _check_equals(result, ret)
         if result is None:
             result = ExplainableError(
                 f"Report does not contain any of {', '.join('.'.join(fields) for fields in self._fields)}"
             )
-        result.stamp = self
+            result.stamp = self
+        else:
+            selection, result = result
+            result.stamp = Stamp(
+                self.name,
+                self._fields,
+                self.minimum,
+                self.maximum,
+                self.desc,
+                self.caveats,
+                self.caveats_accept,
+                self.caveats_reject,
+                symbols=(
+                    None
+                    if self.symbols is None
+                    else {
+                        symbol: value[selection]
+                        for symbol, value in self.symbols.items()
+                    }
+                ),
+            )
         return Forklike({self.name: result})
 
     def __call_once(self, fields, report):
@@ -161,7 +181,7 @@ def __getattribute__(self, attr):
             desc=resource.get("description"),
             caveats=resource.get("caveats"),
             caveats_accept=resource.get("caveats_accept", None),
-            caveats_reject=resource.get("caveats_accept", None),
+            caveats_reject=resource.get("caveats_reject", None),
             symbols=resource.get("symbols", None),
         )
         self._stamps[attr] = ret

diff --git a/stamps/common.yaml b/stamps/common.yaml
@@ -1,6 +1,6 @@
 # WARNING: THIS FILE IS USED BY EARLIER VERSIONS OF FAIRBENCH. CONSIDER UPGRADING.
 
-four_fifths_rule:
+four_fifths:
   title: "4/5 rule"
   alias: ["minratio.pr", "minratio[vsAny].pr", "prule"]
   minimum: 0.8

diff --git a/stamps/dynamic.yaml b/stamps/dynamic.yaml
@@ -81,11 +81,11 @@ abroca:
   symbols:
     COMPARISON:
       - "each pair of groups"
-      - "each group and its complement"
-      - "each group and its complement"
+      - "each group and the total population"
+      - "each group and the rest of the population"
   description:
-    "The maximum area between ROC curves, compared between {COMPARISON}.
-    This is a type of disparate mistreatment for recommendation systems."
+    "Compares the area between ROC curves. This comparison is made between {COMPARISON}
+    and is a type of disparate mistreatment for recommendation systems."
   caveats:
     - "Disparate mistreatment may not always be an appropriate fairness consideration."
     - "Consider input from affected stakeholders to determine whether abroca is an appropriate fairness measure."
@@ -96,11 +96,11 @@ rbroca:
   symbols:
     COMPARISON:
       - "each pair of groups"
-      - "each group and its complement"
-      - "each group and its complement"
+      - "each group and the total population"
+      - "each group and the rest of the population"
   description:
-    "The maximum relative area between ROC curves, compared between {COMPARISON}.
-    This is a type of disparate mistreatment for recommendation systems."
+    "Compares the relative area between ROC curves. This comparison is made between {COMPARISON}
+    and is a type of disparate mistreatment for recommendation systems."
   caveats:
     - "Disparate mistreatment may not always be an appropriate fairness consideration."
     - "Consider input from affected stakeholders to determine whether rbroca is an appropriate fairness measure."
@@ -111,10 +111,10 @@ maxbdcg:
   symbols:
     COMPARISON:
       - "each pair of groups"
-      - "each group and its complement"
-      - "each group and its complement"
+      - "each group and the total population"
+      - "each group and the rest of the population"
   description:
-    "The maximum of NDCG-weighted differences between top-k curves of {COMPARISON}.
+    "Compares the NDCG-weighted differences between top-k curves of {COMPARISON}.
     These curves count the number of elements
     represented at different top-k predictions, and this measure 
     is a type of disparate impact for recommendation systems."

diff --git a/tests/test_modelcards.py b/tests/test_modelcards.py
@@ -17,7 +17,7 @@ def test_modelcards(monkeypatch):
                 fb.stamps.accuracy(report),
                 fb.stamps.dfpr(report),
                 fb.stamps.dfnr(report),
-                fb.stamps.four_fifths_rule(report),
+                fb.stamps.four_fifths(report),
             )
 
             fb.modelcards.toyaml(stamps)  # TODO: add this to texts