Merge pull request #826 from sean-morris/summaries

Otter Grade Write Individual Notebook Results
ucbds-infra · Aug 28, 2024 · b76a733
2 parents db6ee4f + 22630d2
commit b76a733
Show file tree

Hide file tree

Showing 19 changed files with 1,318 additions and 1 deletion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,7 @@
 # Changelog
 
 **v5.6.0 (unreleased):**
-
+* Updated Otter Grade to write a grading summary for each notebook when the `--summaries` flag is passed per [#814](https://github.com/ucbds-infra/otter-grader/issues/814)
 * Updated Otter Grade CSV to indicate which notebooks timeout per [#813](https://github.com/ucbds-infra/otter-grader/issues/813)
 * Updated Otter Grade CSV to include the number of points per question in the first row
 * Updated Otter Grade CSV to include total points column

diff --git a/otter/cli.py b/otter/cli.py
@@ -132,6 +132,7 @@ def generate_cli(*args, **kwargs):
 @click.option("-a", "--autograder", default=defaults["autograder"], help="Path to autograder zip file")
 @click.option("-o", "--output-dir", default=defaults["output_dir"], help="Directory to which to write output")
 @click.option("--ext", default=defaults["ext"], type=click.Choice(_ALLOWED_EXTENSIONS), help="The extension to glob for submissions")
+@click.option("--summaries", is_flag=True, help="Whether to write the otter run results for each graded notebook")
 @click.option("--pdfs", is_flag=True, help="Whether to copy notebook PDFs out of containers")
 @click.option("--containers", default=defaults["containers"], type=click.INT, help="Specify number of containers to run in parallel")
 @click.option("--image", default=defaults["image"], help="A Docker image tag to use as the base image")

diff --git a/otter/grade/__init__.py b/otter/grade/__init__.py
@@ -10,6 +10,7 @@
 from .utils import (
     merge_csv,
     prune_images,
+    POINTS_POSSIBLE_LABEL,
     SCORES_DICT_FILE_KEY,
     SCORES_DICT_PERCENT_CORRECT_KEY,
     SCORES_DICT_TOTAL_POINTS_KEY,
@@ -33,6 +34,7 @@ def main(
     autograder: str = "./autograder.zip",
     containers: int = 4, 
     ext: str = "ipynb",
+    summaries: bool = False,
     no_kill: bool = False,
     image: str = "ubuntu:22.04", 
     pdfs: bool = False,
@@ -155,6 +157,17 @@ def main(
     # write to CSV file
     output_df.to_csv(os.path.join(output_dir, "final_grades.csv"), index=False)
 
+    # write score summaries to files
+    if summaries:
+        grading_summary_path = os.path.join(output_dir, "grading-summaries")
+        if not os.path.exists(grading_summary_path):
+            os.mkdir(grading_summary_path)
+        for df in grade_dfs:
+            df_dict = df.to_dict()
+            if df_dict['file'][0] != POINTS_POSSIBLE_LABEL:
+                with open(os.path.join(grading_summary_path, f"{df_dict['file'][0]}.txt"), mode="w") as f:
+                    f.write(df_dict["summary"][0])
+
     # return percentage if a single file was graded
     if len(paths) == 1 and os.path.isfile(paths[0]):
         return output_df[SCORES_DICT_PERCENT_CORRECT_KEY][1]
diff --git a/test/test_cli.py b/test/test_cli.py
@@ -479,6 +479,10 @@ def test_grade(mocked_grade, run_cli):
         assert_cli_result(result, expect_error=False)
         mocked_grade.assert_called_with(**{**std_kwargs, "ext": ext})
 
+    result = run_cli([*cmd_start, "--summaries"])
+    assert_cli_result(result, expect_error=False)
+    mocked_grade.assert_called_with(**{**std_kwargs, "summaries": True})
+
     result = run_cli([*cmd_start, "--pdfs"])
     assert_cli_result(result, expect_error=False)
     mocked_grade.assert_called_with(**{**std_kwargs, "pdfs": True})

diff --git a/test/test_grade/files/results/fails1.txt b/test/test_grade/files/results/fails1.txt
@@ -0,0 +1,72 @@
+q1 results:
+    q1 - 1 result:
+        ❌ Test case failed
+        Trying:
+            square(3)
+        Expecting:
+            9
+        **********************************************************************
+        Line 2, in q1 0
+        Failed example:
+            square(3)
+        Expected:
+            9
+        Got:
+            27
+
+    q1 - 2 result:
+        ❌ Test case failed
+        Trying:
+            square(2.5)
+        Expecting:
+            6.25
+        **********************************************************************
+        Line 2, in q1 1
+        Failed example:
+            square(2.5)
+        Expected:
+            6.25
+        Got:
+            15.625
+
+    q1 - 3 result:
+        ❌ Test case failed
+        Trying:
+            square(6)
+        Expecting:
+            36
+        **********************************************************************
+        Line 2, in q1 2
+        Failed example:
+            square(6)
+        Expected:
+            36
+        Got:
+            216
+
+    q1 - 4 result:
+        ❌ Test case failed
+        Trying:
+            square(1.5)
+        Expecting:
+            2.25
+        **********************************************************************
+        Line 2, in q1 3
+        Failed example:
+            square(1.5)
+        Expected:
+            2.25
+        Got:
+            3.375
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
diff --git a/test/test_grade/files/results/fails1H.txt b/test/test_grade/files/results/fails1H.txt
@@ -0,0 +1,48 @@
+q1 results:
+    q1 - 1 result:
+        ✅ Test case passed
+
+    q1 - 2 result:
+        ✅ Test case passed
+
+    q1 - 3 result:
+        ❌ Test case failed
+        Trying:
+            square(6)
+        Expecting:
+            36
+        **********************************************************************
+        Line 2, in q1 2
+        Failed example:
+            square(6)
+        Expected:
+            36
+        Got:
+            216
+
+    q1 - 4 result:
+        ❌ Test case failed
+        Trying:
+            square(1.5)
+        Expecting:
+            2.25
+        **********************************************************************
+        Line 2, in q1 3
+        Failed example:
+            square(1.5)
+        Expected:
+            2.25
+        Got:
+            3.375
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
diff --git a/test/test_grade/files/results/fails1and3and6.txt b/test/test_grade/files/results/fails1and3and6.txt
@@ -0,0 +1,164 @@
+q1 results:
+    q1 - 1 result:
+        ❌ Test case failed
+        Trying:
+            square(3)
+        Expecting:
+            9
+        **********************************************************************
+        Line 2, in q1 0
+        Failed example:
+            square(3)
+        Expected:
+            9
+        Got:
+            243
+
+    q1 - 2 result:
+        ❌ Test case failed
+        Trying:
+            square(2.5)
+        Expecting:
+            6.25
+        **********************************************************************
+        Line 2, in q1 1
+        Failed example:
+            square(2.5)
+        Expected:
+            6.25
+        Got:
+            97.65625
+
+    q1 - 3 result:
+        ❌ Test case failed
+        Trying:
+            square(6)
+        Expecting:
+            36
+        **********************************************************************
+        Line 2, in q1 2
+        Failed example:
+            square(6)
+        Expected:
+            36
+        Got:
+            7776
+
+    q1 - 4 result:
+        ❌ Test case failed
+        Trying:
+            square(1.5)
+        Expecting:
+            2.25
+        **********************************************************************
+        Line 2, in q1 3
+        Failed example:
+            square(1.5)
+        Expected:
+            2.25
+        Got:
+            7.59375
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results:
+    q3 - 1 result:
+        ❌ Test case failed
+        Trying:
+            x
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q3 0
+        Failed example:
+            x
+        Exception raised:
+            Traceback (most recent call last):
+              File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
+                exec(compile(example.source, filename, "single",
+              File "<doctest q3 0[0]>", line 1, in <module>
+                x
+            NameError: name 'x' is not defined
+
+    q3 - 2 result:
+        ❌ Test case failed
+        Trying:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Expecting:
+            yep
+        **********************************************************************
+        Line 2, in q3 1
+        Failed example:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Exception raised:
+            Traceback (most recent call last):
+              File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
+                exec(compile(example.source, filename, "single",
+              File "<doctest q3 1[0]>", line 1, in <module>
+                if x:
+            NameError: name 'x' is not defined
+
+q4 results: All test cases passed!
+
+q6 results:
+    q6 - 1 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            next(fib) == 0 and next(fib) == 1
+        Expecting:
+            True
+        **********************************************************************
+        Line 3, in q6 0
+        Failed example:
+            next(fib) == 0 and next(fib) == 1
+        Expected:
+            True
+        Got:
+            False
+
+    q6 - 2 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            for _ in range(10):
+                print(next(fib))
+        Expecting:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        **********************************************************************
+        Line 3, in q6 1
+        Failed example:
+            for _ in range(10):
+                print(next(fib))
+        Exception raised:
+            Traceback (most recent call last):
+              File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
+                exec(compile(example.source, filename, "single",
+              File "<doctest q6 1[1]>", line 2, in <module>
+                print(next(fib))
+            StopIteration
+
+q7 results: All test cases passed!