Commit

schubert-winterreise metrical re-eng #99
jonnybluesman committed Dec 14, 2022
1 parent 421e984 commit c8b4c52
Showing 269 changed files with 37,490 additions and 36,614 deletions.
66 changes: 66 additions & 0 deletions choco/jams_score.py
@@ -6,6 +6,7 @@
from typing import Union

import jams
import music21
import numpy as np

logger = logging.getLogger("choco.jams_score")
@@ -15,6 +16,9 @@ class UnexpectedOffsetType(Exception):
"""Raised when the offset type cannot be inferred"""
pass

class InconsistentMetricalAnnotation(Exception):
"""Raised when a JAMS contains inconsistent metrical annotations"""
pass

def encode_metrical_onset(measure, offset, offset_type="auto"):
"""
@@ -136,3 +140,65 @@ def infer_duration(jams_object: jams.JAMS, append_meta=False):
Infer the duration of a piece from the longest annotation.
"""
raise NotImplementedError


def create_timesig_annotation(timesig: str, duration: int, jam: jams.JAMS = None):
"""
Create a time signature JAMS annotation from a global time signature,
given as a string, and from the expected duration of the piece / annotation,
expressed as a number of measures. If a JAMS object is provided, the new
annotation will also be appended to it.
Parameters
----------
timesig : str
A string encoding the global time signature to consider.
duration : int
Duration of the piece / annotation expressed in no. of measures.
jam : jams.JAMS
A JAMS file that will be optionally extended with the new annotation.
Returns
-------
timesig_ann : jams.Annotation
The new annotation of the global time signature in the piece.
"""
# First create a time signature object via M21
m21_timesig = music21.meter.TimeSignature(timesig)
beats_per_measure = m21_timesig.beatCount
dur_in_beats = beats_per_measure * duration
# We can now create the annotation object from the global time signature
timesig_ann = jams.Annotation(namespace="timesig", duration=dur_in_beats)
timesig_ann.append(time=1, duration=dur_in_beats, confidence=1.,
value={"numerator": m21_timesig.numerator,
"denominator": m21_timesig.denominator})

if jam: # updating the JAMS object, if given
# The new duration is now expressed in beats. Note that, if another
# duration was specified before, this will be overridden now.
jam.file_metadata.duration = duration * beats_per_measure
jam.annotations.append(timesig_ann)

return timesig_ann
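
# A minimal usage sketch with illustrative values: for a 3/4 piece spanning
# 24 measures, the annotation (and the JAMS duration, when a JAMS object is
# passed) is expressed as 24 * 3 = 72 beats.
#   example_jam = jams.JAMS()
#   timesig_ann = create_timesig_annotation("3/4", 24, jam=example_jam)
#   example_jam.file_metadata.duration  # 72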


def retrieve_global_timesig(jam: jams.JAMS):
"""
Returns the global time signature, if present, as a `music21` object.
"""
timesig_anns = jam.search(namespace="timesig")
if len(timesig_anns) == 0:
logger.info("No time signature found in the given JAMS.")
return None # still regular behaviour

gtimesig_ann = [ts_ann for ts_ann in timesig_anns if len(ts_ann.data) == 1]
if len(gtimesig_ann) != 1: # sanity check on the global time signature
raise InconsistentMetricalAnnotation(
f"Expected 1 global time signature, {len(gtimesig_ann)} found!")

gtimesig = gtimesig_ann[0].data[0].value
gtimesig = music21.meter.TimeSignature(
f"{gtimesig['numerator']}/{gtimesig['denominator']}")

return gtimesig
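
# A minimal usage sketch, assuming `example_jam` holds the single-observation
# "timesig" annotation created above: the global time signature comes back as
# a `music21.meter.TimeSignature`, exposing beat-related properties.
#   gtimesig = retrieve_global_timesig(example_jam)
#   gtimesig.ratioString  # "3/4"
#   gtimesig.beatCount    # 3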
3 changes: 2 additions & 1 deletion choco/parsers/instances.py
@@ -539,11 +539,12 @@ def parse_schubert_winterreise(annotation_paths, out_dir, format, dataset_name,
# dataframe containing summative (global) annotations for all pieces.
q = {"WorkID": meta["score_file"], "PerformanceID": meta["release_id"]} \
if format == "audio" else {"WorkID": meta["score_file"]}
timesig = meta["timesign"] if format == "score" else None
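# E.g., a hypothetical score-format entry with meta["timesign"] = "2/4" and
# meta["duration"] = 48 (measures) would forward timesig="2/4" below, whereas
# audio-format entries keep timesig = None and durations in seconds.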

jam = process_text_annotation_multi(
namespace_sources, schubert_namespace_mapping,
ignore_annotations=schubert_ignore_namespaces,
sum_query=q, duration=meta["duration"])
sum_query=q, duration=meta["duration"], timesig=timesig)
metadata_entry["jams_path"] = os.path.join(
jams_dir, metadata_entry["id"] + ".jams")
# Injecting the metadata in the JAMS files
49 changes: 45 additions & 4 deletions choco/parsers/multifile_parser.py
@@ -19,12 +19,18 @@

sys.path.append(os.path.dirname(os.getcwd()))

from jams_score import create_timesig_annotation, retrieve_global_timesig

logger = logging.getLogger("choco.parsers.multifile_parser")


def process_summative_annotation(summative_anns, namespace_mapping, sum_query,
jams_tmp=None, duration=None, sep=";", confidence=1.):
"""
Append a new summative annotation to the given JAMS file by retrieving it
from a CSV file using the specified query (needed to find the row that
corresponds to the piece under analysis). As this is a summative
annotation, its duration spans the whole piece.
Parameters
----------
@@ -48,6 +54,7 @@ def process_summative_annotation(summative_anns, namespace_mapping, sum_query,
The separator to consider for the raw text files to read.
default_confidence : float
A float in (0, 1] indicating the confidence/reliability of annotations.
"""
if not isinstance(summative_anns, pd.DataFrame):
summative_anns = pd.read_csv(summative_anns, sep=sep)
@@ -103,20 +110,41 @@ def process_text_annotation(annotation_file, namespace_mapping, jams_tmp=None,
"""
# Create a fresh new JAMS object if this is the first annotation
# XXX This should not happen from the way this method is called
jam = jams.JAMS() if jams_tmp is None else jams_tmp

annotation_df = pd.read_csv(annotation_file, sep=sep)
# Renaming columns to match the namespace for the JAMS file
annotation_df = annotation_df.rename(columns=namespace_mapping)
# Check whether duration should be inferred from offsets or nullified
if "end" in annotation_df.columns and "duration" not in annotation_df.columns:
# Smooth out the annotation start and end times by rounding
annotation_df["start"] = annotation_df["start"].round(2)
annotation_df["end"] = annotation_df["end"].round(2)
# Compute the duration as the difference between end and start times
annotation_df["duration"] = annotation_df["end"] - annotation_df["start"]
elif "end" not in annotation_df.columns: # duration is assumed as null
logger.warning(f"Duration defaulted to 0 for {annotation_file}")
annotation_df["duration"] = 0.0
# Check whether confidence is not provided and should be defaulted
if "confidence" not in annotation_df.columns:
annotation_df["confidence"] = confidence


# Checking if this is a symbolic annotation, for re-engineering timings
if jam.search(namespace="timesig"):
gtimesig = retrieve_global_timesig(jam)
beats_per_measure = gtimesig.beatCount
# Split the start timings to obtain measure and measure offsets
start_measures = annotation_df["start"].astype(int)
start_offsets = annotation_df["start"] - start_measures
# From measure offsets (0.5) to beat offsets (2 in 4/4)
new_start_offsets = start_offsets * beats_per_measure
new_durations = annotation_df["duration"] * beats_per_measure
assert all(new_start_offsets < 10), "More than 9 beats in measure"
# Finally, we can override the start and duration columns
annotation_df["start"] = start_measures + new_start_offsets / 10
annotation_df["duration"] = new_durations

inner_namespaces = [cname for cname in annotation_df.columns
if cname not in ["start", "end", "duration", "confidence"]]
inner_namespaces = [namespace for namespace in inner_namespaces \
@@ -140,9 +168,10 @@ def process_text_annotation(annotation_file, namespace_mapping, jams_tmp=None,


def process_text_annotation_multi(namespace_sources, namespace_mapping,
sum_query=None, ignore_annotations=[], sep=";", duration=None, confidence=1.):
sum_query=None, ignore_annotations=[], sep=";", duration=None, timesig=None,
confidence=1.):
"""
Parse annotation data from different sources (fodlers, files) containing
Parse annotation data from different sources (folders, files) containing
music annotations of different properties but related to the same pieces.
Parameters
@@ -157,11 +186,20 @@ def process_text_annotation_multi(namespace_sources, namespace_mapping,
namespace_mapping : dict
A dictionary mapping dataset-specific annotation names to actual JAMS
namespaces (e.g. shorthand to chord_harte).
sum_query : dict
A query to search piece-specific content from summative annotations.
ignore_annotations : list
A list of annotations that should not be converted into a namespace.
sep : str
The separator to consider for the raw text files to read.
default_confidence : float
duration : float
Duration of the piece in measures (score) or seconds (audio). Note that
if the former is given, it will be converted to beats and encoded
in the resulting JAMS file accordingly (depending on `timesig`).
timesig : str
This parameter should be provided only when parsing symbolic annotations
and represents the global time signature as a string (e.g. 4/4).
confidence : float
A float in (0, 1] indicating the confidence/reliability of annotations.
Returns
@@ -178,6 +216,9 @@
"""
jam = jams.JAMS() # start creating the JAMS file
jam.file_metadata.duration = duration # needed for summative annotations
if timesig: # parameter is provided, hence, score annotation to process
# This is going to add a new time signature annotation and update dur
create_timesig_annotation(timesig, duration, jam)
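# E.g., for a hypothetical score with timesig="2/4" and duration=48 measures,
# the call above re-expresses the JAMS duration as 48 * 2 = 96 beats.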

for general_namespace, annotation_files in namespace_sources.items():
# Check whether the annotation is summative or not