smashub · jonnybluesman · Mar 2, 2023 · Dec 13, 2022 · Dec 14, 2022 · Feb 20, 2023
diff --git a/LICENSE.md b/LICENSE.md
@@ -0,0 +1 @@
+ChoCo follows a dual licence scheme to comply with the data sharing strategies of the original collections. Data and code are released under the [Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/) licence, with the exception of data derived from the *Chordify Annotator Subjectivity Dataset*, *Mozart Piano Sonata*, and *Jazz Audio-Aligned Harmony* collections. The latter are released under the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/) licence.
diff --git a/README.md b/README.md
@@ -202,10 +202,9 @@ We thank all the annotators for contributing to the project. This project is an
 
 ## License
 
-![](https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png)
-
-This work is licensed under a [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/).
+ChoCo follows a dual licence scheme to comply with the data sharing strategies of the original collections. Data and code are released under the [Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/) licence, with the exception of data derived from the *Chordify Annotator Subjectivity Dataset*, *Mozart Piano Sonata*, and *Jazz Audio-Aligned Harmony* collections. The latter are released under the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/) licence.
 
+---
 
 ## References
 

diff --git a/assets/survey/chord_data_survey.csv b/assets/survey/chord_data_survey.csv
diff --git a/assets/survey/survey_background_b.pdf b/assets/survey/survey_background_b.pdf
diff --git a/assets/survey/survey_background_dtype.pdf b/assets/survey/survey_background_dtype.pdf
diff --git a/assets/survey/survey_evidence.pdf b/assets/survey/survey_evidence.pdf
diff --git a/assets/survey/survey_results.ipynb b/assets/survey/survey_results.ipynb
diff --git a/choco/jams_score.py b/choco/jams_score.py
@@ -6,6 +6,7 @@
 from typing import Union
 
 import jams
+import music21
 import numpy as np
 
 logger = logging.getLogger("choco.jams_score")
@@ -15,6 +16,20 @@ class UnexpectedOffsetType(Exception):
     """Raised when the offset type cannot be inferred"""
     pass
 
+class InconsistentMetricalAnnotation(Exception):
+    """Signals that a JAMS holds metrical annotations that are inconsistent."""
+
+def to_jams_timesignature(time_signature_str):
+    """
+    Build the `timesig` annotation value for a time signature string.
+
+    The string is parsed with `music21.meter.TimeSignature`; the numerator
+    and the denominator of the parsed object are returned in a dictionary.
+
+    Parameters
+    ----------
+    time_signature_str : str
+        The time signature to convert, in a format that `music21` can parse
+        (presumably "numerator/denominator", e.g. "3/4" -- TODO confirm).
+
+    Returns
+    -------
+    dict
+        A dictionary with the "numerator" and "denominator" keys.
+
+    """
+    parsed_timesig = music21.meter.TimeSignature(time_signature_str)
+    return dict(numerator=parsed_timesig.numerator,
+                denominator=parsed_timesig.denominator)
 
 def encode_metrical_onset(measure, offset, offset_type="auto"):
     """
@@ -65,9 +80,10 @@ def encode_metrical_onset(measure, offset, offset_type="auto"):
     return float(measure) + offset
 
 
-def append_listed_annotation(jams_object:jams.JAMS, namespace:str,
-    ann_listed:list, offset_type='auto', ann_start:float=1.1,
-    ann_duration: Union[float, str]=None, confidence=1., reversed=False):
+def append_listed_annotation(jams_object: jams.JAMS, namespace: str,
+    ann_listed: list, offset_type='auto', value_fn=lambda x: x,
+    ann_start:float=1.1, ann_duration: Union[float, str]=None, 
+    confidence=1., reversed=False):
     """
     Append a score annotation encoded as a list of score observations, each
     providing information of [measure, offset, metrical duration, value], where
@@ -123,7 +139,7 @@ def append_listed_annotation(jams_object:jams.JAMS, namespace:str,
             time=encode_metrical_onset(measure, offset, offset_type),
             duration=duration,  # duration always expected in quarter beats
             confidence=confidence,
-            value=value
+            value=value_fn(value)
         )
 
     # Add namespace annotation to jam file
@@ -136,3 +152,69 @@ def infer_duration(jams_object: jams.JAMS, append_meta=False):
     Infer the duration of a piece from the longest annotation.
     """
     raise NotImplementedError
+
+
+def create_timesig_annotation(timesig: str, duration: int, jam: jams.JAMS = None):
+    """
+    Create a time signature JAMS annotation from a global time signature,
+    given as a string, and the expected duration of the piece / annotation,
+    given as the number of measures. If a JAMS object is provided, the new
+    annotation will be appended to it.
+
+    Parameters
+    ----------
+    timesig : str
+        A string encoding the global time signature to consider.
+    duration : int
+        Duration of the piece / annotation expressed in no. of measures.
+    jam : jams.JAMS
+        A JAMS file that will be optionally extended with the new annotation.
+
+    Returns
+    -------
+    timesig_ann : jams.Annotation
+        The new annotation of the global time signature in the piece.
+
+    Notes
+    -----
+    - This method does too many things at the moment, like adding metadata.
+    - When `jam` is given, its `file_metadata.duration` is overwritten with
+      the duration in beats (`beatCount` of the time signature * `duration`).
+
+    """
+    # First create a time signature object via M21
+    m21_timesig = music21.meter.TimeSignature(timesig)
+    dur_in_beats = m21_timesig.beatCount * duration
+    # We can now create the annotation object from the global time signature
+    timesig_ann = jams.Annotation(namespace="timesig", duration=dur_in_beats)
+    timesig_ann.append(time=1, duration=dur_in_beats, confidence=1.,
+                       value={"numerator": m21_timesig.numerator,
+                              "denominator": m21_timesig.denominator})
+
+    # Explicit None check: whether to update the JAMS should not depend on
+    # the truthiness of the object that was passed in.
+    if jam is not None:
+        # The new duration is now expressed in beats. Note that, if another
+        # duration was specified before, this will be overridden now.
+        jam.file_metadata.duration = dur_in_beats
+        jam.annotations.append(timesig_ann)
+
+    return timesig_ann
+
+
+def retrieve_global_timesig(jam: jams.JAMS):
+    """
+    Returns the global time signature, if present, as a `music21` object.
+
+    A `timesig` annotation is treated as global when it holds exactly one
+    observation.
+
+    Parameters
+    ----------
+    jam : jams.JAMS
+        The JAMS object to search for `timesig` annotations.
+
+    Returns
+    -------
+    gtimesig : music21.meter.TimeSignature or None
+        The global time signature, or None if the JAMS has no `timesig`
+        annotation or none of them holds a single observation.
+
+    Raises
+    ------
+    InconsistentMetricalAnnotation
+        If more than one single-observation `timesig` annotation is found.
+
+    """
+    timesig_anns = jam.search(namespace="timesig")
+    if len(timesig_anns) == 0:
+        logger.info("No time signature found in the given JAMS.")
+        return None # still regular behaviour
+
+    gtimesig_ann = [ts_ann for ts_ann in timesig_anns if len(ts_ann.data) == 1]
+    if len(gtimesig_ann) == 0:
+        # Time signature annotations exist, but none of them holds a single
+        # observation: without this guard the indexing below would raise
+        # an IndexError instead of reporting that nothing was found.
+        logger.info("No global time signature found in the given JAMS.")
+        return None
+    if len(gtimesig_ann) > 1:  # sanity check on the global time signature
+        raise InconsistentMetricalAnnotation(
+            f"Expected 1 global time signature, {len(gtimesig_ann)} found!")
+
+    gtimesig = gtimesig_ann[0].data[0].value
+    gtimesig = music21.meter.TimeSignature(
+        f"{gtimesig['numerator']}/{gtimesig['denominator']}")
+
+    return gtimesig
diff --git a/choco/namespaces/__init__.py b/choco/namespaces/__init__.py
@@ -3,9 +3,13 @@
 
 ns_dir = os.path.dirname(os.path.abspath(__file__))
 
+# New chord namespaces
 jams.schema.add_namespace(os.path.join(ns_dir, "chord_ireal.json"))
 jams.schema.add_namespace(os.path.join(ns_dir, "chord_jparser_harte.json"))
 jams.schema.add_namespace(os.path.join(ns_dir, "chord_jparser_functional.json"))
 jams.schema.add_namespace(os.path.join(ns_dir, "chord_m21_leadsheet.json"))
 jams.schema.add_namespace(os.path.join(ns_dir, "chord_m21_abc.json"))
 jams.schema.add_namespace(os.path.join(ns_dir, "chord_weimar.json"))
+
+# New symbolic namespaces
+jams.schema.add_namespace(os.path.join(ns_dir, "timesig.json"))
diff --git a/choco/namespaces/timesig.json b/choco/namespaces/timesig.json
@@ -0,0 +1,20 @@
+{"timesig":
+    {
+        "value": {
+            "type": "object",
+            "properties": {
+                "numerator": {
+                    "type": "number",
+                    "minimum": 1
+                },
+                "denominator": {
+                    "type": "number",
+                    "minimum": 1
+                }
+            },
+            "required": ["numerator", "denominator"]
+        },
+        "dense": false,
+        "description": "Time signatures as annotated symbolic content."
+    }
+}
diff --git a/choco/parsers/dcmlab_parser.py b/choco/parsers/dcmlab_parser.py
@@ -13,6 +13,26 @@
 
 import music21
 
+def compress_annotation(annotation):
+    """
+    Creates a new annotation where consecutive observations with the same value
+    are collapsed into a single observation with accumulated durations.
+
+    Parameters
+    ----------
+    annotation : list of list
+        A raw annotation containing [measure, beat, duration, value] records.
+
+    Returns
+    -------
+    collapsed_annotation : list of list
+        The collapsed records, built as new lists so that the observations
+        in `annotation` are left untouched. Empty if `annotation` is empty.
+
+    """
+    collapsed_annotation = []
+    for observation in annotation:
+        if collapsed_annotation \
+                and observation[-1] == collapsed_annotation[-1][-1]:
+            # Same value as the previous record: only extend its duration
+            collapsed_annotation[-1][-2] += observation[-2]
+        else:  # novel observation: copy it, so the input is never mutated
+            collapsed_annotation.append(list(observation))
+
+    return collapsed_annotation
+
 
 def process_dcmlab_record(annotation_df: pd.DataFrame):
     """
@@ -81,9 +101,9 @@ def process_dcmlab_record(annotation_df: pd.DataFrame):
     lstack = lambda x,y: [x_i + x_j for x_i, x_j in zip(x,y)]
     stack_time = partial(lstack, x=timing_info)
 
-    local_keys = stack_time(y=local_keys)
     chords_roman = stack_time(y=chords_roman)
     chords_numeral = stack_time(y=chords_numeral)
-    time_signatures = stack_time(y=time_signatures)
+    local_keys = compress_annotation(stack_time(y=local_keys))
+    time_signatures = compress_annotation(stack_time(y=time_signatures))
 
     return chords_roman, chords_numeral, time_signatures, local_keys
diff --git a/choco/parsers/harm_parser.py b/choco/parsers/harm_parser.py
@@ -148,5 +148,7 @@ def process_multiline_annotation(annotation):
         hartelike_ann.append([measure, beat, duration, chord_label])
         romanlike_ann.append([measure, beat, duration,
                               f"{key}:{chord_roman_desc}"])
+
+    time_signature_incomplete = [1, 1, bpb*hartelike_ann[-1][0], f"{bpb}/"]
 
-    return hartelike_ann, romanlike_ann, key_ann
+    return hartelike_ann, romanlike_ann, key_ann, time_signature_incomplete
diff --git a/choco/parsers/instances.py b/choco/parsers/instances.py
@@ -539,11 +539,12 @@ def parse_schubert_winterreise(annotation_paths, out_dir, format, dataset_name,
         # dataframe containing summative (global) annotations for all pieces.
         q = {"WorkID": meta["score_file"], "PerformanceID": meta["release_id"]} \
             if format == "audio" else {"WorkID": meta["score_file"]}
+        timesig = meta["timesign"] if format=="score" else None
 
         jam = process_text_annotation_multi(
             namespace_sources, schubert_namespace_mapping,
             ignore_annotations=schubert_ignore_namespaces,
-            sum_query=q, duration=meta["duration"])
+            sum_query=q, duration=meta["duration"], timesig=timesig)
         metadata_entry["jams_path"] = os.path.join(
             jams_dir, metadata_entry["id"] + ".jams")
         # Injecting the metadata in the JAMS files
@@ -1450,7 +1451,7 @@ def parse_wheninrome(dataset_dir, out_dir, dataset_name, **kwargs):
             "subset": dataset,
             "collection": collection,
             "movement": mov,
-            "duration": inscore_meta["duration_m"],
+            "duration": inscore_meta["duration_beats"],
             "file_path": romant_analysis,
             "jams_path": None
         }
@@ -1751,7 +1752,7 @@ def parse_jazzcorpus(dataset_dir, out_dir, dataset_name, **kwargs):
             "jams_path": None,
         }
 
-        hartelike_ann, romanlike_ann, key_ann = \
+        hartelike_ann, romanlike_ann, key_ann, ts_incomp = \
             process_multiline_annotation(multiline_ann)
         jam = jams.JAMS()  # incremental JAMS constructions
         jams_score.append_listed_annotation(
@@ -1760,10 +1761,13 @@ def parse_jazzcorpus(dataset_dir, out_dir, dataset_name, **kwargs):
             jam, "chord_jparser_functional", romanlike_ann)
         jams_score.append_listed_annotation(
             jam, "key_mode", key_ann)
+        jam.annotations.append(jams.Annotation("timesig", data=[
+            jams.Observation(1, 1, {"numerator": ts_incomp[-1][0],
+                                    "denominator": None}, 1)]))
 
         jams_utils.register_jams_meta(
             jam, jam_type="score", genre="jazz",
-            duration=hartelike_ann[-1][0])
+            duration=ts_incomp[-2])
         jams_utils.register_annotation_meta(jam,
             annotator_name="Mark Granroth-Wilding",
             annotator_type="expert_human",
@@ -1850,7 +1854,7 @@ def parse_mozartsonatas(dataset_dir, out_dir, dataset_name, track_meta, **kwargs
             expanded=True,  # playthrough
             title=choco_meta["title"],
             composers=choco_meta["composers"],
-            duration=meta["duration_m"],
+            duration=meta["duration_beats"],
             identifiers={"musicbrainz": corpus_meta["musicbrainz"],
                          "wikidata": corpus_meta["wikidata"],
                          "imslp": corpus_meta["imslp"]},

diff --git a/choco/parsers/ireal_parser.py b/choco/parsers/ireal_parser.py
@@ -20,7 +20,7 @@
 
 
 from jams_utils import register_jams_meta, register_annotation_meta
-from jams_score import append_listed_annotation
+from jams_score import append_listed_annotation, to_jams_timesignature
 from ireal_db import iRealDatabaseHandler
 from utils import create_dir, pad_substring
 
@@ -558,17 +558,18 @@ def extract_annotations_from_tune(tune: ChoCoTune):
     beat_duration = measure_beats*len(measures)
 
     chords = []  # iterating and timing chords
-    for m, measure in enumerate(measures):
+    for m, measure in enumerate(measures, 1):
         measure_chords = measure.split()
         chord_dur = measure_beats / len(measure_chords)
         # Creating equal onsets depending on within-measure chords and beats
         onsets = np.cumsum([0]+[d for d in (len(measure_chords)-1)*[chord_dur]])
         chords += [[m, o, chord_dur, c] for o, c in zip(onsets, measure_chords)]
     # Encapsulating key information as a single annotation
     assert len(tune.key.split()) == 1, "Single key assumed for iReal tunes"
-    keys = [[0, 0, beat_duration, tune.key]]
-
-    return chords, keys
+    keys = [[1, 1, beat_duration, tune.key]]
+    time_signatures = f"{tune.time_signature[0]}/{tune.time_signature[1]}"
+    time_signatures = [[1, 1, beat_duration, time_signatures]]
+    return chords, keys, time_signatures
 
 
 def jamify_ireal_tune(tune:ChoCoTune):
@@ -591,7 +592,7 @@ def jamify_ireal_tune(tune:ChoCoTune):
     """
     jam = jams.JAMS()
     tune_meta = extract_metadata_from_tune(tune)
-    chords, keys = extract_annotations_from_tune(tune)
+    chords, keys, time_signatures = extract_annotations_from_tune(tune)
 
     register_jams_meta(
         jam, jam_type="score",
@@ -601,9 +602,11 @@ def jamify_ireal_tune(tune:ChoCoTune):
         duration=chords[-1][0]+1,
         genre=tune_meta["genre"],
     )
-    jam.sandbox["tempo"] = tune_meta["tempo"]  # extra metadata
+    jam.sandbox["tempo"] = tune_meta["tempo"]  # XXX should be annotation
     append_listed_annotation(jam, "chord_ireal", chords, offset_type="beat")
     append_listed_annotation(jam, "key_mode", keys, offset_type="beat")
+    append_listed_annotation(jam, "timesig", time_signatures,
+                             offset_type="beat", value_fn=to_jams_timesignature)
 
     return tune_meta, jam