Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regenerated symbolic partitions #100

Merged
merged 17 commits into from
Mar 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
1 change: 1 addition & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ChoCo follows a dual licence scheme to comply with the data sharing strategies of the original collections. Data and code are released under the [Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/), with the exception of data derived from *Chordify Annotator Subjectivity Dataset*, *Mozart Piano Sonata*, and *Jazz Audio-Aligned Harmony* data. The latter are released under the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,9 @@ We thank all the annotators for contributing to the project. This project is an

## License

![](https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png)

This work is licensed under a [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/).
ChoCo follows a dual licence scheme to comply with the data sharing strategies of the original collections. Data and code are released under the [Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/), with the exception of data derived from *Chordify Annotator Subjectivity Dataset*, *Mozart Piano Sonata*, and *Jazz Audio-Aligned Harmony* data. The latter are released under the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).

---

## References

Expand Down
80 changes: 54 additions & 26 deletions assets/survey/chord_data_survey.csv

Large diffs are not rendered by default.

Binary file modified assets/survey/survey_background_b.pdf
Binary file not shown.
Binary file modified assets/survey/survey_background_dtype.pdf
Binary file not shown.
Binary file modified assets/survey/survey_evidence.pdf
Binary file not shown.
151 changes: 85 additions & 66 deletions assets/survey/survey_results.ipynb

Large diffs are not rendered by default.

90 changes: 86 additions & 4 deletions choco/jams_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Union

import jams
import music21
import numpy as np

logger = logging.getLogger("choco.jams_score")
Expand All @@ -15,6 +16,20 @@ class UnexpectedOffsetType(Exception):
"""Raised when the offset type cannot be inferred"""
pass

class InconsistentMetricalAnnotation(Exception):
    """Signals that the metrical annotations found in a JAMS are inconsistent"""

def to_jams_timesignature(time_signature_str):
    """
    Build the `timesig` annotation value for a time signature given as text.

    Parameters
    ----------
    time_signature_str : str
        The time signature to convert, in a form that can be handed over to
        `music21.meter.TimeSignature`.

    Returns
    -------
    dict
        The "numerator" and "denominator" that music21 reports for the parsed
        time signature.

    """
    # Parsing is delegated to music21; only the two fields are retained
    parsed = music21.meter.TimeSignature(time_signature_str)
    return dict(numerator=parsed.numerator, denominator=parsed.denominator)

def encode_metrical_onset(measure, offset, offset_type="auto"):
"""
Expand Down Expand Up @@ -65,9 +80,10 @@ def encode_metrical_onset(measure, offset, offset_type="auto"):
return float(measure) + offset


def append_listed_annotation(jams_object:jams.JAMS, namespace:str,
ann_listed:list, offset_type='auto', ann_start:float=1.1,
ann_duration: Union[float, str]=None, confidence=1., reversed=False):
def append_listed_annotation(jams_object: jams.JAMS, namespace: str,
ann_listed: list, offset_type='auto', value_fn=lambda x: x,
ann_start:float=1.1, ann_duration: Union[float, str]=None,
confidence=1., reversed=False):
"""
Append a score annotation encoded as a list of score observations, each
providing information of [measure, offset, metrical duration, value], where
Expand Down Expand Up @@ -123,7 +139,7 @@ def append_listed_annotation(jams_object:jams.JAMS, namespace:str,
time=encode_metrical_onset(measure, offset, offset_type),
duration=duration, # duration always expected in quarter beats
confidence=confidence,
value=value
value=value_fn(value)
)

# Add namespace annotation to jam file
Expand All @@ -136,3 +152,69 @@ def infer_duration(jams_object: jams.JAMS, append_meta=False):
Infer the duration of a piece from the longest annotation.
"""
raise NotImplementedError


def create_timesig_annotation(timesig: str, duration: int, jam: jams.JAMS = None):
    """
    Create a time signature JAMS annotation from a global time signature,
    given as a string, and the expected duration of the piece / annotation,
    given as the number of measures. If a JAMS object is provided, the new
    annotation will be appended to it.

    Parameters
    ----------
    timesig : str
        A string encoding the global time signature to consider.
    duration : int
        Duration of the piece / annotation expressed in no. of measures.
    jam : jams.JAMS
        A JAMS file that will be optionally extended with the new annotation.

    Returns
    -------
    timesig_ann : jams.Annotation
        The new annotation of the global time signature in the piece.

    Notes
    -----
    - This method does too many things at the moment, like adding metadata.
    - When `jam` is given, its `file_metadata.duration` is overwritten with
      the duration in beats computed here.

    """
    # First create a time signature object via M21
    m21_timesig = music21.meter.TimeSignature(timesig)
    # Measures are converted to beats through music21's beat count per measure
    dur_in_beats = m21_timesig.beatCount * duration
    # We can now create the annotation object from the global time signature:
    # a single observation spanning the whole annotation
    timesig_ann = jams.Annotation(namespace="timesig", duration=dur_in_beats)
    timesig_ann.append(
        time=1, duration=dur_in_beats, confidence=1.,
        value={"numerator": m21_timesig.numerator,
               "denominator": m21_timesig.denominator})

    # Explicit None check: the decision to update should not depend on how
    # the JAMS object happens to evaluate in a boolean context
    if jam is not None:
        # The new duration is now expressed in beats. Note that, if another
        # duration was specified before, this will be overridden now.
        jam.file_metadata.duration = dur_in_beats
        jam.annotations.append(timesig_ann)

    return timesig_ann


def retrieve_global_timesig(jam: jams.JAMS):
    """
    Returns the global time signature, if present, as a `music21` object.

    A `timesig` annotation is treated as global when it holds exactly one
    observation.

    Parameters
    ----------
    jam : jams.JAMS
        The JAMS object whose `timesig` annotations are searched.

    Returns
    -------
    gtimesig : music21.meter.TimeSignature or None
        The global time signature, or None when the JAMS has no `timesig`
        annotation or none of them holds a single observation.

    Raises
    ------
    InconsistentMetricalAnnotation
        If more than one single-observation `timesig` annotation is found.

    """
    timesig_anns = jam.search(namespace="timesig")
    if len(timesig_anns) == 0:
        logger.info("No time signature found in the given JAMS.")
        return None  # still regular behaviour

    gtimesig_ann = [ts_ann for ts_ann in timesig_anns if len(ts_ann.data) == 1]
    if len(gtimesig_ann) > 1:  # sanity check on the global time signature
        raise InconsistentMetricalAnnotation(
            f"Expected 1 global time signature, {len(gtimesig_ann)} found!")
    if not gtimesig_ann:
        # Every timesig annotation holds several observations, so there is no
        # global one; indexing the empty list here would raise an IndexError
        logger.info("No global time signature found in the given JAMS.")
        return None

    gtimesig = gtimesig_ann[0].data[0].value
    gtimesig = music21.meter.TimeSignature(
        f"{gtimesig['numerator']}/{gtimesig['denominator']}")

    return gtimesig
4 changes: 4 additions & 0 deletions choco/namespaces/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@

# Directory of this module: the namespace JSON files registered below are
# looked up in here, and registration happens when the package is imported.
ns_dir = os.path.dirname(os.path.abspath(__file__))

# New chord namespaces
jams.schema.add_namespace(os.path.join(ns_dir, "chord_ireal.json"))
jams.schema.add_namespace(os.path.join(ns_dir, "chord_jparser_harte.json"))
jams.schema.add_namespace(os.path.join(ns_dir, "chord_jparser_functional.json"))
jams.schema.add_namespace(os.path.join(ns_dir, "chord_m21_leadsheet.json"))
jams.schema.add_namespace(os.path.join(ns_dir, "chord_m21_abc.json"))
jams.schema.add_namespace(os.path.join(ns_dir, "chord_weimar.json"))

# New symbolic namespaces
jams.schema.add_namespace(os.path.join(ns_dir, "timesig.json"))
20 changes: 20 additions & 0 deletions choco/namespaces/timesig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{"timesig":
{
"value": {
"type": "object",
"properties": {
"numerator": {
"type": "number",
"minimum": 1
},
"denominator": {
"type": "number",
"minimum": 1
}
},
"required": ["numerator", "denominator"]
},
"dense": false,
"description": "Time signatures as annotated symbolic content."
}
}
24 changes: 22 additions & 2 deletions choco/parsers/dcmlab_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,26 @@

import music21

def compress_annotation(annotation):
    """
    Creates a new annotation where consecutive observations with the same value
    are collapsed into a single observation with accumulated durations.

    Parameters
    ----------
    annotation : list of list
        A raw annotation containing [measure, beat, duration, value] records.

    Returns
    -------
    collapsed_annotation : list of list
        The collapsed records; each run of equal values keeps the measure and
        beat of its first record and the sum of the durations in the run. An
        empty annotation yields an empty list.

    Notes
    -----
    - The records of the input annotation are copied, hence never modified.

    """
    collapsed_annotation = []
    for observation in annotation:
        if collapsed_annotation \
                and observation[-1] == collapsed_annotation[-1][-1]:
            # Same value as the previous record: extend its duration
            collapsed_annotation[-1][-2] += observation[-2]
        else:  # this is a novel observation to insert
            # Copied, so that accumulating durations on the collapsed record
            # does not alter the record held by the caller
            collapsed_annotation.append(list(observation))

    return collapsed_annotation


def process_dcmlab_record(annotation_df: pd.DataFrame):
"""
Expand Down Expand Up @@ -81,9 +101,9 @@ def process_dcmlab_record(annotation_df: pd.DataFrame):
lstack = lambda x,y: [x_i + x_j for x_i, x_j in zip(x,y)]
stack_time = partial(lstack, x=timing_info)

local_keys = stack_time(y=local_keys)
chords_roman = stack_time(y=chords_roman)
chords_numeral = stack_time(y=chords_numeral)
time_signatures = stack_time(y=time_signatures)
local_keys = compress_annotation(stack_time(y=local_keys))
time_signatures = compress_annotation(stack_time(y=time_signatures))

return chords_roman, chords_numeral, time_signatures, local_keys
4 changes: 3 additions & 1 deletion choco/parsers/harm_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,5 +148,7 @@ def process_multiline_annotation(annotation):
hartelike_ann.append([measure, beat, duration, chord_label])
romanlike_ann.append([measure, beat, duration,
f"{key}:{chord_roman_desc}"])

time_signature_incomplete = [1, 1, bpb*hartelike_ann[-1][0], f"{bpb}/"]

return hartelike_ann, romanlike_ann, key_ann
return hartelike_ann, romanlike_ann, key_ann, time_signature_incomplete
14 changes: 9 additions & 5 deletions choco/parsers/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,11 +539,12 @@ def parse_schubert_winterreise(annotation_paths, out_dir, format, dataset_name,
# dataframe containing summative (global) annotations for all pieces.
q = {"WorkID": meta["score_file"], "PerformanceID": meta["release_id"]} \
if format == "audio" else {"WorkID": meta["score_file"]}
timesig = meta["timesign"] if format=="score" else None

jam = process_text_annotation_multi(
namespace_sources, schubert_namespace_mapping,
ignore_annotations=schubert_ignore_namespaces,
sum_query=q, duration=meta["duration"])
sum_query=q, duration=meta["duration"], timesig=timesig)
metadata_entry["jams_path"] = os.path.join(
jams_dir, metadata_entry["id"] + ".jams")
# Injecting the metadata in the JAMS files
Expand Down Expand Up @@ -1450,7 +1451,7 @@ def parse_wheninrome(dataset_dir, out_dir, dataset_name, **kwargs):
"subset": dataset,
"collection": collection,
"movement": mov,
"duration": inscore_meta["duration_m"],
"duration": inscore_meta["duration_beats"],
"file_path": romant_analysis,
"jams_path": None
}
Expand Down Expand Up @@ -1751,7 +1752,7 @@ def parse_jazzcorpus(dataset_dir, out_dir, dataset_name, **kwargs):
"jams_path": None,
}

hartelike_ann, romanlike_ann, key_ann = \
hartelike_ann, romanlike_ann, key_ann, ts_incomp = \
process_multiline_annotation(multiline_ann)
jam = jams.JAMS() # incremental JAMS constructions
jams_score.append_listed_annotation(
Expand All @@ -1760,10 +1761,13 @@ def parse_jazzcorpus(dataset_dir, out_dir, dataset_name, **kwargs):
jam, "chord_jparser_functional", romanlike_ann)
jams_score.append_listed_annotation(
jam, "key_mode", key_ann)
jam.annotations.append(jams.Annotation("timesig", data=[
jams.Observation(1, 1, {"numerator": ts_incomp[-1][0],
"denominator": None}, 1)]))

jams_utils.register_jams_meta(
jam, jam_type="score", genre="jazz",
duration=hartelike_ann[-1][0])
duration=ts_incomp[-2])
jams_utils.register_annotation_meta(jam,
annotator_name="Mark Granroth-Wilding",
annotator_type="expert_human",
Expand Down Expand Up @@ -1850,7 +1854,7 @@ def parse_mozartsonatas(dataset_dir, out_dir, dataset_name, track_meta, **kwargs
expanded=True, # playthrough
title=choco_meta["title"],
composers=choco_meta["composers"],
duration=meta["duration_m"],
duration=meta["duration_beats"],
identifiers={"musicbrainz": corpus_meta["musicbrainz"],
"wikidata": corpus_meta["wikidata"],
"imslp": corpus_meta["imslp"]},
Expand Down
17 changes: 10 additions & 7 deletions choco/parsers/ireal_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


from jams_utils import register_jams_meta, register_annotation_meta
from jams_score import append_listed_annotation
from jams_score import append_listed_annotation, to_jams_timesignature
from ireal_db import iRealDatabaseHandler
from utils import create_dir, pad_substring

Expand Down Expand Up @@ -558,17 +558,18 @@ def extract_annotations_from_tune(tune: ChoCoTune):
beat_duration = measure_beats*len(measures)

chords = [] # iterating and timing chords
for m, measure in enumerate(measures):
for m, measure in enumerate(measures, 1):
measure_chords = measure.split()
chord_dur = measure_beats / len(measure_chords)
# Creating equal onsets depending on within-measure chords and beats
onsets = np.cumsum([0]+[d for d in (len(measure_chords)-1)*[chord_dur]])
chords += [[m, o, chord_dur, c] for o, c in zip(onsets, measure_chords)]
# Encapsulating key information as a single annotation
assert len(tune.key.split()) == 1, "Single key assumed for iReal tunes"
keys = [[0, 0, beat_duration, tune.key]]

return chords, keys
keys = [[1, 1, beat_duration, tune.key]]
time_signatures = f"{tune.time_signature[0]}/{tune.time_signature[1]}"
time_signatures = [[1, 1, beat_duration, time_signatures]]
return chords, keys, time_signatures


def jamify_ireal_tune(tune:ChoCoTune):
Expand All @@ -591,7 +592,7 @@ def jamify_ireal_tune(tune:ChoCoTune):
"""
jam = jams.JAMS()
tune_meta = extract_metadata_from_tune(tune)
chords, keys = extract_annotations_from_tune(tune)
chords, keys, time_signatures = extract_annotations_from_tune(tune)

register_jams_meta(
jam, jam_type="score",
Expand All @@ -601,9 +602,11 @@ def jamify_ireal_tune(tune:ChoCoTune):
duration=chords[-1][0]+1,
genre=tune_meta["genre"],
)
jam.sandbox["tempo"] = tune_meta["tempo"] # extra metadata
jam.sandbox["tempo"] = tune_meta["tempo"] # XXX should be annotation
append_listed_annotation(jam, "chord_ireal", chords, offset_type="beat")
append_listed_annotation(jam, "key_mode", keys, offset_type="beat")
append_listed_annotation(jam, "timesig", time_signatures,
offset_type="beat", value_fn=to_jams_timesignature)

return tune_meta, jam

Expand Down
Loading