Commit

schubert-winterreise metrical re-eng #99
jonnybluesman committed Dec 14, 2022
1 parent 421e984 commit c8b4c52
Showing 269 changed files with 37,490 additions and 36,614 deletions.
66 changes: 66 additions & 0 deletions choco/jams_score.py
@@ -6,6 +6,7 @@
from typing import Union

import jams
import music21
import numpy as np

logger = logging.getLogger("choco.jams_score")
@@ -15,6 +16,9 @@ class UnexpectedOffsetType(Exception):
"""Raised when the offset type cannot be inferred"""
pass

class InconsistentMetricalAnnotation(Exception):
"""Raised when a JAMS contains inconsistent metrical annotations"""
pass

def encode_metrical_onset(measure, offset, offset_type="auto"):
"""
@@ -136,3 +140,65 @@ def infer_duration(jams_object: jams.JAMS, append_meta=False):
Infer the duration of a piece from the longest annotation.
"""
raise NotImplementedError


def create_timesig_annotation(timesig: str, duration: int, jam: jams.JAMS = None):
"""
Create a time signature JAMS annotation from a global time signature,
given as a string, and from the expected duration of the piece / annotation,
expressed as a number of measures. If a JAMS object is provided, the new
annotation will also be appended to it.
Parameters
----------
timesig : str
A string encoding the global time signature to consider.
duration : int
Duration of the piece / annotation expressed in no. of measures.
jam : jams.JAMS
A JAMS file that will be optionally extended with the new annotation.
Returns
-------
timesig_ann : jams.Annotation
The new annotation of the global time signature in the piece.
"""
# First create a time signature object via M21
m21_timesig = music21.meter.TimeSignature(timesig)
beats_per_measure = m21_timesig.beatCount
dur_in_beats = beats_per_measure * duration
# We can now create the annotation object from the global time signature
timesig_ann = jams.Annotation(namespace="timesig", duration=dur_in_beats)
timesig_ann.append(time=1, duration=dur_in_beats, confidence=1.,
value={"numerator": m21_timesig.numerator,
"denominator": m21_timesig.denominator})

if jam: # updating the JAMS object, if given
# The new duration is now expressed in beats. Note that, if another
# duration was specified before, this will be overridden now.
jam.file_metadata.duration = duration * beats_per_measure
jam.annotations.append(timesig_ann)

return timesig_ann
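
# A minimal usage sketch with illustrative values: for a 3/4 piece spanning
# 24 measures, the annotation (and the JAMS duration, when a JAMS object is
# passed) is expressed as 24 * 3 = 72 beats.
#   example_jam = jams.JAMS()
#   timesig_ann = create_timesig_annotation("3/4", 24, jam=example_jam)
#   example_jam.file_metadata.duration  # 72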


def retrieve_global_timesig(jam: jams.JAMS):
"""
Returns the global time signature, if present, as a `music21` object.
"""
timesig_anns = jam.search(namespace="timesig")
if len(timesig_anns) == 0:
logger.info("No time signature found in the given JAMS.")
return None # still regular behaviour

gtimesig_ann = [ts_ann for ts_ann in timesig_anns if len(ts_ann.data) == 1]
if len(gtimesig_ann) != 1: # sanity check on the global time signature
raise InconsistentMetricalAnnotation(
f"Expected 1 global time signature, {len(gtimesig_ann)} found!")

gtimesig = gtimesig_ann[0].data[0].value
gtimesig = music21.meter.TimeSignature(
f"{gtimesig['numerator']}/{gtimesig['denominator']}")

return gtimesig
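
# A minimal usage sketch, assuming `example_jam` holds the single-observation
# "timesig" annotation created above: the global time signature comes back as
# a `music21.meter.TimeSignature`, exposing beat-related properties.
#   gtimesig = retrieve_global_timesig(example_jam)
#   gtimesig.ratioString  # "3/4"
#   gtimesig.beatCount    # 3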
3 changes: 2 additions & 1 deletion choco/parsers/instances.py
@@ -539,11 +539,12 @@ def parse_schubert_winterreise(annotation_paths, out_dir, format, dataset_name,
# dataframe containing summative (global) annotations for all pieces.
q = {"WorkID": meta["score_file"], "PerformanceID": meta["release_id"]} \
if format == "audio" else {"WorkID": meta["score_file"]}
timesig = meta["timesign"] if format == "score" else None
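# E.g., a hypothetical score-format entry with meta["timesign"] = "2/4" and
# meta["duration"] = 48 (measures) would forward timesig="2/4" below, whereas
# audio-format entries keep timesig = None and durations in seconds.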

jam = process_text_annotation_multi(
namespace_sources, schubert_namespace_mapping,
ignore_annotations=schubert_ignore_namespaces,
sum_query=q, duration=meta["duration"])
sum_query=q, duration=meta["duration"], timesig=timesig)
metadata_entry["jams_path"] = os.path.join(
jams_dir, metadata_entry["id"] + ".jams")
# Injecting the metadata in the JAMS files
49 changes: 45 additions & 4 deletions choco/parsers/multifile_parser.py
@@ -19,12 +19,18 @@

sys.path.append(os.path.dirname(os.getcwd()))

from jams_score import create_timesig_annotation, retrieve_global_timesig

logger = logging.getLogger("choco.parsers.multifile_parser")


def process_summative_annotation(summative_anns, namespace_mapping, sum_query,
jams_tmp=None, duration=None, sep=";", confidence=1.):
"""
Append a new summative annotation to the given JAMS file by retrieving it
from a CSV file using the specified query (needed to find the row that
corresponds to the piece under analysis). As this is a summative
annotation, its duration spans the whole piece.
Parameters
----------
@@ -48,6 +54,7 @@ def process_summative_annotation(summative_anns, namespace_mapping, sum_query,
The separator to consider for the raw text files to read.
default_confidence : float
A float in (0, 1] indicating the confidence/reliability of annotations.
"""
if not isinstance(summative_anns, pd.DataFrame):
summative_anns = pd.read_csv(summative_anns, sep=sep)
@@ -103,20 +110,41 @@ def process_text_annotation(annotation_file, namespace_mapping, jams_tmp=None,
"""
# Create a fresh new JAMS object if this is the first annotation
# XXX This should not happen from the way this method is called
jam = jams.JAMS() if jams_tmp is None else jams_tmp

annotation_df = pd.read_csv(annotation_file, sep=sep)
# Renaming columns to match the namespace for the JAMS file
annotation_df = annotation_df.rename(columns=namespace_mapping)
# Check whether duration should be inferred from offsets or nullified
if "end" in annotation_df.columns and "duration" not in annotation_df.columns:
# Smooth out the annotation start and end times by rounding
annotation_df["start"] = annotation_df["start"].round(2)
annotation_df["end"] = annotation_df["end"].round(2)
# Compute the duration as the difference between end and start times
annotation_df["duration"] = annotation_df["end"] - annotation_df["start"]
elif "end" not in annotation_df.columns: # duration is assumed as null
logger.warning(f"Duration defaulted to 0 for {annotation_file}")
annotation_df["duration"] = 0.0
# Check whether confidence is not provided and should be defaulted
if "confidence" not in annotation_df.columns:
annotation_df["confidence"] = confidence


# Checking if this is a symbolic annotation, for re-engineering timings
if jam.search(namespace="timesig"):
gtimesig = retrieve_global_timesig(jam)
beats_per_measure = gtimesig.beatCount
# Split the start timings to obtain measure and measure offsets
start_measures = annotation_df["start"].astype(int)
start_offsets = annotation_df["start"] - start_measures
# From measure offsets (0.5) to beat offsets (2 in 4/4)
new_start_offsets = start_offsets * beats_per_measure
new_durations = annotation_df["duration"] * beats_per_measure
assert all(new_start_offsets < 10), "More than 9 beats in measure"
# Finally, we can override the start and duration columns
annotation_df["start"] = start_measures + new_start_offsets / 10
annotation_df["duration"] = new_durations

inner_namespaces = [cname for cname in annotation_df.columns
if cname not in ["start", "end", "duration", "confidence"]]
inner_namespaces = [namespace for namespace in inner_namespaces \
@@ -140,9 +168,10 @@ def process_text_annotation(annotation_file, namespace_mapping, jams_tmp=None,


def process_text_annotation_multi(namespace_sources, namespace_mapping,
sum_query=None, ignore_annotations=[], sep=";", duration=None, confidence=1.):
sum_query=None, ignore_annotations=[], sep=";", duration=None, timesig=None,
confidence=1.):
"""
Parse annotation data from different sources (fodlers, files) containing
Parse annotation data from different sources (folders, files) containing
music annotations of different properties but related to the same pieces.
Parameters
@@ -157,11 +186,20 @@ def process_text_annotation_multi(namespace_sources, namespace_mapping,
namespace_mapping : dict
A dictionary mapping dataset-specific annotation names to actual JAMS
namespaces (e.g. shorthand to chord_harte).
sum_query : dict
A query to search piece-specific content from summative annotations.
ignore_annotations : list
A list of annotations that should not be converted into a namespace.
sep : str
The separator to consider for the raw text files to read.
default_confidence : float
duration : float
Duration of the piece in measures (score) or seconds (audio). Note that
if the former is given, it will be converted to beats and encoded
in the resulting JAMS file accordingly (depending on `timesig`).
timesig : str
This parameter should be provided only when parsing symbolic annotations
and represents the global time signature as a string (e.g. 4/4).
confidence : float
A float in (0, 1] indicating the confidence/reliability of annotations.
Returns
@@ -178,6 +216,9 @@
"""
jam = jams.JAMS() # start creating the JAMS file
jam.file_metadata.duration = duration # needed for summative annotations
if timesig: # parameter is provided, hence, score annotation to process
# This is going to add a new time signature annotation and update dur
create_timesig_annotation(timesig, duration, jam)
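# E.g., for a hypothetical score with timesig="2/4" and duration=48 measures,
# the call above re-expresses the JAMS duration as 48 * 2 = 96 beats.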

for general_namespace, annotation_files in namespace_sources.items():
# Check whether the annotation is summative or not