+from collections import Counter, defaultdict, deque
+from dataclasses import asdict, dataclass, replace
+from enum import Enum
+from itertools import groupby, pairwise
+from os import environ
+from typing import Collection, Iterable, Iterator, Optional, Union
+import dataclasses
+import itertools
+import re
+
+from pithub.common import throwing_factory
+
+
def parse_orchestrations(lines: list[str]) -> Iterator["Orchestration"]:
    """
    Lazily parse every orchestration found in ``lines``.

    Blank lines are dropped and the remaining lines are stripped. The parser then alternates
    between skipping uninteresting material (``_pop_uninteresting_section``) and handing the
    remaining lines to ``Orchestration.pop_from_lines``, yielding each orchestration as soon as it
    has been parsed.

    Raises ``RuntimeError`` when the line at the head of the input can neither be skipped nor
    parsed as part of an orchestration.
    """
    # ``str.isspace`` is False for the empty string, so filter on the stripped text instead;
    # otherwise empty lines survive and get counted as section text by the section skipper.
    remaining = [stripped for line in lines if (stripped := line.strip())]
    while remaining:
        skipped = _pop_uninteresting_section(remaining)
        if skipped is not None:
            remaining = skipped
            continue

        result = Orchestration.pop_from_lines(remaining)
        if isinstance(result, Orchestration.ParseFailure):
            raise RuntimeError(f"Orchestration parsing stuck at line: {result.stuck_line!r}")
        (orchestration, remaining) = result
        yield orchestration
+
+
def _fix_bass_part(parts: list["Part"]) -> None:
    """
    Resolve in place what a part named "Bass" means: upright bass or bass guitar.

    Consecutive parts are grouped by kind (the bass part, string parts, everything else). If the
    bass part lists no instruments and sits next to a run of string parts, an upright bass is
    inserted as its first required instrument. Nothing is returned.

    Unpacking fails with ``ValueError`` if there is more than one run of bass parts or the run
    holds more than one part.
    """
    Kind = Enum("Kind", ["BASS", "STRING", "OTHER"])

    def classify(part: Part) -> Kind:
        if part.name == "Bass":
            return Kind.BASS
        listed = set(part.required_instruments) | set(part.optional_instruments)
        return Kind.STRING if listed & STRING_INSTRUMENTS else Kind.OTHER

    runs = [(kind, list(members)) for (kind, members) in groupby(parts, classify)]
    bass_positions = [pos for (pos, (kind, _)) in enumerate(runs) if kind is Kind.BASS]
    if not bass_positions:
        return

    (bass_pos,) = bass_positions
    (bass_part,) = runs[bass_pos][1]
    neighbour_kinds = [
        runs[pos][0] for pos in (bass_pos - 1, bass_pos + 1)
        if 0 <= pos < len(runs)
    ]

    lists_instruments = bool(bass_part.required_instruments or bass_part.optional_instruments)
    if not lists_instruments:
        if Kind.STRING in neighbour_kinds:
            bass_part.required_instruments.insert(0, Instrument.UPRIGHT_BASS)
    elif bass_part._double:
        # We don't want to miss e.g. a guitar part calling for acoustic guitar implicitly by
        # merely stating it doubles electric guitar.
        assert {Instrument.BASS_GUITAR, Instrument.UPRIGHT_BASS} <= {
            *bass_part.required_instruments, *bass_part.optional_instruments,
        }
+
+
+@dataclass
+class Orchestration:
+ @dataclass
+ class ParseFailure:
+ stuck_line: str
+
+ description: str
+ parts: list["Part"]
+
+ @classmethod
+ def pop_from_lines(cls, lines: list[str]) -> tuple["Orchestration", list[str]]|ParseFailure:
+ queue = deque(lines)
+ if "orchestration" in queue[0].lower() or queue[0].endswith(":"):
+ title_line = queue.popleft()
+ title = title_line.removesuffix(":")
+ assert "orchestration" in title.lower() or re.search(r"\d-Piece\b", title)
+ else:
+ title = None
+
+ parts = []
+ optional = False
+ check_piano = False
+ while queue:
+ if queue[0] == "Optional Orchestra Parts:":
+ queue.popleft()
+ optional = True
+ elif (m := re.match(r"\(([^\d]+)\)$", queue[0])) and m[1] in _PART_NOTES:
+ queue.popleft()
+ elif re.match(r"\(Note: .+\)$", queue[0]):
+ queue.popleft()
+ else:
+ if environ.get("LIVE_DEBUG"):
+ result = Part.parse_from_line(queue[0])
+ else:
+ try:
+ result = Part.parse_from_line(queue[0])
+ except Exception:
+ raise RuntimeError(f"Error parsing line: {queue[0]!r}")
+
+ if result is None:
+ break
+ else:
+ queue.popleft()
+ if isinstance(result, list):
+ for v in result:
+ part = v.part if isinstance(v, _ActPart) else v
+ part.optional |= optional
+ parts.append(v)
+ else:
+ assert isinstance(result, IgnoredPart)
+ if result is IgnoredPart.REHEARSAL_PIANO:
+ check_piano = True
+
+ if parts:
+ zipped_parts = []
+ key = lambda v: (
+ isinstance(v, _ActPart),
+ v.part if isinstance(v, _ActPart) else v
+ )
+ for ((act, part), group) in groupby(parts, key):
+ if act:
+ assert len(list(group)) > 1
+ zipped_parts.append(part)
+ else:
+ zipped_parts.extend(group)
+ assert all(isinstance(p, Part) for p in zipped_parts)
+
+ for part in zipped_parts:
+ if Instrument.KEYBOARD in part.instruments():
+ assert isinstance(part, KeyboardPart)
+ # TODO clean this up
+ converted = [p.maybe_convert_down() for p in zipped_parts]
+
+ if check_piano:
+ # We ignore rehearsal-specific parts, but if we see one we expect a real piano
+ # performance part.
+ assert any(isinstance(p, KeyboardPart) for p in converted)
+
+ fixed_parts = deduplicate_keyboard_parts(converted)
+ _fix_bass_part(fixed_parts)
+
+ return (cls(title, fixed_parts), list(queue))
+ else:
+ return cls.ParseFailure(queue[0])
+
+
@dataclass
class _ActPart:
    """
    Sometimes two parts are listed when one logical part is physically separated by act (in
    practice, large keyboard parts). We parse these separately but then zip them up, so this tags
    them internally in the parser interface.
    """
    # the parsed part that this per-act entry belongs to
    part: "Part"
+
+
def parts_to_instruments(parts: str) -> list["Instrument"]:
    """
    Turn a part name such as "Violin/Viola" or "Flute & Piccolo 2" into its instruments.

    The name is split on "/", ", " and " & ". Each piece has a leading "AKA " and a trailing
    single-digit ordinal removed before being handed to ``parse_instrument``; the results are
    concatenated in order.
    """
    instruments = []
    for subpart in re.split("/|, | & ", parts):
        bare_name = re.sub(r" \d$", "", subpart.removeprefix("AKA "))
        instruments.extend(parse_instrument(bare_name))
    assert all(isinstance(v, Instrument) for v in instruments)
    return instruments
+
+
+def _is_coverage_note(detail: str) -> bool:
+ num_patt = r"([1-9]*[0-9]a?)"
+ token_patt = fr"{num_patt}(-{num_patt})?"
+ return bool(re.match(fr"Nos\. {token_patt}(, {token_patt})*$", detail))
+
+
# Regular expression for one line of an orchestration listing. Named groups:
#   count            leading number of copies/players
#   opt, opt2, opt3  the three places a line may be marked optional
#   part             the full part name, including any ordinals or trailing letter A-D
#   instr            the part name without ordinals
#   nums             ordinals such as "1" or "1&2"
#   paren            parenthesized text after the part name (other than "Optional")
#   list             free text following a dash
#   rp               a trailing "(rehearsal)" or "(performance)" marker
_PART_PATTERN = (
    r"(?P<count>\d+)(?P<opt> Optional)?"
    + r" +(?P<part>(?P<instr>[A-Za-z &]+?([ /-][A-Za-z1-2]+){,2}?)( +(?P<nums>\d+(&\d+)*)| [A-D])?)"
    + r" *(\(((?P<opt2>Optional)|(?P<paren>[^)]+))\))?"
    + r"( +[–—-] +(?P<list>.+?)(?P<opt3> \(optional\))?| +\((?P<rp>rehearsal|performance)\))?$"
)
+
+
@dataclass
class Part:
    """
    Generally this corresponds one-to-one with an entry in an orchestration list (where the entry
    represents something actually played during a performance), but entries like "Percussion 1 & 2"
    are expanded into two instances.
    """
    # part name as listed; Optional per the annotation
    name: Optional[str]
    # instruments the part calls for; a Choice means any one of its instruments suffices
    required_instruments: list[Union["Instrument", "Choice"]]
    players: Optional[int] = None # None is module internal use only
    # whether the whole part is optional
    optional: bool = False
    role: Optional["AdditionalRole"] = None
    optional_instruments: list["Instrument"] = dataclasses.field(default_factory = list)
    # whether the instrument list was marked as doubling (read by _fix_bass_part)
    _double: bool = False # module internal use

    def __post_init__(self) -> None:
        """Drop duplicate instruments from both lists, keeping first-seen order."""
        # remove duplicate instruments from each list
        self.required_instruments = list(dict.fromkeys(self.required_instruments))
        self.optional_instruments = list(dict.fromkeys(self.optional_instruments))

        # NOTE(review): the loop body below is only a string literal, so this loop currently
        # performs no checks.
        for v in self.instruments():
            # TODO re-enable these checks when test serialization nonsense is fixed
            """
            if isinstance(v, Choice):
                assert all(isinstance(opt, Instrument) for opt in v.choices)
            else:
                assert isinstance(v, Instrument)
            """

    @classmethod
    def parse_from_line(cls, line: str) -> Optional[Union[list[Union["Part", "_ActPart"]], "IgnoredPart"]]:
        """
        Parse one line of an orchestration listing.

        Returns ``None`` when the line is not recognized as a part line, an ``IgnoredPart``
        member for material that is recognized but not modelled, and otherwise a list of parts
        (possibly empty, or holding several entries when one line names two numbered parts).
        Keyboard parts listed per act are wrapped in ``_ActPart``.

        Unexpected combinations of fields trip the assertions below rather than being handled.
        """
        # TODO much of this logic is currently Concord-specific and should be factored out into
        # concord module
        if re.match(r"\d (Logo Pack|Pre-Show Music Lead Sheets)$", line):
            return IgnoredPart.OTHER
        elif match := re.match(_PART_PATTERN, line):
            is_optional = bool(match["opt"] or match["opt2"] or match["opt3"])

            # An explicit player count in parentheses overrides the leading count.
            player_count = int(match["count"])
            if match["paren"] and (m := re.match(r"(\d+) players?(,? divisi)?$", match["paren"], re.IGNORECASE)):
                player_count = int(m[1])
                explicit_player_count = True
            elif match["paren"] == "one or more players":
                player_count = 1
                explicit_player_count = True
            else:
                explicit_player_count = False

            if match["nums"]:
                ordinals = list(map(int, match["nums"].split("&")))
            else:
                ordinals = []

            # rehearsal/performance markers found in either position
            rps = set(filter(None, [match["paren"], match["rp"]])) & {"rehearsal", "performance"}
            if match["part"] == "Full Score":
                assert not match["opt"]
                assert re.match(r"Act \d \(stick conductor\)$", match["list"])
                return IgnoredPart.CONDUCTOR
            elif match["part"] in {"Libretto-Vocal Book", "Vocal Book", "Libretti"}:
                return IgnoredPart.LIBRETTO
            elif match["instr"] == "Piano-Vocal" and rps <= {"rehearsal"}:
                # TODO try converting instr check to assertion to tighten rp checking
                assert not match["opt"]
                return IgnoredPart.REHEARSAL_PIANO
            elif part := KeyboardPart.parse(match["part"]):
                # ignore part count as materials often include extras of these
                if is_optional:
                    part = replace(part, optional = True)
                if match["list"]:
                    # only these known notes are accepted after the dash for keyboard parts
                    assert match["list"].lower() in {
                        "doubling celeste",
                        "intended to supplement a small string section",
                        "primarily accordion",
                        "replaces violin, viola & cello",
                    }
                include = True
                if match["paren"]:
                    act_part = bool(re.match(r"Act \d$", match["paren"]))
                    if match["paren"] == "rehearsal & stick conductor":
                        include = False
                    else:
                        assert (
                            match["paren"] == "rehearsal & performance"
                            or act_part and match["rp"] == "performance"
                        )
                else:
                    act_part = False
                if include:
                    return [_ActPart(part) if act_part else part]
                else:
                    return []
            elif player_count == 1:
                if match["paren"] and not explicit_player_count:
                    assert match["paren"] == "** FOLEY ARTIST FISH"
                optional_instr = []
                if match["list"] and not _is_coverage_note(match["list"]):
                    assert not is_optional
                    if match["list"] == "optional part":
                        list_text = ""
                        is_optional = True
                    elif match["list"].startswith("(optional)"):
                        list_text = match["list"].removeprefix("(optional)").strip()
                        is_optional = True
                    else:
                        list_text = match["list"]
                    if list_text.startswith("primarily "):
                        primarily = list_text.removeprefix("primarily ")
                    else:
                        primarily = None
                    if list_text:
                        if match["instr"].lower() in PART_NAMES:
                            context_instr = match["instr"]
                        else:
                            context_instr = parts_to_instruments(match["instr"])[0].name # Concord 93490
                        res = parse_instrument_list(primarily or list_text, context_instr)
                    else:
                        res = PartInstruments(False, [], [])
                    required = res.required
                    optional_instr = res.optional
                    if match["instr"].lower() not in PART_NAMES:
                        # The part name itself names instruments; combine them with the list.
                        from_part = parts_to_instruments(match["instr"])
                        if from_part == [Instrument.KEYBOARD]:
                            required = from_part
                            primarily = None
                            optional_instr = []
                        elif Instrument.GUITAR in from_part:
                            has_specific_instr = any(
                                "GUITAR" in i.name for i in required if i is not Instrument.GUITAR
                            )
                            if not res.double and has_specific_instr:
                                # a specific guitar is listed and not as a double, so the
                                # generic GUITAR entry is dropped
                                required = [
                                    i for i in dict.fromkeys(itertools.chain(from_part, required))
                                    if i is not Instrument.GUITAR
                                ]
                            else:
                                required = list(dict.fromkeys(itertools.chain(from_part, required)))
                        else:
                            required = list(dict.fromkeys(itertools.chain(from_part, required)))
                    double = res.double
                    if primarily:
                        assert set(parse_instrument(primarily)) <= {*required, *optional_instr}
                else:
                    if match["instr"].lower() in PART_NAMES:
                        # non-instrument part with no instrument information
                        required = []
                    else:
                        required = parts_to_instruments(match["part"])
                    double = False
                return [
                    cls(
                        name = match["part"],
                        required_instruments = required,
                        players = 1,
                        optional_instruments = optional_instr,
                        optional = is_optional,
                        _double = double,
                    )
                ]
            elif player_count > 1 and match["part"].endswith("s"):
                # plural part name with several players, e.g. a section
                assert not is_optional
                assert not match["paren"] and not match["list"]
                clean_part = match["part"].removesuffix("s")
                required = parse_instrument(clean_part)
                return [cls(name = clean_part, required_instruments = required, players = player_count)]
            else:
                assert not is_optional
                if match["instr"].lower() in PART_NAMES:
                    assert player_count == 2
                    if len(ordinals) == 2:
                        if match["list"]:
                            # two sentences of the form "<name> Player: <instrument list>"
                            sublists = match["list"].split(". ")
                            assert len(sublists) == 2
                            players = {
                                (m := re.match(r"(.+?) Player: (.+)$", sublist))[1]:
                                parse_instrument_list(m[2], match["instr"])
                                for sublist in sublists
                            }
                            optional = match["paren"] and match["paren"].removesuffix(" Player is optional")
                        else:
                            # non-instrument part with no instrument information
                            players = {
                                "{} {}".format(match["instr"], ordinal): PartInstruments(False, [], [])
                                for ordinal in ordinals
                            }
                            optional = None
                        if optional:
                            assert optional in players
                        return [
                            cls(
                                name = "{} {}".format(match["instr"], ordinal),
                                required_instruments = instruments.required,
                                players = 1,
                                optional_instruments = instruments.optional,
                                optional = optional is not None and name == optional,
                                _double = instruments.double,
                            )
                            for (ordinal, (name, instruments)) in zip(ordinals, players.items())
                        ]
                    else:
                        assert not ordinals
                        return [cls(match["part"], required_instruments = [], players = player_count)]
                elif player_count == len(ordinals) == 2:
                    assert not match["paren"]
                    if match["list"]:
                        # TODO is this code dead?
                        raise RuntimeError("expected dead code; pipe double indication for bass part munging")
                        # NOTE(review): the two statements below are unreachable after the raise
                        assert "dbl" in match["list"]
                        double = parse_instrument(match["list"].removeprefix("Both dbl. "))
                    else:
                        double = []
                    return [
                        cls(
                            name = "{} {}".format(match["instr"], ordinal),
                            required_instruments = [*parse_instrument(match["instr"]), *double],
                            players = 1,
                            _double = False,
                        )
                        for ordinal in ordinals
                    ]
                else:
                    assert not match["list"] or match["list"].lower() == "divisi"
                    assert not match["rp"]
                    return [cls(
                        name = match["part"],
                        required_instruments = parts_to_instruments(match["instr"]),
                        players = player_count,
                    )]
        else:
            return None

    def instruments(self) -> Iterator[Union["Instrument", "Choice"]]:
        """Iterate over the required instruments followed by the optional ones."""
        return itertools.chain(self.required_instruments, self.optional_instruments)

    def maybe_convert_down(self) -> "Part":
        """
        Return ``self``, or raise ``RuntimeError("obsolete")`` for a non-``KeyboardPart`` whose
        only instrument is the keyboard.
        """
        if not isinstance(self, KeyboardPart) and set(self.instruments()) == {Instrument.KEYBOARD}:
            raise RuntimeError("obsolete")
            # NOTE(review): everything below in this branch is unreachable after the raise
            if ordinals := re.findall(r"\d+", self.name):
                [ordinal] = ordinals
            else:
                ordinal = None

            kind = (
                KeyboardPart.Kind.PRIMARY if ordinal in [None, "1"] and any(
                    keyword in self.name.lower()
                    for keyword in ["piano", "keyboard", "synthesizer"]
                )
                else KeyboardPart.Kind.AUXILIARY
            )
            return KeyboardPart(
                **asdict(self),
                ordinal = int(ordinal) if ordinal else None,
            )
        else:
            return self
+
+
def deduplicate_keyboard_parts(parts: Iterable["Part"]) -> list["Part"]:
    """
    Drop keyboard parts that conflict with the conductor's keyboard part and return the survivors,
    with every remaining ``KeyboardPart`` converted to a plain ``Part``.

    Part-name combinations noted by the original author (presumably pairs that name the same
    part twice):

        Keyboard 1, Piano
        Piano-Conductor, Synthesizer
        KEYBOARD1 - CONDUCTOR SCORE, KEYBOARD 1 (!)

    Among keyboard parts with the conductor role, the one with the longest name is kept and the
    others are dropped. Keyboard parts without a role are dropped when they conflict with it:

    * conductor part has no ordinal: piano/keyboard parts whose ordinal is ``None`` or 1;
    * conductor part has an ordinal: parts named just "keyboard" with that same ordinal.

    Non-keyboard parts pass through untouched and relative order is preserved.
    """
    # ``parts`` is traversed several times below; materialize it so that one-shot iterables
    # (generators, iterators) are not silently exhausted after the first pass.
    parts = list(parts)

    pc_parts = [
        part for part in parts
        if isinstance(part, KeyboardPart) and part.role is AdditionalRole.CONDUCTOR
    ]
    if pc_parts:
        pc_part = max(pc_parts, key = lambda p: len(p.name))
        if pc_part.ordinal is None:
            conflicts = [
                part for part in parts if (
                    isinstance(part, KeyboardPart)
                    and part.role is None
                    and set(part.instrument_names) & {"piano", "keyboard"}
                    and part.ordinal in [None, 1]
                )
            ]
        else:
            assert set(pc_part.instrument_names) <= {"piano", "keyboard"}
            conflicts = [
                part for part in parts if (
                    part is not pc_part and isinstance(part, KeyboardPart)
                    and part.instrument_names == ["keyboard"] and part.ordinal == pc_part.ordinal
                )
            ]
    else:
        pc_part = None
        conflicts = []

    return [
        part.convert_up() if isinstance(part, KeyboardPart) else part
        for part in parts
        if (
            part is pc_part
            or (isinstance(part, KeyboardPart) and part.role is None and part not in conflicts)
            or not isinstance(part, KeyboardPart)
        )
    ]
+
+
@dataclass
class KeyboardPart(Part):
    """
    A ``Part`` played on a keyboard instrument, carrying the extra naming details
    (``instrument_names``, ``ordinal``) that ``deduplicate_keyboard_parts`` reads.
    """
    Kind = Enum("Kind", ["PRIMARY", "AUXILIARY"])

    # Due to fields in the supertype with defaults these must have defaults, but we enforce that
    # they're always explicitly specified.
    # lower-case instrument words taken from the part name, e.g. ["piano", "synthesizer"]
    instrument_names: list[str] = dataclasses.field(default_factory = throwing_factory)
    # the number in the part name ("Keyboard 2" -> 2), or None when there is none
    ordinal: Optional[int] = dataclasses.field(default_factory = throwing_factory)

    @classmethod
    def parse(cls, part_name: str) -> Optional["KeyboardPart"]:
        """
        Build a ``KeyboardPart`` from a part name, or return ``None`` if the name is not a
        recognized keyboard part name. Matching is case-insensitive.

        Unpacking fails with ``ValueError`` when the name carries two different ordinals.
        """
        patt = (
            r"(?P<base>keyboard|piano|(string )?synth(esizer)?)(?P<pl>s)? ?(?P<ord>\d)?"
            + r" ?-? ?(?P<cond>conductor(('s)? score)?)?"
            + r"(/(aka )?(?P<base2>keyboard|synthesizer) ?(?P<ord2>\d)?)?$"
        )
        if m := re.match(patt, part_name.lower()):
            base = re.sub(r"\bsynth\b", "synthesizer", m["base"])

            if m["ord"] or m["ord2"]:
                # both positions may carry the ordinal, but they must agree
                [ordinal] = set(filter(None, [m["ord"], m["ord2"]]))
            else:
                ordinal = None

            if m["pl"]:
                # plural part name suggests >1 player
                assert not (ordinal or m["cond"] or m["base2"])
                players = 2
            else:
                players = None

            return KeyboardPart(
                part_name,
                [Instrument.KEYBOARD],
                players,
                role = AdditionalRole.CONDUCTOR if m["cond"] else None,
                instrument_names = [n for n in [base, m["base2"]] if n],
                ordinal = int(ordinal) if ordinal else None,
            )
        elif m := re.match(r"(organ|piano)[/-](dulcimer|synthesizer|celeste)$", part_name.lower()):
            return KeyboardPart(
                part_name,
                [Instrument.KEYBOARD],
                instrument_names = [m[1], m[2]],
                ordinal = None,
            )
        else:
            return None

    def convert_up(self) -> Part:
        """
        Return a plain ``Part`` carrying this part's generic fields.

        Field values are copied by reference rather than through ``dataclasses.asdict``, which
        would recursively turn nested dataclass values (such as ``Choice``) into plain dicts.
        ``Part.__post_init__`` rebuilds both instrument lists, so the result does not share its
        lists with ``self``.
        """
        return Part(**{
            field.name: getattr(self, field.name)
            for field in dataclasses.fields(Part)
        })
+
+
@dataclass
class PartInstruments:
    """Required and optional instruments parsed from one instrument list."""

    # whether these instruments are from a list marked as "doubling"
    double: bool = False

    required: list[Union["Instrument", "Choice"]] = (
        dataclasses.field(default_factory = list)
    )
    optional: list[Union["Instrument", "Choice"]] = (
        dataclasses.field(default_factory = list)
    )

    def merge(self, other: "PartInstruments") -> "PartInstruments":
        """
        Concatenate two instrument sets into a new one.

        Both operands must carry the same ``double`` flag; otherwise the unpacking below raises
        ``ValueError``.
        """
        flags = {self.double, other.double}
        (shared_flag,) = flags
        combined_required = [*self.required, *other.required]
        combined_optional = [*self.optional, *other.optional]
        return PartInstruments(shared_flag, combined_required, combined_optional)
+
+
def parse_instrument_list(list_: str, part_instr: str) -> PartInstruments:
    """
    Parse the free-text instrument list that follows a part name.

    ``list_`` is the non-empty list text, e.g. "doubling Flute, Piccolo (optional)";
    ``part_instr`` is the name of the part it belongs to and is passed to ``parse_instrument``
    as context. A leading "dbl. ", "doubles " or "doubling " sets ``double`` on the result, and
    " & optional " separates a required list from an optional one.

    Raises ``ParseError`` (from ``parse_instrument``) for unrecognized instruments.
    """
    def parse_list_part(list_part: str) -> list[Union[Instrument, Choice]]:
        if m := re.match(r"(?P<a>.+) \(or (?P<b>.+)\)$", list_part, re.IGNORECASE):
            # This could be an instrument choice, or for percussion a note suggesting a way to
            # emulate the instrument; in the latter case we ignore the note.
            [a] = parse_instrument(m["a"], part_instr)
            try:
                [b] = parse_instrument(m["b"], part_instr)
            except ParseError:
                # not a choice: fall through and parse the whole text, note included
                pass
            else:
                return [Choice((a, b))]
        elif m := re.match(r"(?P<a>.+) DOUBLES (?P<b>.+)$", list_part):
            return parse_instrument(m["a"], part_instr) + parse_instrument(m["b"], part_instr)
        return parse_instrument(list_part, part_instr)

    def split_simple_list(simple_list: str) -> Iterator[str]:
        # close a trailing parenthesis that was left open
        if re.search(r"\([^)]*$", simple_list):
            simple_list += ")"

        # split into parenthesized and non-parenthesized parts
        paren_tok = re.split(r"(?=\()|(?<=\)(?!$))", simple_list)

        # split only unparenthesized parts as instrument lists: on a comma not contained within
        # parentheses, or on &/and appearing after any commas.
        # ``startswith`` rather than ``tok[0]``: splitting yields an empty first token when the
        # list begins with "(".
        split = list(itertools.chain.from_iterable(
            re.split(r" *, (?![^(]*\))| (?:&|and) +(?!.*,)", tok) if not tok.startswith("(") else [tok]
            for tok in paren_tok
        ))

        # combine parenthesized parts with preceding list elements
        for (a, b, c) in zip(itertools.chain([None], split), split, itertools.chain(split[1:], [None])):
            if b and b[0] == "(":
                # neither neighbour may itself be parenthesized (same test as the branch below)
                assert (not a or a[0] != "(") and (not c or c[0] != "(")
                yield "".join(filter(None, [a, b, c]))
            elif (not a or a[0] != "(") and (not c or c[0] != "("):
                yield b

    def parse_simple_list(simple_list: str) -> PartInstruments:
        required = []
        optional = []
        for part in split_simple_list(simple_list):
            # strip an "(optional)" / "opt." marker before or after the instrument
            deopt_patt = r"(?P<opt>\(?(optional|opt\.)\)? *)?(?P<part>.+?)(?P<opt2> \((optional|opt\.)\))?$"
            m = re.match(deopt_patt, part.strip(), re.IGNORECASE)
            # entries listed in _PERCUSSION_WTF are skipped for percussion parts
            if part_instr.lower() not in PERCUSSION_PART_NAMES or m["part"] not in _PERCUSSION_WTF:
                (optional if m["opt"] or m["opt2"] else required).extend(parse_list_part(m["part"]))
        return PartInstruments(False, required, optional)

    (dbl, dedoubled) = re.match(r"(dbl\. |doubles |doubling )?(.+)$", list_, re.IGNORECASE).groups()

    if m := re.match(r"(.+)\. (.+) and (.+) are double lined for (.+)$", dedoubled):
        [denoted, *note_instr_names] = m.groups()
    else:
        denoted = dedoubled
        note_instr_names = []

    simple_lists = denoted.split(" & optional ")
    if len(simple_lists) == 2:
        (main, optional) = map(parse_simple_list, simple_lists)
        assert not main.optional and not optional.optional
        result = PartInstruments(bool(dbl), main.required, optional.required)
    else:
        [simple_list] = simple_lists
        result = replace(parse_simple_list(simple_list), double = bool(dbl))

    # instruments named in a "double lined" note must already be in the required list
    for note_instr_name in note_instr_names:
        [note_instr] = parse_instrument(note_instr_name)
        assert note_instr in result.required
    return result
+
+
class ParseError(Exception):
    """Raised by ``parse_instrument`` when the text matches no known instrument."""
    pass
+
+
def parse_instrument(text: str, context_instr: Optional[str] = None) -> list["Instrument"]:
    """
    Resolve an instrument name as written in a listing to ``Instrument`` members.

    ``text`` is the instrument text, optionally followed by a parenthesized note.
    ``context_instr`` is the name of the part (or instrument) the text appears under; it enables
    percussion- and guitar-specific clean-up and is tried as a suffix of the name.

    Returns a one-element list, except for a drum kit with a note, where the note is parsed as a
    list of instruments.

    Raises ``ParseError`` if no candidate form of the name is known, and ``ValueError`` if the
    instrument is known but its parenthesized note is not accepted.
    """
    def to_identifier(name: str) -> str:
        # Build the key used for Instrument / _INSTRUMENT_TYPOS lookups: upper-case, whitespace
        # and -/' replaced by underscores, and a leading underscore before a leading digit.
        identifier = re.sub(
            r"\s|[-']", "_",
            name.upper().replace(" - ", " ").replace("’", "").replace('"', "")
        )
        if identifier[0].isdigit():
            identifier = "_" + identifier
        return identifier

    normalized = text.lower()
    if context_instr and context_instr.lower() == "guitar":
        # drop trailing guitar brand/model names
        normalized = re.sub(r"( +|-)(strat(ocaster)?|gretsch|telecaster)$", "", normalized)
    # NOTE(review): the replacement text on the next line looks mis-encoded (presumably meant to
    # be the flat sign); the same sequence appears in Instrument and _INSTRUMENT_TYPOS names.
    # Confirm the file's encoding before changing any of them.
    normalized = re.sub(r"(?<=\b[a-g])(b|[- ]flat)\b", "â™", normalized)
    # expand common abbreviations
    normalized = re.sub(r"\bacous\.", "acoustic", normalized)
    normalized = re.sub(r"\bbari\b", "baritone", normalized)
    normalized = re.sub(r"\bcym\b\.?", "cymbal", normalized)
    normalized = re.sub(r"\belec\.", "electric", normalized)
    normalized = re.sub(r"\bglock\.", "glockenspiel", normalized)
    normalized = re.sub(r"\bgtr\.", "guitar", normalized)
    normalized = re.sub(r"\borch\b", "orchestra", normalized)
    normalized = re.sub(r"\bsax\b", "saxophone", normalized)
    normalized = re.sub(r"\bsop\. *", "soprano ", normalized)
    normalized = re.sub(r"\bstrg\b", "string", normalized)
    normalized = re.sub(r"\bsus\.", "suspended", normalized)
    normalized = re.sub(r"\bsynth\b", "synthesizer", normalized)
    normalized = re.sub(r"\bten\. *", "tenor ", normalized)

    # separate the name from an optional trailing parenthesized note
    m = re.match(r"(?P<name>.+?)( *\((?P<note>.+)\))?$", normalized)
    name = m["name"].lower()
    if context_instr:
        if context_instr.lower() in PERCUSSION_PART_NAMES: # "2 Timpani"
            name = re.sub(r"^\d+ ", "", name)
            name = name.removeprefix("loud ")
            if name == "bass":
                # under a percussion part, a bare "bass" is the bass drum
                assert not m["note"]
                return [Instrument.BASS_DRUM]
        elif context_instr.lower() == "guitar":
            name = name.removeprefix("6-string ")

    @dataclass
    class Form:
        # candidate spelling to look up
        value: str
        # whether the parenthesized note was folded into the spelling
        using_note: bool = False

    # Candidate spellings, tried in order: as written, singular/plural toggled, suffixed with the
    # context, and prefixed with the note.
    forms = [
        Form(name),
        Form(name[:-1] if name.endswith("s") else name + "s"),
    ]
    if context_instr:
        forms.append(Form(name + " " + context_instr))
    if m["note"]:
        forms.append(Form(m["note"] + " " + name, True))

    for (i, form) in enumerate(forms):
        identifier = to_identifier(form.value)
        # _INSTRUMENT_TYPOS takes precedence over Instrument member names
        parsed = _INSTRUMENT_TYPOS.get(identifier)
        if not parsed:
            try:
                parsed = Instrument[identifier]
            except KeyError as key_error:
                if i == 0:
                    # remember the failure for the name as written; used as the cause below
                    orig_err = key_error
        if parsed is Instrument.DRUM_KIT and m["note"]:
            # the note after a drum kit is itself a list of instruments
            return list(itertools.chain.from_iterable(
                parse_instrument(part, context_instr)
                for part in re.split(r", | (?:and|&) ", m["note"])
            ))
        elif parsed:
            if m["note"]:
                # accept the note if it was part of the matched spelling, is listed for this
                # instrument in _INSTRUMENT_NOTES, or is a list of numbers/ranges
                note_okay = (
                    form.using_note
                    or m["note"] in _INSTRUMENT_NOTES.get(parsed, [])
                    or re.match(r"\d+(-\d+)?(, \d+(-\d+)?)*$", m["note"])
                )
                if not note_okay:
                    raise ValueError(f"Unrecognized note {m['note']!r} for {parsed.name}")
            return [parsed]
    raise ParseError(f"Unhandled instrument {text!r}") from orig_err
+
+
# Kinds of listing entries that Part.parse_from_line recognizes but does not turn into parts.
IgnoredPart = Enum("IgnoredPart", ["CONDUCTOR", "LIBRETTO", "REHEARSAL_PIANO", "OTHER"])
+
+
@dataclass(frozen = True)
class Choice:
    """For use where the orchestration indicates any of several instruments is sufficient."""
    # the alternative instruments; frozen so that a Choice is hashable
    choices: tuple["Instrument", ...]
+
+
# Regular expressions for section headers whose section is skipped entirely. _is_ignored_header
# appends "$" to each pattern and matches it case-insensitively.
_IGNORED_HEADER_PATTS = [
    "additional materials?:?",
    "delivered digitally:",
    "digital download",
    "optional keyboard patches/programming:",
]
+
+
def _is_ignored_header(line: str) -> bool:
    """Tell whether ``line`` matches one of ``_IGNORED_HEADER_PATTS``, ignoring case."""
    for patt in _IGNORED_HEADER_PATTS:
        if re.match(patt + "$", line, re.IGNORECASE):
            return True
    return False
+
+
+def _may_be_orchestration_header(line: str) -> bool:
+ return "orchestration" in line.lower() or line.endswith(":")
+
+
+def _ignorable(line: str) -> bool:
+ if line.startswith("Note:") and line.endswith(".") or line == "Logo":
+ return True
+ if _may_be_orchestration_header(line):
+ return False
+
+ return not re.match(_PART_PATTERN, line)
+
+
def _pop_uninteresting_section(lines: list[str]) -> Optional[list[str]]:
    """
    Skip uninteresting material at the front of ``lines`` (which must be non-empty).

    If the first line is an ignored section header, the header and the ignorable lines after it
    are dropped, stopping at the next ignored header. Otherwise a single ignorable first line is
    dropped. Returns the remaining lines, or ``None`` when the first line is not skippable.
    """
    first = lines[0]
    if _is_ignored_header(first):
        end = len(lines)
        for (i, line) in enumerate(lines[1:], start = 1):
            if not _ignorable(line) or _is_ignored_header(line):
                end = i
                break
        # NOTE(review): with the header at index 0 this bound admits three following lines,
        # although the comment says two; confirm which is intended.
        assert end <= 4 # at most 2 lines of section text
        return lines[end:]
    if _ignorable(first):
        return lines[1:]
    return None
+
+
# There are special considerations for parsing percussion detail sections because of how diverse the
# instruments/sounds called for can be. Names are lower-case; callers lower-case the part name
# before testing membership.
PERCUSSION_PART_NAMES = {
    "drums & percussion",
    "drums",
    "drums/percussion",
    "percussion",
}
+
+
# Many parts are named after an instrument, which is an implicit requirement for the part. In
# contrast, parts with these names don't have a clearly implied instrument, and what instruments are
# called for is dictated by a separate instrument list. Names are lower-case; callers lower-case
# the part name before testing membership.
PART_NAMES = {
    *PERCUSSION_PART_NAMES,
    "bass",
    "drums",
    "flexbo",
    "reed",
    "woodwind",
    "woodwinds",
}
+
+
# Canonical instruments. Member names are the identifiers that parse_instrument builds from the
# listing text (upper-cased, underscores for spaces/hyphens, leading underscore before a digit).
# NOTE(review): a few names below contain what look like mis-encoded characters (flat sign,
# U-umlaut); confirm the file's encoding before editing them.
Instrument = Enum(
    "Instrument",
    [
        "ACCORDION",
        "ACME_SIREN",
        "ACOUSTIC_BASS_GUITAR",
        "ACOUSTIC_ELECTRIC_GUITAR",
        "ACOUSTIC_GUITAR",
        "ALTO_FLUTE",
        "ALTO_HORN",
        "ALTO_RECORDER",
        "ALTO_SAXOPHONE",
        "ARCHTOP_GUITAR",
        "A_CLARINET",
        "BAMBOO_FLUTE",
        "BANJO",
        "BARITONE_SAXOPHONE",
        "BASSOON",
        "BASS_CLARINET",
        "BASS_DRUM",
        "BASS_GUITAR",
        "BASS_OBOE",
        "BASS_SAXOPHONE",
        "BASS_TROMBONE",
        "BELL_CYMBAL",
        "BELL_TREE",
        "BIRD_WHISTLE",
        "BONGO_DRUMS",
        "BOWED_CYMBAL",
        "CABASA",
        "CASTANETS",
        "CELLO",
        "CHINA_CYMBAL",
        "CLARINET",
        "CLAVES",
        "CONCERTINA",
        "CONGAS",
        "CORNET",
        "COWBELL",
        "CRASH_CYMBAL",
        "CROTALES",
        "CYMBALS",
        "DAF",
        "DJEMBE",
        "DRUM_KIT",
        "DULCIMER",
        "EGG_SHAKER",
        "ELECTRIC_GUITAR",
        "ENGLISH_HORN",
        "EUPHONIUM",
        "Eâ™_CLARINET",
        "FIELD_DRUM",
        "FINGER_CYMBALS",
        "FLEXATONE",
        "FLOOR_TOM",
        "FLUTE",
        "FLÃœGELHORN",
        "FRAME_DRUM",
        "FRENCH_HORN",
        "GLOCKENSPIEL",
        "GOBLET_DRUM",
        "GONG",
        "GUITAR", # acoustic/electric unknown
        "GÃœIRO",
        "HARMONICA",
        "HARMONIUM",
        "HARP",
        "HI_HAT",
        "JINGLE_BELLS",
        "KEYBOARD",
        "MANDOLIN",
        "MARACAS",
        "MARIMBA",
        "MARK_TREE",
        "MELODICA",
        "OBOE",
        "OBOE_D_AMORE",
        "PEDAL_STEEL_GUITAR",
        "PIATTI",
        "PICCOLO",
        "PICCOLO_TRUMPET",
        "RATCHET",
        "RECORDER", # no obvious default size
        "RESONATOR_GUITAR",
        "RIDE_CYMBAL",
        "SANDPAPER_BLOCKS",
        "SEMI_ACOUSTIC_GUITAR",
        "SHIPS_BELL",
        "SLIDE_WHISTLE",
        "SLIT_DRUM",
        "SNARE_DRUM",
        "SOPRANO_CORNET",
        "SOPRANO_RECORDER",
        "SOPRANO_SAXOPHONE",
        "SPLASH_CYMBAL",
        "STEELPAN",
        "SUSPENDED_CYMBAL",
        "TAIKO",
        "TAMBOURINE",
        "TEMPLE_BLOCKS",
        "TENOR_SAXOPHONE",
        "TIMPANI",
        "TOM_DRUM",
        "TRAIN_WHISTLE",
        "TRIANGLE",
        "TROMBONE",
        "TRUMPET",
        "TUBA",
        "TUBULAR_BELLS",
        "UKELELE",
        "UPRIGHT_BASS",
        "VIBRAPHONE",
        "VIBRASLAP",
        "VIOLA",
        "VIOLIN",
        "WHIP",
        "WHISTLE",
        "WIND_CHIMES",
        "WOOD_BLOCK",
        "XYLOPHONE",
        "_12_STRING_ACOUSTIC_GUITAR",
        "_12_STRING_ELECTRIC_GUITAR",
        "_1_SHOT_SHAKER",
    ]
)
+
+
# Alternative spellings, abbreviations and synonyms mapped to canonical instruments. Keys are
# identifiers as built by parse_instrument, which consults this table before Instrument member
# names, so an entry here wins over a member of the same name.
_INSTRUMENT_TYPOS = {
    "ACOUSTIC_BASS": Instrument.UPRIGHT_BASS,
    "ACOUSTIC_NYLON_STRING_GUITAR": Instrument.ACOUSTIC_GUITAR,
    "ACOUSTIC_PIANO": Instrument.KEYBOARD,
    "ACOUSTIC_STEEL_STRING_GUITAR": Instrument.ACOUSTIC_GUITAR,
    "ACOUSTIC_WITH_AMP_GUITAR": Instrument.ACOUSTIC_ELECTRIC_GUITAR,
    "AFRICAN_DRUM": Instrument.DJEMBE,
    "ALTO_RECORDER_IN_F": Instrument.ALTO_RECORDER,
    "BELLS": Instrument.GLOCKENSPIEL,
    "BIG_BAMBOO_FLUTE": Instrument.BAMBOO_FLUTE,
    "BIG_SHAKER": Instrument.EGG_SHAKER,
    "BONGOS": Instrument.BONGO_DRUMS,
    "Bâ™_CLARINET": Instrument.CLARINET,
    "CHIMES": Instrument.TUBULAR_BELLS,
    "CHINESE_BELL_TREE": Instrument.BELL_TREE,
    "CL": Instrument.CLARINET, # :(
    "CLAR": Instrument.CLARINET,
    "CLAR.": Instrument.CLARINET,
    "COMEDY_SIREN": Instrument.ACME_SIREN,
    "CONCERT_BASS_DRUM": Instrument.BASS_DRUM,
    "COR_ANGLAIS": Instrument.ENGLISH_HORN,
    "COWBELLS": Instrument.COWBELL,
    "COW_BELL": Instrument.COWBELL,
    "C_FLUTE": Instrument.FLUTE,
    "DARBUKA": Instrument.GOBLET_DRUM,
    "DISCO_WHISTLE": Instrument.WHISTLE,
    "DOBRO": Instrument.RESONATOR_GUITAR,
    "DOUBLE_BASS": Instrument.UPRIGHT_BASS,
    "DRUMS": Instrument.DRUM_KIT,
    "DRUM_SET": Instrument.DRUM_KIT,
    "ELECTRIC_BASS": Instrument.BASS_GUITAR,
    "ELECTRIC_DROP_D_GUITAR": Instrument.ELECTRIC_GUITAR,
    "ELECTRIC_PIANO": Instrument.KEYBOARD,
    "ELECTRIC_SLIDE_GUITAR": Instrument.ELECTRIC_GUITAR,
    "ELECTRONIC_KIT": Instrument.DRUM_KIT,
    "ENG._HORN": Instrument.ENGLISH_HORN,
    "FENDER_BASS": Instrument.BASS_GUITAR,
    "FENDER_RHODES": Instrument.KEYBOARD,
    "FIDDLE": Instrument.VIOLIN,
    "FL": Instrument.FLUTE,
    "FLEXITONE": Instrument.FLEXATONE,
    "FLUGELHORN": Instrument.FLÃœGELHORN,
    "GOURD": Instrument.GÃœIRO,
    "GRAN_CASSA": Instrument.BASS_DRUM,
    "GUIRO": Instrument.GÃœIRO,
    "G_FLUTE": Instrument.ALTO_FLUTE,
    "HIGH_HAT": Instrument.HI_HAT,
    "HOLLOW_BODY_GUITAR": Instrument.SEMI_ACOUSTIC_GUITAR,
    "HORN": Instrument.FRENCH_HORN,
    "ICE_BELL": Instrument.BELL_CYMBAL,
    "KIT": Instrument.DRUM_KIT,
    "LES_PAUL_GUITAR": Instrument.ELECTRIC_GUITAR,
    "MARCHING_SNARE": Instrument.SNARE_DRUM,
    "MOUTH_SIREN": Instrument.ACME_SIREN,
    "NON_WESTERN_CHIME": Instrument.TUBULAR_BELLS,
    "NYLON_ACOUSTIC_GUITAR": Instrument.ACOUSTIC_GUITAR,
    "NYLON_STRING_ACOUSTIC_GUITAR": Instrument.ACOUSTIC_GUITAR,
    "NYLON_STRING_GUITAR": Instrument.ACOUSTIC_GUITAR,
    "ORCHESTRA_BELLS": Instrument.GLOCKENSPIEL,
    "ORGAN": Instrument.KEYBOARD,
    "PC": Instrument.PICCOLO, # yikes
    "PIANO": Instrument.KEYBOARD,
    "PICC.": Instrument.PICCOLO,
    "PICCOLO_SNARE": Instrument.SNARE_DRUM,
    "PIZZA_DRUM": Instrument.BASS_DRUM,
    "RACHET": Instrument.RATCHET,
    "RIQ": Instrument.DAF,
    "SANDPAPER": Instrument.SANDPAPER_BLOCKS,
    "SCRAPER": Instrument.GÃœIRO,
    "SHAKER": Instrument.EGG_SHAKER,
    "SIREN": Instrument.ACME_SIREN,
    "SIREN_WHISTLE": Instrument.ACME_SIREN,
    "SLAPSTICK": Instrument.WHIP,
    "SLEIGHBELLS": Instrument.JINGLE_BELLS,
    "SLEIGH_BELLS": Instrument.JINGLE_BELLS,
    "SMALL_&_LARGE_TRIANGLE": Instrument.TRIANGLE,
    "SMALL_BAMBOO_FLUTE": Instrument.BAMBOO_FLUTE,
    "SMALL_SHAKER": Instrument.EGG_SHAKER,
    "SMALL_TOM": Instrument.TOM_DRUM,
    "SMALL_TRIANGLE": Instrument.TRIANGLE,
    "SMALL_WOOD_BLOCK": Instrument.WOOD_BLOCK,
    "SNARE": Instrument.SNARE_DRUM,
    "SOCK_CYMBAL": Instrument.HI_HAT,
    "SOLID_BODY_ELECTRIC_GUITAR": Instrument.ELECTRIC_GUITAR,
    "STEEL_ACOUSTIC_GUITAR": Instrument.ACOUSTIC_GUITAR,
    "STEEL_DRUMS": Instrument.STEELPAN,
    "STEEL_STRING_ACOUSTIC": Instrument.ACOUSTIC_GUITAR,
    "STEEL_STRING_GUITAR": Instrument.ACOUSTIC_GUITAR,
    "STRATOCASTER": Instrument.ELECTRIC_GUITAR,
    "STRING_SYNTHESIZER": Instrument.KEYBOARD,
    "SUSPENDED_CYMBAL": Instrument.CYMBALS,
    "SYNTHESIZER": Instrument.KEYBOARD,
    "TAM_TAM": Instrument.SLIT_DRUM,
    "TELECASTER": Instrument.ELECTRIC_GUITAR,
    "TENOR_HORN": Instrument.ALTO_HORN,
    "TENOR_TROMBONE": Instrument.TROMBONE,
    "TIC_TOC_BLOCKS": Instrument.WOOD_BLOCK,
    "TOMS": Instrument.TOM_DRUM,
    "TOM_TOMS": Instrument.TOM_DRUM,
    "TRAP_SET": Instrument.DRUM_KIT,
    "TWELVE_STRING_GUITAR": Instrument._12_STRING_ACOUSTIC_GUITAR,
    "TYMPANI": Instrument.TIMPANI,
    "T_BLOX": Instrument.TEMPLE_BLOCKS,
    "VIBES": Instrument.VIBRAPHONE,
    "VIOLONCELLO": Instrument.CELLO,
    "WOODBLOCK": Instrument.WOOD_BLOCK,
    "_5_STRING_BASS": Instrument.BASS_GUITAR,
    "_5_STRING_ELECTRIC_BASS": Instrument.BASS_GUITAR,
    "_5_STRING_FRETLESS_BASS": Instrument.BASS_GUITAR,
}
+
+
# Instruments that mark a part as a string part when _fix_bass_part classifies the parts
# surrounding a "Bass" part.
STRING_INSTRUMENTS = {
    Instrument.VIOLIN,
    Instrument.VIOLA,
    Instrument.CELLO,
    Instrument.UPRIGHT_BASS,
}
+
+
# instruments that could be implied by a "Bass" part
BASS_INSTRUMENTS = {
    Instrument.ACOUSTIC_BASS_GUITAR,
    Instrument.BASS_GUITAR,
    Instrument.UPRIGHT_BASS,
}
+
+
# Entries of percussion instrument lists that are skipped rather than parsed as instruments.
# Matching in parse_instrument_list is exact and case-sensitive, after any "(optional)" marker
# has been stripped.
_PERCUSSION_WTF = {
    "ANVIL",
    "Brushes",
    "Drum Sticks",
    "Electronic Drum Pad",
    "FOOT ON HARDWOOD",
    "Hand Drums",
    "Handclap",
    "Huge Electronic Hits", # ??
    "Industrial Sounds",
    "METALLIC PERCUSSION",
    "Mallet KAT",
    "OCTOPAD-STADIUM HITS",
    "Oriental Drum (deep)",
    "PAD",
    "POP GUN",
    "Pop Cork Gun (or similar)",
    "Popgun",
    "Rubber Udders",
    "SWIZZLE",
    "Storm Drums",
    "WHIZZER WHISTLE",
    '"Noisy Things" (Clanky Noisemakers)',
}
+
+
# Parenthesized note lines (text without the parentheses) that Orchestration.pop_from_lines
# skips when they appear between part lines.
_PART_NOTES = {
    "played by actors, if possible, for certain numbers only",
}
+
+
# Parenthesized notes that parse_instrument accepts after a given instrument. Notes are compared
# after the text has been lower-cased and normalized; any other note raises ValueError there.
_INSTRUMENT_NOTES = {
    Instrument.BAMBOO_FLUTE: {
        "f",
        "g",
    },
    Instrument.BASS_DRUM: {
        "large drum with ominous, bass drum quality",
    },
    Instrument.COWBELL: {
        "high, medium, low",
    },
    Instrument.CYMBALS: {
        "various suspended, splash, chip, ride",
    },
    Instrument.FIELD_DRUM: {
        "or snare drum w/o snares",
    },
    Instrument.GLOCKENSPIEL: {
        "bells",
    },
    Instrument.TEMPLE_BLOCKS: {
        "3 pitches",
        "5 pitches",
    },
    Instrument.TOM_DRUM: {
        "3",
    },
    Instrument.TUBULAR_BELLS: {
        "bâ™, eâ™",
        "e",
    },
    Instrument.WHIP: {
        "whip",
    },
    Instrument.WOOD_BLOCK: {
        "2 pitches",
        "hi & low",
        "high and low",
    },
}
+
+
# Extra duty attached to a part beyond playing it; KeyboardPart.parse sets CONDUCTOR when the
# part name mentions the conductor.
AdditionalRole = Enum("AdditionalRole", ["CONDUCTOR"])