Indentation consistency (one tab in the MusicXML file, two spaces for code)
[pae2xml.git] / pae2xml.pl
blob52a223316ca04badc28afc65f494ba27ea76fa0d
1 #!/usr/bin/perl
4 # Copyright (C) 2003 Rainer Typke
5 #pae2xml is licensed under the terms of the GNU General Public License Version
6 #2 as published by the <a href="http://www.fsf.org/" target="_top">Free Software Foundation</a>.
7 #This gives you legal permission to copy, distribute and/or modify <em>pae2xml</em> under
8 #certain conditions. Read
9 #the <a href="http://www.gnu.org/copyleft/gpl.html" target="_top">online version of the license</a>
10 #for more details. pae2xml is provided AS IS with NO WARRANTY OF ANY KIND,
11 #INCLUDING THE WARRANTY OF DESIGN, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
# ---------------------------------------------------------------------------
# Global state shared by the subs in this file (deliberately package globals).
# ---------------------------------------------------------------------------
$divisions = 960;            # value duration() assigns to duration code "4"; written to <divisions>
$old_duration = $divisions;  # most recent duration; duration() returns it again when a note names none
$old_octave = 4;             # most recent octave; octave() returns it again when a note names none

# Today's date (YYYY-MM-DD) for the <encoding-date> element.
($mday, $mon, $year) = (localtime()) [3..5];
$encoding_date = sprintf("%4d-%02d-%02d", $year + 1900, $mon+1, $mday);

$TIE = 0;                    # set by parse_note() while a tie is still open

# Main loop: convert every file named on the command line ("-" is STDIN).
# A lexical loop variable is used so that the special sort variable $a is
# left alone.
foreach my $file (@ARGV) {
  $p = read_file($file);
  $toprint = "";             # context log that print_error() shows with each message

  # Replace "== text ==" banners by their text, then delete the first
  # "included ... -----" passage together with the whitespace around it.
  $p =~ s/\s*\=\=+\s*(.*?)\s*\=\=+\s*/$1/sg;
  $p =~ s/\s*included.*?-------------*\s*(.*?)\s*/$1/s;

  # The matches below carry no /g flag, so they do not move pos($p) and can
  # all be run on $p itself.
  if ($p !~ /1\.1\.1/ && $p =~ /plain/) {
    print_error("$file contains 'plain', but not 1.1.1!\n");
  } elsif ($p =~ /^\s*([^\n]+)\n(.*?)\n((\d+\.\d+\.\d.*?plain.*?\n)+)(.*?)\n?([^\n]+)\n([^\n]+)\s*$/s) {
    # Record layout: composer line, title, one or more incipits, other
    # information, and finally the library and RISM signatures on the last
    # two lines.
    my ($comp, $title, $incipits, $sonst, $libsig, $rismsig) = ($1, $2, $3, $5, $6, $7);

    $toprint .= "
COMPOSER: $comp
TITLE: $title
INCIPIT(S): $incipits
OTHER INFO: $sonst
LIB. SIGN.: $libsig
RISM SIGN.: $rismsig\n\n";
    parse_incipits($incipits, $comp, $title, $sonst, $libsig, $rismsig);
  } elsif (index($p,"plain&easy") > -1) {
    # Only complain when the text looks like it was meant to hold an incipit.
    print_error("Ignoring the following text:\n\n\n$p\n\n\n");
  }
}
# Split a text block that holds one or more incipits into single incipits
# and pass each of them, together with the catalogue metadata, to parse_pe().
# Appends a "parsing: ..." line to the $toprint log first.
sub parse_incipits {
  my ($incipits, $comp, $title, $sonst, $libsig, $rismsig) = @_;
  my @metadata = ($comp, $title, $sonst, $libsig, $rismsig);

  $toprint .= "parsing: $incipits\n";

  # As long as a second "n.n." number follows, cut off everything in front
  # of it and convert that part on its own.
  while ($incipits =~ /^(\d+\.\d+\..+?)(\d+\.\d+\..*)$/gs) {
    my ($head, $tail) = ($1, $2);
    $incipits = $tail;
    parse_pe($head, @metadata);
  }

  # What remains is the last (or the only) incipit.
  parse_pe($incipits, @metadata);
}
# Convert a single incipit into a MusicXML file.
#
# Arguments:
#   $pe      - text of one incipit: a "<n.n.n>. <instrument>" (or "<n.n.n>:")
#              line followed by a "plain&easy:" line holding an optional clef
#              (%..), an optional time signature (@..), the key signature
#              ($..) and the remaining data, which is passed to parse_notes()
#   $comp, $title, $sonst, $libsig, $rismsig
#            - composer, title, other information, library signature and
#              RISM signature; all of them are copied into the XML header
#
# The file is written to "<rismsig>-<incipit number>.xml" (with any
# "RISM A/II:" removed from the name) through the global handle OUT, which
# parse_notes() and parse_note() print to as well.
# Nothing is written when the incipit cannot be parsed or the file cannot be
# opened; both cases are reported through print_error().
sub parse_pe {
  my ($pe, $comp, $title, $sonst, $libsig, $rismsig) = @_;

  # make missing time signature explicit ("@" is escaped so that Perl cannot
  # take "@0" for an array)
  $pe =~ s/\@ü/\@0ü/gs;
  while ($pe =~ s/([^\-])(\d+)(\'|\,)(A|B|C|D|E|F|G)/$1$3$2$4/gs) {}; # octave first, then duration. Truly global.

  # Groups: 1 incipit number, 3 instrument, 5 clef, 7 time signature,
  # 9 key signature, 10 everything after the key signature.
  my ($inr, undef, $instr, undef, $clef, undef, $timesig, undef, $keysig, $rest) =
    $pe =~ /^\s*(\d+\.\d+\.\d)(\.|:)\s*(.*?)\nplain&easy:\s*(%([\w\-\d]+))?(\@([\d\w\/]+))?\s*&?\s*(\$([^ü]+))(.*)$/s;
  if (!defined $inr) {
    print_error("could not parse $pe\n");
    return;
  }

  my $filename="$rismsig-$inr.xml";
  $filename =~ s/RISM\s*A\/II\s*:?\s*//gs;
  print "Writing $filename...\n";

  # Three-argument open: the file name can never be read as a mode or a pipe.
  if (!open(OUT, '>', $filename)) {
    print_error("cannot write $filename: $!\n");
    return;
  }

  # Clef is "<sign>-<line>", e.g. "G-2"; anything else falls back to G on line 2.
  if (defined $clef && $clef =~ /^(\w)\-(\d)$/) {
    ($clefsign, $clefline) = ($1, $2);
  } else {
    ($clefsign, $clefline) = ("G", 2);
  }

  # From here on $timesig holds the ready-made <time> element.
  $timesig = timesignature($timesig);

  # Key signature -> position on the circle of fifths.
  my %fif=("", 0, "xF", 1, "xFC", 2, "xFCG",3, "xFCGD",4, "xFCGDA",5, "xFCGDAE",6, "xFCGDAEB",7, "bB",-1, "bBE",-2, "bBEA",-3, "bBEAD",-4, "bBEADG",-5, "bBEADGC",-6, "bBEADGCF",-7);
  $keysig =~ s/(\s+)|&//gs; # it is unclear what the & means, so we'll ignore it for now.
  $keysig =~ s/\[|\]//gs; # IGNORING brackets around a key sig.
  if (exists $fif{$keysig}) {
    $fifths = $fif{$keysig};
  } else {
    $fifths = "0";
    print_error("Strange key signature '$keysig'.\n");
  }

  # Escape the characters that are markup in XML.  "&" has to be handled
  # first, otherwise the entities produced for "<" and ">" would be escaped
  # again.  The loop variable aliases the listed variables, so they are
  # changed in place.
  foreach $_ ($rismsig,$title,$inr,$instr,$comp,$encoding_date,$libsig,$sonst)
  {
    next unless defined $_;
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
  }

  # Header, part list and the attributes of measure 1.  Measure 1 is left
  # open; parse_notes() fills and closes it.
  print OUT
    qq{<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>\n},
    qq{<!DOCTYPE score-partwise PUBLIC "-//Recordare//DTD MusicXML 0.6 Partwise//EN" "file:/c:/Program Files/MusicXML/partwise.dtd">\n},
    qq{<score-partwise>\n},
    qq{\t<work>\n},
    qq{\t\t<work-number>$rismsig</work-number>\n},
    qq{\t\t<work-title>$title</work-title>\n},
    qq{\t</work>\n},
    qq{\t<movement-number>$inr</movement-number>\n},
    qq{\t<movement-title>$instr</movement-title>\n},
    qq{\t<identification>\n},
    qq{\t\t<creator type="composer">$comp</creator>\n},
    qq{\t\t<encoding>\n},
    qq{\t\t\t<software>pae2xml by R. Typke</software>\n},
    qq{\t\t\t<encoding-date>$encoding_date</encoding-date>\n},
    qq{\t\t</encoding>\n},
    qq{\t\t<source>$libsig</source>\n},
    qq{\t</identification>\n},
    qq{\t<part-list>\n},
    qq{\t\t<score-part id="P1">\n},
    qq{\t\t\t<part-name>$sonst</part-name>\n},
    qq{\t\t</score-part>\n},
    qq{\t</part-list>\n},
    qq{\t<part id="P1">\n},
    qq{\t\t<measure number="1">\n},
    qq{\t\t\t<attributes>\n},
    qq{\t\t\t\t<divisions>$divisions</divisions>\n},
    qq{\t\t\t\t<key>\n},
    qq{\t\t\t\t\t<fifths>$fifths</fifths>\n},
    qq{\t\t\t\t</key>\n},
    $timesig,
    qq{\t\t\t\t<clef>\n},
    qq{\t\t\t\t\t<sign>$clefsign</sign>\n},
    qq{\t\t\t\t\t<line>$clefline</line>\n},
    qq{\t\t\t\t</clef>\n},
    qq{\t\t\t</attributes>\n};

  $toprint .= "
INCIPIT NO.: $inr
INSTR.: $instr
CLEF: $clef
KEY SIG.: $keysig
TIME SIG.: $timesig
REST: $rest\n";
  parse_notes($rest, $keysig);

  # The footer is written only here, i.e. only when the file was opened.
  print OUT "\t</part>\n</score-partwise>\n";
  if (!close(OUT)) {
    print_error("error while closing $filename: $!\n");
  }
}
# Return the string $e concatenated $count times.
# A count below 1 yields the empty string.
sub repeat {
  my ($e, $count) = @_;
  return $e x $count;
}
# Translate the note data of one incipit into MusicXML and print it to OUT.
#
# Arguments:
#   $notes  - the plain&easy data that follows the key signature
#   $keysig - key signature such as "xFC" or "bBE"; handed to parse_note()
#
# The sub assumes that measure 1 has already been opened on OUT.  It numbers
# the measures it opens itself from 2 upwards and closes the last measure
# that is still open.  The string is consumed from the left, one token per
# loop iteration; the order of the branches matters.  Progress is logged in
# $toprint, unparsable input is reported through print_error().
sub parse_notes {
  my ($notes, $keysig) = @_;
  my $qq = 0;     # in group of cue notes
  my $meas = 2;   # number of the next measure to be opened
  my $mopen = 1;  # measure tag still open

  # One note or rest: octave marks, accidental, duration digits, dots,
  # grace/cue flag, pitch letter or "-", optional "t" and "+".
  # (Same pattern as in parse_note(), but without capture groups.)
  my $note_re = qr/(?:\,|\')*(?:x|xx|b|bb|n)?\d*\.*(?:g|q)?(?:\-|A|B|C|D|E|F|G)t?\+?/;

  # <time-modification> fragment; parse_note() reads the two numbers back
  # out of it to scale the duration.
  my $time_mod = sub {
    my ($actual, $normal) = @_;
    return qq{\t\t\t\t<time-modification>\n}
         . qq{\t\t\t\t\t<actual-notes>$actual</actual-notes>\n}
         . qq{\t\t\t\t\t<normal-notes>$normal</normal-notes>\n}
         . qq{\t\t\t\t</time-modification>\n};
  };

  if ($notes =~ /^\s*(.*?)\s*$/) {
    $notes = $1;
  }

  # --- Normalisation: rewrite shorthand so that the loop below only has to
  # --- deal with a fixed order of the parts of a note.
  $notes =~ s/!([^!]*)!(f*)/repeat($1, length($2)+1)/gse; # write out repetitions
  $notes =~ s/\{([^\}]*)\}/$1/gs; # ignore beamings
  while ( $notes =~ s/(:?\/+:?|^)([^\/:]*)(:?\/+:?)i(:?\/+:?)/$1$2$3$2$4/gs) {}; # replace whole-measure repeats (i notation)

  $notes =~ s/(\d+)\(([^;]+\))/\($1$2/gs; # pull note lengths into fermatas or triplets
  $notes =~ s/(xx|x|bb|b|n)\(/\($1/gs; # pull accidentals into tuplets or fermatas:
  $notes =~ s/(\d+)(xx|x|bb|b|n)(A|B|C|D|E|F|G)/$2$1$3/gs; # accidentals first, then duration

  $notes =~ s/(\.|\d|\,|\')qq/qq$1/gs; # pull beginning mark of group of grace notes in front of corresponding notes
  $notes =~ s/(xx|x|bb|b|n)qq/qq$1/gs; # qq first, then parts of notes

  while ($notes ne "") {
    if ($notes =~ /^(\'+|\,+)(.*)$/) { # Octave marks
      (my $oct, $notes) = ($1, $2);
      octave($oct);
    } elsif ($notes =~ /^qq(.*)$/) { # Begin grace
      $notes = $1;
      $qq = 1;
    } elsif ($notes =~ /^r(.*)$/) { # End grace
      $notes = $1;
      $qq = 0;
    } elsif ($notes =~ /^\=(\d*)(\/.*)$/) { # multi-measure rests
      (my $measrest, $notes) = ($1, $2);
      $measrest = 1 if $measrest eq '';
      $toprint .= "$measrest measures of rest.\n";
      if ($measrest > 0) {
        # Create a real multi-bar rest
        print OUT
          qq{\t\t\t<attributes>\n},
          qq{\t\t\t\t<measure-style>\n},
          qq{\t\t\t\t\t<multiple-rest>$measrest</multiple-rest>\n},
          qq{\t\t\t\t</measure-style>\n},
          qq{\t\t\t</attributes>\n};
      }
      # Now create the measures.  The last one stays open; the bar line that
      # follows in $notes closes it.
      for my $n (1..$measrest) {
        my $whole_measure = $beats*$divisions*4/$beattype;
        print OUT
          qq{\t\t\t<note>\n},
          qq{\t\t\t\t<rest />\n},
          qq{\t\t\t\t<duration>$whole_measure</duration>\n},
          qq{\t\t\t</note>\n};
        if ($n < $measrest) {
          print OUT "\t\t</measure>\n";
          if ($notes ne "") {
            print OUT qq{\t\t<measure number="$meas">\n};
            $meas++;
          } else {
            $mopen = 0;
          }
        }
      }
    } elsif ($notes =~ /^($note_re)(.*)$/) { # a note
      (my $note, $notes) = ($1, $2);
      parse_note($note, $keysig, "", "", $qq);
    } elsif ($notes =~ /^(\($note_re\))(.*)$/) { # one note with a fermata
      (my $note, $notes) = ($1, $2);
      parse_note($note, $keysig, "", "", $qq);
    } elsif ($notes =~ /^(\((?:$note_re){3}\))(.*)$/) { # a triplet
      (my $triplet, $notes) = ($1, $2);
      my $ratio = $time_mod->(3, 2);
      # Peel the notes off one by one; what is left at the end is the third.
      $triplet =~ /^\(($note_re)(.*)\)$/s;
      (my $note, $triplet) = ($1, $2);
      parse_note($note, $keysig, '<tuplet type="start"/>', $ratio, $qq);
      $triplet =~ /^($note_re)(.*)$/s;
      ($note, $triplet) = ($1, $2);
      parse_note($note, $keysig, '', $ratio, $qq);
      parse_note($triplet, $keysig, '<tuplet type="stop"/>', $ratio, $qq);
    } elsif ($notes =~ /^((\d+)\((?:$note_re)+\;(\d+)\))(.*)$/) { # an n-tuplet
      (my $tuplet, $notes) = ($1, $4);
      # "<duration>(<notes>;<count>)"; the count may have several digits.
      my ($combdur, $note, $others, $numval) =
        $tuplet =~ /^(\d+)\(($note_re)(.*);(\d+)\)$/s;
      # Called for its side effect: the combined duration becomes the sticky
      # duration, which parse_note() then scales by 1/$numval.
      duration($combdur);
      my $ratio = $time_mod->($numval, 1);
      parse_note($note, $keysig, '<tuplet type="start"/>', $ratio, $qq);
      while ($others =~ /^($note_re)(.+)$/s) {
        ($note, $others) = ($1, $2);
        parse_note($note, $keysig, '', $ratio, $qq);
      }
      parse_note($others, $keysig, '<tuplet type="stop"/>', $ratio, $qq);
    } elsif ($notes =~ /^%(\w)-(\d)(.*)$/) { # Clef change
      ($clefsign, $clefline, $notes) = ($1, $2, $3);
      print OUT
        qq{\t\t\t<attributes>\n},
        qq{\t\t\t\t<clef>\n},
        qq{\t\t\t\t\t<sign>$clefsign</sign>\n},
        qq{\t\t\t\t\t<line>$clefline</line>\n},
        qq{\t\t\t\t</clef>\n},
        qq{\t\t\t</attributes>\n};
    } elsif ($notes =~ /^\@(\d\/\d|c\/?)\s*(.*)$/) { # time signature change
      (my $timesig, $notes) = ($1, $2);
      $timesig = timesignature($timesig);
      print OUT "\t\t\t<attributes>\n", $timesig, "\t\t\t</attributes>\n";
    } elsif ($notes =~ /^(:?\/+:?)(.*)$/) { # Barline (and repeats)
      (my $barline, $notes) = ($1, $2);

      # Right-hand bar line of the measure that ends here.  A plain double
      # bar "//" is only a bar style; it does not end a repeat.
      my ($style, $ends_repeat);
      if ($barline =~ /^:\/\/:/) {
        ($style, $ends_repeat) = ("light-light", 1);
      } elsif ($barline =~ /^:\/\/$/ ) {
        ($style, $ends_repeat) = ("light-heavy", 1);
      } elsif ($barline =~ /^\/\/$/) {
        ($style, $ends_repeat) = ("light-light", 0);
      }
      if (defined $style) {
        print OUT qq{\t\t\t<barline location="right">\n},
                  qq{\t\t\t\t<bar-style>$style</bar-style>\n};
        print OUT qq{\t\t\t\t<repeat direction="backward"/>\n} if $ends_repeat;
        print OUT qq{\t\t\t</barline>\n};
      }

      print OUT "\t\t</measure>\n";
      if ($notes ne "") {
        print OUT qq{\t\t<measure number="$meas">\n};
        # Left-hand bar line of the new measure when a repeat starts here.
        if ($barline =~ /^\/\/:$/) {
          print OUT
            qq{\t\t\t<barline location="left">\n},
            qq{\t\t\t\t<bar-style>heavy-light</bar-style>\n},
            qq{\t\t\t\t<repeat direction="forward"/>\n},
            qq{\t\t\t</barline>\n};
        } elsif ($barline =~ /^:\/\/:$/) {
          print OUT
            qq{\t\t\t<barline location="left">\n},
            qq{\t\t\t\t<repeat direction="forward"/>\n},
            qq{\t\t\t</barline>\n};
        }
        # NOTE(review): $clefattr is not assigned anywhere in the code shown
        # here, so this normally prints nothing - confirm before removing.
        print OUT $clefattr if defined $clefattr;
        $meas++;
      } else {
        $mopen = 0;
      }
      $toprint .= "bar line\n";
    } elsif ($notes =~ /^\((\=)\)(.*)$/) { # a bar of rest with a fermata
      (my $rst, $notes) = ($1, $2);
      $toprint .= "rest: $rst\n";
      my $whole_measure = $beats*$divisions*4/$beattype;
      print OUT
        qq{\t\t\t<note>\n},
        qq{\t\t\t\t<rest />\n},
        qq{\t\t\t\t<duration>$whole_measure</duration>\n},
        qq{\t\t\t\t<notations>\n},
        qq{\t\t\t\t\t<fermata type="upright"/>\n},
        qq{\t\t\t\t</notations>\n},
        qq{\t\t\t</note>\n};
    } elsif ($notes =~ s/(\d+\.*)\(($note_re)\)/\($1$2\)/gs) { # pull duration into fermata parentheses
      # nothing else to do: the rewritten string is parsed in the next round
    } elsif ($notes =~ /^ +(.*)$/) {
      $notes = $1;
      print("Invalid space encountered in notes before $notes\n");
    } else {
      # No rule matches: give up on the remainder instead of looping forever.
      print_error("got stuck with $notes\n");
      $notes = "";
    }
  }

  if ($mopen) {
    print OUT "\t\t</measure>\n";
  }
}
# Print one <note> element to OUT.
#
# Arguments:
#   $note        - a single plain&easy note or rest, optionally wrapped in
#                  parentheses (which marks a fermata)
#   $keysig      - key signature, used by alter() for the default alteration
#   $notation    - XML to place inside <notations> (e.g. a <tuplet/> element),
#                  or ""
#   $addition    - XML printed after the duration and tie elements (a
#                  <time-modification> fragment), or ""; its actual/normal
#                  numbers also scale the duration
#   $in_qq_group - true while inside a "qq ... r" group; the note is then
#                  marked as a cue note
#
# Updates the global $TIE and appends a line to the $toprint log.  A note
# that does not match the expected pattern is reported through print_error()
# and skipped.
sub parse_note {
  my($note, $keysig, $notation, $addition, $in_qq_group) = @_;
  $notation = "" unless defined $notation;
  $addition = "" unless defined $addition;

  my ($actualnotes, $normalnotes) = (1,1);
  if ($addition =~ /^\s*<time-modification>\s*<actual-notes>\s*(\d+)\s*<\/actual-notes>\s*<normal-notes>\s*(\d+)\s*<\/normal-notes>\s*<\/time-modification>\s*$/) {
    ($actualnotes, $normalnotes) = ($1, $2);
  }

  my $fermata = 0;
  if ($note =~ /^\((.*)\)$/) {
    $note = $1;
    $fermata = 1;
  }

  # Take the parts from the match itself: after a failed match $1.. would
  # still hold the values of some earlier, unrelated match.
  my @parts = $note =~ /^((\,|\')*)(x|xx|b|bb|n)?(\d*)(\.*)(g|q)?(\-|A|B|C|D|E|F|G)(t?)(\+?)$/;
  if (!@parts) {
    print_error("could not parse note '$note'\n");
    return;
  }
  my ($oct, $acc, $dur, $dot, $gracecue, $pitch, $trill, $tie) = @parts[0, 2, 3, 4, 5, 6, 7, 8];
  $acc = "" unless defined $acc;            # optional groups are undef when absent
  $gracecue = "" unless defined $gracecue;

  print OUT "\t\t\t<note>\n";

  if ($gracecue eq "g") {
    print OUT qq{\t\t\t\t<grace steal-time-following="33"/>\n};
  }
  if ($gracecue eq "q" || $in_qq_group) {
    print OUT "\t\t\t\t<cue/>\n";
  }

  if ($pitch eq "-") {
    print OUT "\t\t\t\t<rest />\n";
  } else {
    print OUT
      "\t\t\t\t<pitch>\n",
      "\t\t\t\t\t<step>$pitch</step>\n",
      alter($pitch, $acc, $keysig),
      "\t\t\t\t\t<octave>", octave($oct), "</octave>\n",
      "\t\t\t\t</pitch>\n";
  }

  # Grace notes marked "g" carry no duration.
  if ($gracecue ne "g") {
    my $ticks = duration($dur, $dot)*$normalnotes/$actualnotes;
    print OUT "\t\t\t\t<duration>$ticks</duration>\n";
  }

  # Ties: close the tie coming from the previous note, then open a new one
  # if this note is marked with "+".  A note in the middle of a chain of
  # tied notes therefore gets both elements.
  if ($TIE) {
    print OUT qq{\t\t\t\t<tie type="stop"/>\n};
  }
  if ($tie eq "+") {
    print OUT qq{\t\t\t\t<tie type="start"/>\n};
  }
  $TIE = ($tie eq "+") ? 1 : 0;

  if ($addition ne "") {
    print OUT $addition;
    print OUT "\n" unless $addition =~ /\n\z/;
  }

  my $notationbracket = $fermata || ($trill eq "t") || ($notation ne "");
  if ($notationbracket) {
    print OUT "\t\t\t\t<notations>\n";
  }
  if ($fermata) {
    print OUT qq{\t\t\t\t\t<fermata type="upright"/>\n};
  }
  if ($trill eq "t") {
    print OUT
      "\t\t\t\t\t<ornaments>\n",
      "\t\t\t\t\t\t<trill-mark/>\n",
      "\t\t\t\t\t</ornaments>\n";
  }
  if ($notation ne "") {
    print OUT "\t\t\t\t\t$notation\n";
  }
  if ($notationbracket) {
    print OUT "\t\t\t\t</notations>\n";
  }

  print OUT "\t\t\t</note>\n";

  $toprint .= "note: oct. $oct/acc. $acc/dur. $dur/dots $dot/grace,cue $gracecue/pitch $pitch\n";
}
# Build the <alter> element for a pitch.
#
#   $pitch  - pitch letter
#   $acc    - explicit accidental ("n", "b", "bb", "x", "xx") or empty
#   $keysig - key signature: "x" or "b" followed by the affected pitch letters
#
# Without an explicit accidental the key signature decides: a pitch listed
# in it is raised when the signature starts with "x" and lowered otherwise.
# Returns "" when the resulting alteration is 0.
sub alter {
  my ($pitch, $acc, $keysig) = @_;

  my %alter_for = (n => 0, b => -1, bb => -2, x => 1, xx => 2);

  # Default taken from the key signature ...
  my $alt = 0;
  if (index($keysig, $pitch) >= 0) {
    $alt = (substr($keysig, 0, 1) eq 'x') ? 1 : -1;
  }

  # ... which an explicit accidental overrides.
  $alt = $alter_for{$acc} if exists $alter_for{$acc};

  return "" if $alt == 0;
  return "\t\t\t\t\t<alter>$alt</alter>\n";
}
# Convert a plain&easy duration into ticks (relative to $divisions).
#
#   $duration - duration code: "1" whole, "2" half, "4" quarter, "8", "6",
#               "3", "5", "7" for each further halving, "9" breve, "0" long;
#               may be empty
#   $dots     - string of augmentation dots; each dot adds half of what the
#               previous step added; may be empty or undefined
#
# Durations are sticky: the result is remembered in the global $old_duration
# and returned again when both arguments are empty.  An unknown code is
# reported through print_error() and leaves the remembered duration as it
# is, so the notes that follow still get a usable value.
sub duration {
  my ($duration, $dots) = @_;
  $duration = "" unless defined $duration;
  $dots = "" unless defined $dots;

  # Nothing given: repeat the previous duration.
  return $old_duration if $duration.$dots eq "";

  my %du=("1",4*$divisions,"2",2*$divisions,"4",$divisions,
          "8",$divisions/2,"6",$divisions/4,"3",$divisions/8,
          "5",$divisions/16,"7",$divisions/32,
          "9",$divisions*8,"0",$divisions*16); # breve/long

  if (!exists $du{$duration}) {
    print_error("strange duration '$duration'\n");
    return $old_duration;
  }

  my $ticks = $du{$duration};
  my $add = $ticks;
  for (1 .. length $dots) {
    $add /= 2;
    $ticks += $add;
  }

  $old_duration = $ticks;
  return $old_duration;
}
# Convert a run of octave marks into an octave number.
#
# n commas give octave 4 - n, n apostrophes give octave 3 + n.  The result
# is remembered in the global $old_octave; an empty argument returns the
# remembered octave unchanged.
sub octave {
  my ($octave) = @_;

  if ($octave ne "") {
    my $marks = length $octave;
    # Only the first character decides the direction.
    $old_octave = (substr($octave, 0, 1) eq ",") ? 4 - $marks : 3 + $marks;
  }

  return $old_octave;
}
# Turn a plain&easy time signature into a MusicXML <time> element.
#
#   $timesig - "c" (common time), "c/" (cut time), "<beats>/<beat-type>",
#              the same with a leading "c", "c3", "0" or empty
#
# Returns the <time> element as a string ending in a newline and stores the
# meter in the globals $beats and $beattype, which parse_notes() uses to
# compute the length of a whole-measure rest.  A missing signature is
# treated as common time.  Anything that cannot be understood - including a
# beat-type of 0, which would later lead to a division by zero - is reported
# through print_error() and replaced by 4/4.
sub timesignature {
  my ($timesig) = @_;
  $timesig = "" unless defined $timesig;

  if ($timesig eq "c3") {
    $timesig = "3/2"; # it would be better to display it as "C". Example: 451.023.814
  }
  if ($timesig =~ /^c(\d+)\/(\d+)$/s) {
    $timesig = "$1/$2"; # it would be better to show the "C"
  }

  my $symbol;   # value of the symbol attribute, if any
  if ($timesig eq "0" || $timesig eq "") { # unclear how to handle absence of time signature.
    ($beats, $beattype, $symbol) = (4, 4, "common"); # using 4/4 for now.
  } elsif ($timesig =~ /^c(\/?)$/i) {
    if ($1 eq "/") {
      ($beats, $beattype, $symbol) = (2, 2, "cut");
    } else {
      ($beats, $beattype, $symbol) = (4, 4, "common");
    }
  } elsif ($timesig =~ /^(\d+)\/(\d+)$/s && $2 > 0) {
    ($beats, $beattype) = ($1, $2);
  } else {
    print_error("Time signature '$timesig' looks strange.\n");
    # we assume 4/4 just to get something legible:
    ($beats, $beattype) = (4, 4);
  }

  my $open_tag = defined $symbol ? qq{<time symbol="$symbol">} : "<time>";
  return "\t\t\t\t$open_tag\n"
       . "\t\t\t\t\t<beats>$beats</beats>\n"
       . "\t\t\t\t\t<beat-type>$beattype</beat-type>\n"
       . "\t\t\t\t</time>\n";
}
# Print an error report to standard output: first the context collected in
# the global $toprint log, then the message $msg itself.
sub print_error {
  my ($msg) = @_;

  print join("",
    "\nAn error occurred; context:\n\n",
    $toprint,
    "\n\n",
    "Error: ", $msg, "\n");
}
# Return the complete contents of a file as one string.
#
#   $fn - file name, or "-" to read standard input
#
# Returns "" when the file is empty or cannot be opened; in the latter case
# a warning with the reason is written to standard error.
sub read_file {
  my ($fn) = @_;

  local $/;   # no record separator: one read returns everything

  if ($fn eq "-") {
    my $res = <STDIN>;
    return defined $res ? $res : "";
  }

  # Three-argument open with a lexical handle: the name is always treated
  # as a plain file name, never as a mode or a command to run.
  my $fh;
  if (!open($fh, '<', $fn)) {
    warn "cannot open $fn: $!\n";
    return "";
  }
  my $res = <$fh>;
  close($fh);

  return defined $res ? $res : "";
}