Bumping manifests a=b2g-bump
[gecko.git] / layout / mathml / updateOperatorDictionary.pl
blobff9344cc83a75de1d1f0e56fd069076b431acece
1 #!/usr/bin/perl
2 # -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*-
3 # This Source Code Form is subject to the terms of the Mozilla Public
4 # License, v. 2.0. If a copy of the MPL was not distributed with this
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 use XML::LibXSLT;
8 use XML::LibXML;
9 use LWP::Simple;
# Configuration: names of the files read and written by this script.
# These are plain package globals because the rest of the script refers to
# them by name.

# output files
$FILE_UNICODE = "unicode.xml";
$FILE_DICTIONARY = "dictionary.xml";
$FILE_DIFFERENCES = "differences.txt";
$FILE_NEW_DICTIONARY = "new_dictionary.txt";
$FILE_SYNTAX_ERRORS = "syntax_errors.txt";
$FILE_JS = "tests/stretchy-and-large-operators.js";

# our dictionary (property file)
$MOZ_DICTIONARY = "mathfont.properties";

# dictionary provided by the W3C in "XML Entity Definitions for Characters"
$WG_DICTIONARY_URL = "http://www.w3.org/2003/entities/2007xml/unicode.xml";

# XSL stylesheet to extract relevant data from the dictionary
$DICTIONARY_XSL = "operatorDictionary.xsl";

# dictionary provided by the W3C transformed with operatorDictionary.xsl
# (the "compare" command may replace this with a file given on the command
# line)
$WG_DICTIONARY = $FILE_DICTIONARY;
# Command-line handling.
#
# The first argument selects the command. "download" and "compare" accept one
# optional extra argument; "check", "make-js" and "clean" accept none.
# Anything else prints the usage text and quits (usage() does not return).
{
    # command => maximum number of arguments allowed after the command name
    my %max_extra_args = (
        "download" => 1,
        "compare"  => 1,
        "check"    => 0,
        "make-js"  => 0,
        "clean"    => 0,
    );
    my $command = @ARGV ? $ARGV[0] : "";

    if (!exists($max_extra_args{$command}) ||
        $#ARGV > $max_extra_args{$command}) {
        usage();
    }
}

# "download": fetch the W3C dictionary into $FILE_UNICODE, transform it with
# $DICTIONARY_XSL into $FILE_DICTIONARY, then quit.
if ($ARGV[0] eq "download") {
    if ($#ARGV == 1) {
        # the optional argument replaces the default download URL
        $WG_DICTIONARY_URL = $ARGV[1];
    }
    print "Downloading $WG_DICTIONARY_URL...\n";
    my $status = getstore($WG_DICTIONARY_URL, $FILE_UNICODE);
    if (!is_success($status)) {
        # Only warn: a copy of $FILE_UNICODE left by an earlier run can still
        # be converted below, and load_xml dies by itself if there is none.
        warn "Download of $WG_DICTIONARY_URL failed (status $status)\n";
    }

    print "Converting $FILE_UNICODE into $FILE_DICTIONARY...\n";
    my $xslt = XML::LibXSLT->new();
    my $source = XML::LibXML->load_xml(location => $FILE_UNICODE);
    my $style_doc = XML::LibXML->load_xml(location => $DICTIONARY_XSL,
                                          no_cdata=>1);
    my $stylesheet = $xslt->parse_stylesheet($style_doc);
    my $results = $stylesheet->transform($source);

    open(my $dictionary_fh, '>', $FILE_DICTIONARY) ||
        die("Couldn't open $FILE_DICTIONARY: $!");
    # output_as_bytes returns already-encoded bytes
    binmode($dictionary_fh);
    print {$dictionary_fh} $stylesheet->output_as_bytes($results);
    # buffered write errors only show up when the handle is closed
    close($dictionary_fh) ||
        die("Couldn't write $FILE_DICTIONARY: $!");
    exit 0;
}

# "clean": remove the files generated by the other commands, then quit.
if ($ARGV[0] eq "clean") {
    unlink($FILE_UNICODE,
           $FILE_DICTIONARY,
           $FILE_DIFFERENCES,
           $FILE_NEW_DICTIONARY,
           $FILE_SYNTAX_ERRORS);
    exit 0;
}

# "compare": an optional second argument names the dictionary file to compare
# against instead of the default $FILE_DICTIONARY.
if ($ARGV[0] eq "compare" && $#ARGV == 1) {
    $WG_DICTIONARY = $ARGV[1];
}
################################################################################
# structure of the dictionary used by this script:
# - key: same as in mathfont.properties
# - table:
#    index | value
#      0   | description
#      1   | lspace
#      2   | rspace
#      3   | minsize
#      4   | largeop
#      5   | movablelimits
#      6   | stretchy
#      7   | separator
#      8   | accent
#      9   | fence
#     10   | symmetric
#     11   | priority
#     12   | linebreakstyle
#     13   | direction
#     14   | integral
#     15   | mirrorable
# 1) build %moz_hash from $MOZ_DICTIONARY
#
# Every line of the property file that starts with "operator." is expected to
# look like
#     <key> = <properties> # <comment>
# and is stored as $moz_hash{<key>} = [ 16-element table ].

print "loading $MOZ_DICTIONARY...\n";
{
    open(my $moz_fh, '<', $MOZ_DICTIONARY) ||
        die("Couldn't open $MOZ_DICTIONARY: $!");

    print "building dictionary...\n";

    # boolean properties and the table index each one is stored at
    my %flag_index = (
        "largeop"       => 4,
        "movablelimits" => 5,
        "stretchy"      => 6,
        "separator"     => 7,
        "accent"        => 8,
        "fence"         => 9,
        "symmetric"     => 10,
        "integral"      => 14,
        "mirrorable"    => 15,
    );

    while (my $line = <$moz_fh>) {
        next unless ($line =~ m/^operator\./);

        # 1.1) split the line into key, properties and comment
        my ($key, $properties, $comment) =
            ($line =~ m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/);
        if (!defined($key)) {
            # Without this check a malformed line would be stored under a
            # key taken from stale regex captures.
            warn "skipping malformed line $. of $MOZ_DICTIONARY\n";
            next;
        }

        # 1.2) build the array
        my @value = ();
        $value[0] = $comment;
        $value[1] = ($properties =~ m/lspace:(\d+)/) ? $1 : "5";
        $value[2] = ($properties =~ m/rspace:(\d+)/) ? $1 : "5";
        $value[3] = ($properties =~ m/minsize:(\d+)/) ? $1 : "1";
        $value[11] = ""; # we don't store "priority" in our dictionary
        $value[12] = ""; # we don't store "linebreakstyle" in our dictionary
        $value[13] = ($properties =~ m/direction:([a-z]*)/) ? $1 : "";
        for my $flag (keys %flag_index) {
            # a flag is set when its name appears anywhere in the properties
            $value[$flag_index{$flag}] = scalar($properties =~ m/$flag/);
        }

        # 1.3) save the key and value
        $moz_hash{$key} = [ @value ];
    }

    close($moz_fh);
}
################################################################################
# 2) If mode "make-js", generate tests/stretchy-and-large-operators.js and quit.
#    If mode "check", verify validity of our operator dictionary and quit.
#    If mode "compare", go to step 3)

# "make-js": write one JavaScript array entry for every largeop operator and
# one for every stretchy operator found in %moz_hash.
if ($ARGV[0] eq "make-js") {
    print "generating file $FILE_JS...\n";
    open(my $js_fh, '>', $FILE_JS) ||
        die("Couldn't open $FILE_JS: $!");
    print {$js_fh} "// This file is automatically generated. Do not edit.\n";
    print {$js_fh} "var stretchy_and_large_operators = [";

    # sorted so that the generated file does not change from one run to the
    # next just because of hash ordering
    for my $key (sort keys %moz_hash) {
        my @moz = @{ $moz_hash{$key} };

        my ($char, $form) =
            ($key =~ m/^operator\.([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
        # skip keys that are not of the form operator.<chars>.<form>
        next unless defined($char);

        my $opname = "\\${char}.${form}: ";

        if ($moz[4]) {
            # largeop
            print {$js_fh} "['${opname}', '${char}','l','${form}'],";
        }

        if ($moz[6]) {
            # stretchy: the third field is the first letter of the direction
            my $direction = substr($moz[13], 0, 1);
            print {$js_fh} "['${opname}', '${char}','${direction}','${form}'],";
        }
    }

    print {$js_fh} "];\n";
    close($js_fh) ||
        die("Couldn't write $FILE_JS: $!");
    exit 0;
}
# "check": verify the entries of %moz_hash, write a report to
# $FILE_SYNTAX_ERRORS, print a summary and quit.
if ($ARGV[0] eq "check") {
    print "checking operator dictionary...\n";
    open(my $errors_fh, '>', $FILE_SYNTAX_ERRORS) ||
        die("Couldn't open $FILE_SYNTAX_ERRORS: $!");

    my $nb_errors = 0;
    my $nb_warnings = 0;

    # check the validity of our private data
    # (keys are sorted so that the report is stable between runs)
    for my $key (sort keys %moz_hash) {
        my @moz = @{ $moz_hash{$key} };
        my @messages = ();

        if (!($moz[13] eq "" ||
              $moz[13] eq "horizontal" ||
              $moz[13] eq "vertical")) {
            $nb_errors++;
            push(@messages, "error: invalid direction \"$moz[13]\"\n");
        }

        if (!$moz[4] && $moz[14]) {
            $nb_warnings++;
            push(@messages, "warning: operator is integral but not largeop\n");
        }

        if ($moz[0] =~ m/[iI]ntegral/ && !$moz[14]) {
            $nb_warnings++;
            push(@messages, "warning: operator contains the term \"integral\" in its comment, but is not integral\n");
        }

        if (@messages) {
            # the messages are followed by the offending entry
            print {$errors_fh} @messages;
            print {$errors_fh} generateEntry($key, @moz);
            print {$errors_fh} "\n";
        }
    }

    # check that all forms have the same direction.
    my %checked_operator = ();
    for my $key (sort keys %moz_hash) {
        my ($operator) =
            ($key =~ m/^([\w|\.|\\]*)\.(?:prefix|infix|postfix)$/);
        next unless defined($operator);

        # the forms of one operator are examined together, only once
        next if $checked_operator{$operator}++;

        my @form_keys = grep { exists($moz_hash{$_}) }
                        map { "$operator.$_" } qw(prefix infix postfix);

        my %directions = ();
        for my $form_key (@form_keys) {
            $directions{ $moz_hash{$form_key}[13] } = 1;
        }
        next if (keys(%directions) <= 1);

        $nb_errors++;
        print {$errors_fh}
            "error: operator has a stretchy form, but all forms";
        print {$errors_fh}
            " have not the same direction\n";
        for my $form_key (@form_keys) {
            print {$errors_fh}
                generateEntry($form_key, @{ $moz_hash{$form_key} });
        }
        print {$errors_fh} "\n";
    }

    close($errors_fh) ||
        die("Couldn't write $FILE_SYNTAX_ERRORS: $!");
    print "\n";
    if ($nb_errors > 0 || $nb_warnings > 0) {
        print "$nb_errors error(s) found\n";
        print "$nb_warnings warning(s) found\n";
        print "See output file $FILE_SYNTAX_ERRORS.\n\n";
    } else {
        print "No error found.\n\n";
    }

    exit 0;
}
################################################################################
# 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY
#
# Every /root/entry element becomes $wg_hash{<key>} = [ 16-element table ],
# and its key is recorded in @wg_keys.

print "loading $WG_DICTIONARY...\n";
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($WG_DICTIONARY);

print "building dictionary...\n";
@wg_keys = ();

foreach my $entry ($doc->findnodes('/root/entry')) {
    # 3.1) build the key
    my $key = "operator.";

    # The "unicode" attribute is consumed one "-"-terminated code at a time;
    # each code must start with an optional "U" followed by a "0", which are
    # both dropped. The loop stops at the first code that does not.
    my $codes = $entry->getAttribute("unicode");
    $codes = "$codes-";
    while ($codes =~ m/^U?0(\w*)-(.*)$/) {
        # Concatenate .\uNNNN
        $key = "$key\\u$1";
        $codes = $2;
    }

    my $form = $entry->getAttribute("form"); # "Form"
    $key = "$key.$form";

    # 3.2) build the array
    my @value = ();
    $value[0] = lc($entry->getAttribute("description"));

    # spacing attributes, with the default used when one is absent or empty
    my @spacing = ([ 1, "lspace", "5" ],
                   [ 2, "rspace", "5" ],
                   [ 3, "minsize", "1" ]);
    for my $spec (@spacing) {
        my ($index, $attribute, $default) = @$spec;
        my $text = $entry->getAttribute($attribute);
        $value[$index] = (defined($text) && $text ne "") ? $text : $default;
    }

    # a flag is set when its name appears anywhere in "properties"
    my $properties = $entry->getAttribute("properties");
    $properties = "" unless defined($properties);
    my @flags = ([ 4, "largeop" ],
                 [ 5, "movablelimits" ],
                 [ 6, "stretchy" ],
                 [ 7, "separator" ],
                 [ 8, "accent" ],
                 [ 9, "fence" ],
                 [ 10, "symmetric" ],
                 [ 15, "mirrorable" ]);
    for my $spec (@flags) {
        my ($index, $flag) = @$spec;
        $value[$index] = scalar($properties =~ m/$flag/);
    }

    $value[11] = $entry->getAttribute("priority");
    $value[12] = $entry->getAttribute("linebreakstyle");

    # not stored in the WG dictionary
    $value[13] = ""; # direction
    $value[14] = ""; # integral

    # 3.3) save the key and value
    push(@wg_keys, $key);
    $wg_hash{$key} = [ @value ];
}
# step 4 pops keys off the end, so reverse to visit them in document order
@wg_keys = reverse(@wg_keys);
################################################################################
# 4) Compare the two dictionaries and output the result
#
# $FILE_DIFFERENCES receives a report of conflicting, new and obsolete
# entries; $FILE_NEW_DICTIONARY receives the merged dictionary.

print "comparing dictionaries...\n";
open(my $differences_fh, '>', $FILE_DIFFERENCES) ||
    die("Couldn't open $FILE_DIFFERENCES: $!");
open(my $new_dictionary_fh, '>', $FILE_NEW_DICTIONARY) ||
    die("Couldn't open $FILE_NEW_DICTIONARY: $!");

my $conflicting = 0; my $conflicting_stretching = 0;
my $new = 0; my $new_stretching = 0;
my $obsolete = 0; my $obsolete_stretching = 0;
my $unchanged = 0;

# 4.1) look to the entries of the WG dictionary
while (defined(my $key = pop(@wg_keys))) {
    # A key listed twice was already consumed the first time; skip it
    # instead of emitting an entry built from a deleted record.
    next unless exists($wg_hash{$key});

    my @wg = @{ $wg_hash{$key} };
    delete $wg_hash{$key};
    my $wg_value = generateCommon(@wg);

    if (exists($moz_hash{$key})) {
        # entry is in both dictionary
        my @moz = @{ $moz_hash{$key} };
        delete $moz_hash{$key};
        my $moz_value = generateCommon(@moz);
        if ($moz_value ne $wg_value) {
            # conflicting entry
            print {$differences_fh} "[conflict]";
            $conflicting++;
            if ($moz[6] xor $wg[6]) {
                # the two dictionaries disagree on "stretchy"
                print {$differences_fh} "[stretching]";
                $conflicting_stretching++;
            }
            print {$differences_fh} " - $key ($wg[0])\n";
            print {$differences_fh} "-$moz_value\n+$wg_value\n\n";
        } else {
            # unchanged entry
            $unchanged++;
        }
        # in both cases the new dictionary gets the WG data completed with
        # our private data
        print {$new_dictionary_fh}
            completeCommon($wg_value, $key, @moz, @wg);
    } else {
        # we don't have this entry in our dictionary yet
        print {$differences_fh} "[new entry]";
        $new++;
        if ($wg[6]) {
            print {$differences_fh} "[stretching]";
            $new_stretching++;
        }
        print {$differences_fh} " - $key ($wg[0])\n";
        print {$differences_fh} "-\n+$wg_value\n\n";
        print {$new_dictionary_fh}
            completeCommon($wg_value, $key, (), @wg);
    }
}

print {$new_dictionary_fh}
    "\n# Entries below are not part of the official MathML dictionary\n\n";
# 4.2) look in our dictionary the remaining entries
for my $key (sort(keys %moz_hash)) {
    my @moz = @{ $moz_hash{$key} };
    my $moz_value = generateCommon(@moz);
    print {$differences_fh} "[obsolete entry]";
    $obsolete++;
    if ($moz[6]) {
        print {$differences_fh} "[stretching]";
        $obsolete_stretching++;
    }
    print {$differences_fh} " - $key ($moz[0])\n";
    print {$differences_fh} "-$moz_value\n+\n\n";
    print {$new_dictionary_fh}
        completeCommon($moz_value, $key, (), @moz);
}

# buffered write errors only show up when the handles are closed
close($differences_fh) ||
    die("Couldn't write $FILE_DIFFERENCES: $!");
close($new_dictionary_fh) ||
    die("Couldn't write $FILE_NEW_DICTIONARY: $!");

print "\n";
print "- $obsolete obsolete entries ";
print "($obsolete_stretching of them are related to stretching)\n";
print "- $unchanged unchanged entries\n";
print "- $conflicting conflicting entries ";
print "($conflicting_stretching of them are related to stretching)\n";
print "- $new new entries ";
print "($new_stretching of them are related to stretching)\n";
print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
print "After having modified the dictionary, please run ";
print "./updateOperatorDictionary.pl check\n\n";
exit 0;
################################################################################
# usage()
# Print the accepted command syntax on standard output and terminate the
# script with exit status 0. This function never returns.
sub usage {
    # display the accepted command syntax and quit
    print "usage:\n",
          " ./updateOperatorDictionary.pl download [unicode.xml]\n",
          " ./updateOperatorDictionary.pl compare [dictionary.xml]\n",
          " ./updateOperatorDictionary.pl check\n",
          " ./updateOperatorDictionary.pl make-js\n",
          " ./updateOperatorDictionary.pl clean\n";
    exit 0;
}
# generateCommon(@v)
# Helper function to generate the string of data shared by both dictionaries.
#
# @v is one dictionary table (index 1 lspace, 2 rspace, 3 minsize, 4..10 and
# 15 boolean flags). Returns "lspace:N rspace:N", followed by " minsize:N"
# when minsize is not "1", followed by the name of every flag that is set.
sub generateCommon {
    my (@v) = @_;

    # lexical, so that the caller's variables are left untouched
    my $common = "lspace:$v[1] rspace:$v[2]";
    if ($v[3] ne "1") { $common = "$common minsize:$v[3]"; }

    # table index => property name, in output order
    my @flags = ([ 4, "largeop" ],
                 [ 5, "movablelimits" ],
                 [ 6, "stretchy" ],
                 [ 7, "separator" ],
                 [ 8, "accent" ],
                 [ 9, "fence" ],
                 [ 10, "symmetric" ],
                 [ 15, "mirrorable" ]);
    for my $flag (@flags) {
        my ($index, $name) = @$flag;
        if ($v[$index]) { $common = "$common $name"; }
    }

    return $common;
}
# completeCommon($entry, $key, @v_moz, @v_wg)
# Helper to add key and private data to the string built by generateCommon.
#
#   $entry  string of properties shared by both dictionaries
#   $key    key of the operator
#   @v_moz  table taken from our dictionary
#   @v_wg   table taken from the WG dictionary
#
# Both tables arrive flattened into one argument list. Every table built by
# this script has 16 elements, so the first 16 values are taken as @v_moz and
# whatever follows as @v_wg. A caller that passes a single table therefore
# has it used as @v_moz.
#
# Returns the line "<key> = <entry>[ direction:D][ integral][ mirrorable]
# # <comment>" terminated by a newline. The comment is our own description
# when there is one, otherwise the description given by the WG.
sub completeCommon {
    my ($entry, $key, @fields) = @_;

    # number of elements in one dictionary table (indices 0..15)
    my $table_size = 16;
    my @v_moz = splice(@fields, 0, $table_size);
    my @v_wg = @fields;

    # generateCommon may already have emitted "mirrorable"
    my $has_mirrorable = ($entry =~ m/\bmirrorable\b/);

    $entry = "$key = $entry";

    if ($v_moz[13]) { $entry = "$entry direction:$v_moz[13]"; }
    if ($v_moz[14]) { $entry = "$entry integral"; }
    if ($v_moz[15] && !$has_mirrorable) { $entry = "$entry mirrorable"; }

    if ($v_moz[0]) {
        # keep our previous comment
        $entry = "$entry # $v_moz[0]";
    } else {
        # otherwise use the description given by the WG
        $entry = "$entry # $v_wg[0]";
    }

    $entry = "$entry\n";
    return $entry;
}
# generateEntry($key, @moz)
# Helper function to generate an entry of our operator dictionary.
#
#   $key  key of the operator
#   @moz  table of the operator taken from our dictionary
#
# Returns the line built by completeCommon from the string produced by
# generateCommon, with @moz supplied as both tables.
sub generateEntry {
    my ($key, @moz) = @_;
    # lexical, so that the caller's variables are left untouched
    my $common = generateCommon(@moz);
    return completeCommon($common, $key, @moz, @moz);
}