Merge mozilla-central and tracemonkey. (a=blockers)
[mozilla-central.git] / layout / mathml / updateOperatorDictionary.pl
blob4b0822fe24290fd560e198e60d9cdc8483ef6bdc
1 #!/usr/bin/perl
2 # -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*-
3 # ***** BEGIN LICENSE BLOCK *****
4 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 # The contents of this file are subject to the Mozilla Public License Version
7 # 1.1 (the "License"); you may not use this file except in compliance with
8 # the License. You may obtain a copy of the License at
9 # http://www.mozilla.org/MPL/
11 # Software distributed under the License is distributed on an "AS IS" basis,
12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 # for the specific language governing rights and limitations under the
14 # License.
16 # The Original Code is Mozilla MathML Project.
18 # The Initial Developer of the Original Code is
19 # Frederic Wang <fred.wang@free.fr>.
20 # Portions created by the Initial Developer are Copyright (C) 2010
21 # the Initial Developer. All Rights Reserved.
23 # Contributor(s):
25 # Alternatively, the contents of this file may be used under the terms of
26 # either the GNU General Public License Version 2 or later (the "GPL"), or
27 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 # in which case the provisions of the GPL or the LGPL are applicable instead
29 # of those above. If you wish to allow use of your version of this file only
30 # under the terms of either the GPL or the LGPL, and not to allow others to
31 # use your version of this file under the terms of the MPL, indicate your
32 # decision by deleting the provisions above and replace them with the notice
33 # and other provisions required by the GPL or the LGPL. If you do not delete
34 # the provisions above, a recipient may use your version of this file under
35 # the terms of any one of the MPL, the GPL or the LGPL.
37 # ***** END LICENSE BLOCK *****
39 use XML::XSLT;
40 use XML::DOM;
# Names of the report files written by this script.
$FILE_DIFFERENCES = "differences.txt";
$FILE_NEW_DICTIONARY = "new_dictionary.txt";
$FILE_SYNTAX_ERRORS = "syntax_errors.txt";

# Our own dictionary (a property file).
$MOZ_DICTIONARY = "mathfont.properties";

# Dictionary provided in "XML Entity Definitions for Characters".
# The file unicode.xml is very large (> 5Mb), so the script expects the XML
# file produced by operatorDictionary.xsl instead:
# > xsltproc -o dictionary.xml operatorDictionary.xsl unicode.xml
$WG_DICTIONARY = "dictionary.xml";

# Accepted command lines are "compare [dictionary]" and "check"; anything
# else prints the usage text and quits.
my $arg_count = scalar(@ARGV);
my $mode = ($arg_count > 0) ? $ARGV[0] : "";
my $is_compare = ($mode eq "compare" && $arg_count <= 2);
my $is_check = ($mode eq "check" && $arg_count == 1);

unless ($is_compare || $is_check) {
  usage();
}

# An explicit dictionary path on the command line replaces the default one.
if ($is_compare && $arg_count == 2) {
  $WG_DICTIONARY = $ARGV[1];
}
################################################################################
# structure of the dictionary used by this script:
# - key: same as in mathfont.properties
# - table:
#    index | value
#      0   | description
#      1   | lspace
#      2   | rspace
#      3   | minsize
#      4   | largeop
#      5   | movablelimits
#      6   | stretchy
#      7   | separator
#      8   | accent
#      9   | fence
#     10   | symmetric
#     11   | priority
#     12   | linebreakstyle
#     13   | direction
#     14   | integral

# 1) build %moz_hash from $MOZ_DICTIONARY

print "loading $MOZ_DICTIONARY...\n";
open(my $moz_file, "<", $MOZ_DICTIONARY) ||
  die ("Couldn't open $MOZ_DICTIONARY: $!");

print "building dictionary...\n";
while (my $line = <$moz_file>) {
  # only lines starting with "operator." describe dictionary entries
  next unless ($line =~ m/^operator\./);

  # An entry has the form "key = properties # description".  A line that does
  # not follow it is reported and skipped; otherwise the capture variables
  # would still hold the values of an earlier match and a bogus key would be
  # stored.
  unless ($line =~ m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/) {
    warn "warning: ignoring malformed entry at line $. of $MOZ_DICTIONARY\n";
    next;
  }

  # 1.1) build the key
  my ($key, $properties, $description) = ($1, $2, $3);

  # 1.2) build the array
  # The leading greedy ".*" makes each pattern pick the last occurrence of
  # the property, as the original patterns did.
  my @value = ();
  $value[0] = $description;
  $value[1] = ($properties =~ m/^.*lspace:(\d)/) ? $1 : "5";
  $value[2] = ($properties =~ m/^.*rspace:(\d)/) ? $1 : "5";
  $value[3] = ($properties =~ m/^.*minsize:(\d)/) ? $1 : "1";
  # boolean properties: true when the keyword appears in the property list
  $value[4] = ($properties =~ m/largeop/);
  $value[5] = ($properties =~ m/movablelimits/);
  $value[6] = ($properties =~ m/stretchy/);
  $value[7] = ($properties =~ m/separator/);
  $value[8] = ($properties =~ m/accent/);
  $value[9] = ($properties =~ m/fence/);
  $value[10] = ($properties =~ m/symmetric/);
  $value[11] = ""; # we don't store "priority" in our dictionary
  $value[12] = ""; # we don't store "linebreakstyle" in our dictionary
  $value[13] = ($properties =~ m/^.*direction:([a-z]*)/) ? $1 : "";
  $value[14] = ($properties =~ m/integral/);

  # 1.3) save the key and value
  $moz_hash{$key} = [ @value ];
}

close($moz_file);
################################################################################
# 2) If mode "check", verify validity of our operator dictionary and quit.
#    If mode "compare", go to step 3)

if ($ARGV[0] eq "check") {
  print "checking operator dictionary...\n";
  open(my $file_syntax_errors, ">", $FILE_SYNTAX_ERRORS) ||
    die ("Couldn't open $FILE_SYNTAX_ERRORS: $!");

  my $nb_errors = 0;
  my $nb_warnings = 0;

  # check the validity of our private data
  # (keys are sorted so that the report is the same from one run to the next)
  for my $key (sort(keys(%moz_hash))) {
    my @moz = @{ $moz_hash{$key} };
    my $valid = 1;

    # index 13 is the direction: empty, "horizontal" or "vertical"
    if (!($moz[13] eq "" ||
          $moz[13] eq "horizontal" ||
          $moz[13] eq "vertical")) {
      $valid = 0;
      $nb_errors++;
      print {$file_syntax_errors} "error: invalid direction \"$moz[13]\"\n";
    }

    # index 4 is largeop, index 14 is integral
    if (!$moz[4] && $moz[14]) {
      $valid = 0;
      $nb_warnings++;
      print {$file_syntax_errors}
        "warning: operator is integral but not largeop\n";
    }

    # index 0 is the comment of the entry
    if (($moz[0] =~ m/[iI]ntegral/) && !$moz[14]) {
      $valid = 0;
      $nb_warnings++;
      print {$file_syntax_errors} "warning: operator contains the term \"integral\" in its comment, but is not integral\n";
    }

    if (!$valid) {
      print {$file_syntax_errors} generateEntry($key, @moz);
      print {$file_syntax_errors} "\n";
    }
  }

  # check that all forms have the same direction.
  for my $key (sort(keys(%moz_hash))) {
    # Forms are removed from the hash table as soon as their operator has
    # been examined, so a missing key was already handled with a sibling form.
    next unless (exists($moz_hash{$key}));

    # keys that do not end with a form name cannot be grouped; skip them
    # instead of reusing the captures of an earlier match
    next unless ($key =~ m/^([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
    my $operator = $1;

    # the forms of this operator that are present in the dictionary
    my @form_keys = grep { exists($moz_hash{$_}) }
      ("$operator.prefix", "$operator.infix", "$operator.postfix");

    # collect the distinct directions used by those forms
    my %directions = ();
    $directions{ $moz_hash{$_}[13] } = 1 for (@form_keys);

    if (scalar(keys(%directions)) > 1) {
      $nb_errors++;
      print {$file_syntax_errors}
        "error: operator has a stretchy form, but all forms";
      print {$file_syntax_errors}
        " have not the same direction\n";
      for my $form_key (@form_keys) {
        print {$file_syntax_errors}
          generateEntry($form_key, @{ $moz_hash{$form_key} });
      }
      print {$file_syntax_errors} "\n";
    }

    # Remove every examined form.  The original code deleted the key
    # "$key.prefix" (a concatenation with the bareword "prefix") and so
    # never removed the prefix form.
    delete @moz_hash{@form_keys};
  }

  close($file_syntax_errors);
  print "\n";
  if ($nb_errors > 0 || $nb_warnings > 0) {
    print "$nb_errors error(s) found\n";
    print "$nb_warnings warning(s) found\n";
    print "See output file $FILE_SYNTAX_ERRORS.\n\n";
  } else {
    print "No error found.\n\n";
  }

  exit 0;
}
################################################################################
# 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY

print "loading $WG_DICTIONARY...\n";
my $wg_parser = XML::DOM::Parser->new;
my $wg_root = $wg_parser->parsefile($WG_DICTIONARY)->getDocumentElement;

print "building dictionary...\n";
@wg_keys = ();
my $wg_entries = $wg_root->getElementsByTagName("entry");
my $wg_entry_count = $wg_entries->getLength;

foreach my $index (0 .. $wg_entry_count - 1) {
  my $node = $wg_entries->item($index);

  # 3.1) build the key: "operator." followed by one \uNNNN per code point of
  # the "unicode" attribute, then "." and the form.
  # NOTE(review): the pattern only accepts code points written with a leading
  # 0 (after an optional "U"); confirm that this covers every entry.
  my $key = "operator.";
  my $remaining = $node->getAttribute("unicode") . "-";
  while ($remaining =~ m/^U?0(\w*)-(.*)$/) {
    $key .= "\\u$1";
    $remaining = $2;
  }
  $key .= "." . $node->getAttribute("form");

  # 3.2) build the array
  my @value = ();
  $value[0] = lc($node->getAttribute("description"));

  # lspace, rspace and minsize fall back to "5", "5" and "1" when the
  # attribute is empty
  foreach my $spec (["lspace", "5"], ["rspace", "5"], ["minsize", "1"]) {
    my ($attribute, $fallback) = @{ $spec };
    my $text = $node->getAttribute($attribute);
    push(@value, ($text eq "") ? $fallback : $text);
  }

  # boolean properties: true when the keyword appears in "properties"
  my $properties = $node->getAttribute("properties");
  push(@value,
       scalar($properties =~ m/^(.*)largeop(.*)$/),
       scalar($properties =~ m/^(.*)movablelimits(.*)$/),
       scalar($properties =~ m/^(.*)stretchy(.*)$/),
       scalar($properties =~ m/^(.*)separator(.*)$/),
       scalar($properties =~ m/^(.*)accent(.*)$/),
       scalar($properties =~ m/^(.*)fence(.*)$/),
       scalar($properties =~ m/^(.*)symmetric(.*)$/));

  $value[11] = $node->getAttribute("priority");
  $value[12] = $node->getAttribute("linebreakstyle");

  # not stored in the WG dictionary
  $value[13] = ""; # direction
  $value[14] = ""; # integral

  # 3.3) save the key and value
  # Keys are inserted at the front, so @wg_keys ends up in reverse document
  # order and pop() later returns them in document order.
  unshift(@wg_keys, $key);
  $wg_hash{$key} = [ @value ];
}
$wg_root->dispose;
################################################################################
# 4) Compare the two dictionaries and output the result

print "comparing dictionaries...\n";
open(my $file_differences, ">", $FILE_DIFFERENCES) ||
  die ("Couldn't open $FILE_DIFFERENCES: $!");
open(my $file_new_dictionary, ">", $FILE_NEW_DICTIONARY) ||
  die ("Couldn't open $FILE_NEW_DICTIONARY: $!");

my ($conflicting, $conflicting_stretching) = (0, 0);
my ($new, $new_stretching) = (0, 0);
my ($obsolete, $obsolete_stretching) = (0, 0);
my $unchanged = 0;

# 4.1) look to the entries of the WG dictionary
# @wg_keys is stored in reverse order, so pop() walks it in document order.
while (defined(my $key = pop(@wg_keys))) {

  my @wg = @{ $wg_hash{$key} };
  delete $wg_hash{$key};
  my $wg_value = generateCommon(@wg);

  if (exists($moz_hash{$key})) {
    # entry is in both dictionary
    my @moz = @{ $moz_hash{$key} };
    delete $moz_hash{$key};
    my $moz_value = generateCommon(@moz);

    if ($moz_value ne $wg_value) {
      # conflicting entry
      print {$file_differences} "[conflict]";
      $conflicting++;
      # index 6 is the stretchy flag
      if (($moz[6] ? 1 : 0) != ($wg[6] ? 1 : 0)) {
        print {$file_differences} "[stretching]";
        $conflicting_stretching++;
      }
      print {$file_differences} " - $key ($wg[0])\n";
      print {$file_differences} "-$moz_value\n+$wg_value\n\n";
    } else {
      # unchanged entry
      $unchanged++;
    }

    # in both cases the new dictionary gets the WG values completed with our
    # private data
    print {$file_new_dictionary}
      completeCommon($wg_value, $key, @moz, @wg);
  } else {
    # we don't have this entry in our dictionary yet
    print {$file_differences} "[new entry]";
    $new++;
    if ($wg[6]) {
      print {$file_differences} "[stretching]";
      $new_stretching++;
    }
    print {$file_differences} " - $key ($wg[0])\n";
    print {$file_differences} "-\n+$wg_value\n\n";
    print {$file_new_dictionary}
      completeCommon($wg_value, $key, (), @wg);
  }
}

print {$file_new_dictionary}
  "\n# Entries below are not part of the official MathML dictionary\n\n";

# 4.2) look in our dictionary the remaining entries
# (everything found in the WG dictionary has been deleted from %moz_hash)
for my $key (sort(keys(%moz_hash))) {
  my @moz = @{ $moz_hash{$key} };
  my $moz_value = generateCommon(@moz);
  print {$file_differences} "[obsolete entry]";
  $obsolete++;
  if ($moz[6]) {
    print {$file_differences} "[stretching]";
    $obsolete_stretching++;
  }
  print {$file_differences} " - $key ($moz[0])\n";
  print {$file_differences} "-$moz_value\n+\n\n";
  print {$file_new_dictionary}
    completeCommon($moz_value, $key, (), @moz);
}

# buffered write errors only show up when the files are closed
close($file_differences) ||
  die ("Couldn't close $FILE_DIFFERENCES: $!");
close($file_new_dictionary) ||
  die ("Couldn't close $FILE_NEW_DICTIONARY: $!");

print "\n";
print "- $obsolete obsolete entries ";
print "($obsolete_stretching of them are related to stretching)\n";
print "- $unchanged unchanged entries\n";
print "- $conflicting conflicting entries ";
print "($conflicting_stretching of them are related to stretching)\n";
print "- $new new entries ";
print "($new_stretching of them are related to stretching)\n";
print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
# The original message had no space after "run" and omitted the ".pl" suffix.
print "After having modified the dictionary, please run ";
print "./updateOperatorDictionary.pl check\n\n";
exit 0;
409 ################################################################################
sub usage {
  # Print the accepted command lines on standard output, then end the script
  # with exit status 0.
  my @lines = (
    "usage:",
    " ./updateOperatorDictionary.pl compare [dictionary]",
    " ./updateOperatorDictionary.pl check",
  );
  foreach my $text (@lines) {
    print "$text\n";
  }
  exit 0;
}
sub generateCommon {
  # Helper function to generate the string of data shared by both
  # dictionaries.
  #
  # Arguments: the fields of one dictionary record, in the index order used
  #            throughout this script (1 = lspace, 2 = rspace, 3 = minsize,
  #            4..10 = largeop, movablelimits, stretchy, separator, accent,
  #            fence, symmetric).
  # Returns:   "lspace:N rspace:N", followed by " minsize:N" when minsize is
  #            not "1" and by the name of each boolean property that is set.
  #
  # The result is built in lexical variables; the original version wrote to
  # the package variable $entry and overwrote any caller's value.
  my (@v) = @_;

  my @parts = ("lspace:$v[1]", "rspace:$v[2]");
  if ($v[3] ne "1") { push(@parts, "minsize:$v[3]"); }

  # names of the boolean properties, in the order of indexes 4 to 10
  my @flag_names = ("largeop", "movablelimits", "stretchy", "separator",
                    "accent", "fence", "symmetric");
  for my $offset (0 .. $#flag_names) {
    if ($v[4 + $offset]) { push(@parts, $flag_names[$offset]); }
  }

  return join(" ", @parts);
}
sub completeCommon {
  # Helper to add the key and our private data to the string produced by
  # generateCommon.
  #
  # Arguments: $entry  - the string of common data
  #            $key    - the key of the entry
  #            then one or two records of 15 fields each, flattened into the
  #            argument list: our own record followed by the WG record, or a
  #            single record when only one is available.
  # Returns:   "key = common-data[ direction:D][ integral] # comment\n"
  #
  # Perl flattens the argument list, so the original
  # "my($entry, $key, @v_moz, @v_wg) = @_" put every field into @v_moz and
  # left @v_wg empty; the fallback to the WG description could then never
  # work when two records were passed.  The records are now separated by
  # their fixed size.
  my ($entry, $key, @fields) = @_;

  my $record_size = 15; # number of fields in one dictionary record
  my @v_moz = @fields;
  my @v_wg = ();
  if (scalar(@v_moz) > $record_size) {
    # everything after the first record belongs to the WG record
    @v_wg = splice(@v_moz, $record_size);
  }

  $entry = "$key = $entry";

  # private data: direction (index 13) and integral (index 14)
  if ($v_moz[13]) { $entry = "$entry direction:$v_moz[13]"; }
  if ($v_moz[14]) { $entry = "$entry integral"; }

  if ($v_moz[0]) {
    # keep our previous comment
    $entry = "$entry # $v_moz[0]";
  } else {
    # otherwise use the description given by the WG
    my $description = defined($v_wg[0]) ? $v_wg[0] : "";
    $entry = "$entry # $description";
  }

  $entry = "$entry\n";
  return $entry;
}
sub generateEntry {
  # Helper function to generate an entry of our operator dictionary.
  #
  # Arguments: $key - the key of the entry
  #            @moz - the fields of the entry, as stored in %moz_hash
  # Returns:   the line built by completeCommon from the common data of
  #            generateCommon, with @moz passed as both records.
  #
  # The result is kept in a lexical variable; the original version stored it
  # in the package variable $entry and overwrote any caller's value.
  my ($key, @moz) = @_;
  my $common = generateCommon(@moz);
  return completeCommon($common, $key, @moz, @moz);
}