no bug - Import translations from android-l10n r=release a=l10n CLOSED TREE
[gecko.git] / layout / mathml / updateOperatorDictionary.pl
blob01fc4d0a88fa70f0db4b88b03913fd7701c11be8
#!/usr/bin/perl
# -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 use XML::LibXSLT;
8 use XML::LibXML;
9 use LWP::Simple;
# Names of the files this script writes; "clean" mode deletes all five.
our $FILE_UNICODE        = "unicode.xml";
our $FILE_DICTIONARY     = "dictionary.xml";
our $FILE_DIFFERENCES    = "differences.txt";
our $FILE_NEW_DICTIONARY = "new_dictionary.txt";
our $FILE_SYNTAX_ERRORS  = "syntax_errors.txt";

# Our own operator dictionary (a property file).
our $MOZ_DICTIONARY = "mathfont.properties";

# Dictionary provided by the W3C in "XML Entity Definitions for Characters".
# "download" mode replaces this URL with its second argument when one is given.
our $WG_DICTIONARY_URL = "https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml";

# XSL stylesheet used to extract the relevant data from the W3C dictionary.
our $DICTIONARY_XSL = "operatorDictionary.xsl";

# W3C dictionary after transformation with the stylesheet above; "compare"
# mode reads this file unless another one is named on the command line.
our $WG_DICTIONARY = $FILE_DICTIONARY;
# Validate the command line before doing any work: the first argument must be
# a known mode, and the number of arguments must not exceed what that mode
# accepts. Anything else prints the usage text (which exits).
{
  # Highest index allowed in @ARGV for each mode (0 means the mode word alone).
  my %last_index_for = (
    download => 1,
    compare  => 1,
    check    => 0,
    clean    => 0,
  );

  my $acceptable = @ARGV
                   && exists $last_index_for{$ARGV[0]}
                   && $#ARGV <= $last_index_for{$ARGV[0]};
  usage() unless $acceptable;
}
# "download" mode: fetch the W3C unicode.xml (from the default URL or from the
# URL given as second argument), store it in $FILE_UNICODE, transform it with
# $DICTIONARY_XSL and write the result to $FILE_DICTIONARY, then quit.
if ($ARGV[0] eq "download") {
  if ($#ARGV == 1) {
    $WG_DICTIONARY_URL = $ARGV[1];
  }

  print "Downloading $WG_DICTIONARY_URL...\n";
  # getstore() returns the HTTP status code. Stop here on failure instead of
  # going on to parse a missing or stale $FILE_UNICODE.
  my $status = getstore($WG_DICTIONARY_URL, $FILE_UNICODE);
  is_success($status) ||
    die ("Couldn't download $WG_DICTIONARY_URL (HTTP status $status)!");

  print "Converting $FILE_UNICODE into $FILE_DICTIONARY...\n";
  my $xslt = XML::LibXSLT->new();
  my $source = XML::LibXML->load_xml(location => $FILE_UNICODE);
  my $style_doc = XML::LibXML->load_xml(location => $DICTIONARY_XSL,
                                        no_cdata=>1);
  my $stylesheet = $xslt->parse_stylesheet($style_doc);
  my $results = $stylesheet->transform($source);

  # Three-argument open with a lexical handle: the file name can never be
  # mistaken for an open mode, and the handle does not leak into a global.
  open(my $dictionary_fh, '>', $FILE_DICTIONARY) ||
    die ("Couldn't open $FILE_DICTIONARY!");
  print $dictionary_fh $stylesheet->output_as_bytes($results);
  # Buffered write errors only show up when the handle is closed.
  close($dictionary_fh) || die ("Couldn't write $FILE_DICTIONARY!");
  exit 0;
}
# "clean" mode: remove every file this script may have produced, then quit.
# Files that do not exist are silently ignored by unlink.
if ($ARGV[0] eq "clean") {
  my @generated_files = (
    $FILE_UNICODE,
    $FILE_DICTIONARY,
    $FILE_DIFFERENCES,
    $FILE_NEW_DICTIONARY,
    $FILE_SYNTAX_ERRORS,
  );
  unlink(@generated_files);
  exit 0;
}
# "compare" mode takes an optional second argument: the transformed W3C
# dictionary to read instead of the default one.
$WG_DICTIONARY = $ARGV[1] if (@ARGV == 2 && $ARGV[0] eq "compare");
################################################################################
# structure of the dictionary used by this script:
# - key: same as in mathfont.properties
# - table:
#    index | value
#      0   | description
#      1   | lspace
#      2   | rspace
#      4   | largeop
#      5   | movablelimits
#      6   | stretchy
#      7   | separator
#      8   | accent
#      9   | fence
#     10   | symmetric
#     13   | direction
#   (indices 3, 11 and 12 are never assigned by this script)

# 1) build %moz_hash from $MOZ_DICTIONARY

print "loading $MOZ_DICTIONARY...\n";
open(my $moz_fh, '<', $MOZ_DICTIONARY) || die ("Couldn't open $MOZ_DICTIONARY!");

print "building dictionary...\n";
while (my $line = <$moz_fh>) {
  # Only "operator.*" lines describe operators.
  next unless ($line =~ m/^operator\.(.*)$/);

  # Expected shape: "<key> = <properties> # <description>". A line that does
  # not match is skipped with a warning; reading $1..$3 after a failed match
  # would silently reuse the captures of the previous successful match.
  unless ($line =~ m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/) {
    warn "skipping malformed line $. of $MOZ_DICTIONARY\n";
    next;
  }

  # 1.1) build the key
  my ($key, $properties, $description) = ($1, $2, $3);

  # 1.2) build the array
  my @value = ();
  $value[0] = $description;
  # lspace/rspace are a single digit and default to 5 when absent.
  $value[1] = ($properties =~ m/^.*lspace:(\d)/) ? $1 : "5";
  $value[2] = ($properties =~ m/^.*rspace:(\d)/) ? $1 : "5";
  # Boolean properties: true when the word occurs in the property list.
  $value[4] = ($properties =~ m/largeop/);
  $value[5] = ($properties =~ m/movablelimits/);
  $value[6] = ($properties =~ m/stretchy/);
  $value[7] = ($properties =~ m/separator/);
  $value[8] = ($properties =~ m/accent/);
  $value[9] = ($properties =~ m/fence/);
  $value[10] = ($properties =~ m/symmetric/);
  # direction is a lowercase word, or "" when the property is absent.
  $value[13] = ($properties =~ m/^.*direction:([a-z]*)/) ? $1 : "";

  # 1.3) save the key and value
  $moz_hash{$key} = [ @value ];
}

close($moz_fh);
################################################################################
# 2) If mode "check", verify validity of our operator dictionary and quit.
#    If mode "compare", go to step 3)

if ($ARGV[0] eq "check") {
  print "checking operator dictionary...\n";
  open(my $errors_fh, '>', $FILE_SYNTAX_ERRORS) ||
    die ("Couldn't open $FILE_SYNTAX_ERRORS!");

  my $nb_errors = 0;
  my $nb_warnings = 0; # reported below; no check in this block increments it

  # Count one error and write its message to the error file.
  my $report_error = sub {
    my ($message) = @_;
    $nb_errors++;
    print $errors_fh "error: $message\n";
  };

  # 2.1) check the validity of our private data, one entry at a time.
  # Keys are sorted so the error file has a stable order between runs.
  foreach my $key (sort keys %moz_hash) {
    my @moz = @{ $moz_hash{$key} };

    # $valid starts true for every entry and is cleared by ANY failed check,
    # so the entry itself is printed after its error message(s).
    my $valid = 1;

    if ($key =~ /\\u.+\\u.+\\u.+/) {
      $valid = 0;
      $report_error->("\"$key\" has more than 2 characters");
    }

    if ($key =~ /\\u20D2\./ || $key =~ /\\u0338\./) {
      $valid = 0;
      $report_error->("\"$key\" ends with character U+20D2 or U+0338");
    }

    if (!($moz[13] eq "" ||
          $moz[13] eq "horizontal" ||
          $moz[13] eq "vertical")) {
      $valid = 0;
      $report_error->("invalid direction \"$moz[13]\"");
    }

    if ($moz[4] && !($moz[13] eq "vertical")) {
      $valid = 0;
      $report_error->("operator is largeop but does not have vertical direction");
    }

    if (!$valid) {
      my $entry = generateEntry($key, @moz);
      print $errors_fh $entry;
      print $errors_fh "\n";
    }
  }

  # 2.2) check that all forms have the same direction.
  # The key list is computed once, so deleting from %moz_hash inside the loop
  # is safe.
  foreach my $key (sort keys %moz_hash) {
    # Skip operators already handled (and removed) through another form.
    next unless (exists $moz_hash{$key});

    # Split "<operator>.<form>"; keys without a form suffix cannot be grouped.
    next unless ($key =~ m/^([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
    my $operator = $1;

    # Keys of the forms of this operator that are present in the dictionary,
    # in prefix / infix / postfix order. Never empty: $key is one of them.
    my @form_keys = grep { exists $moz_hash{$_} }
                    map { "$operator.$_" } qw(prefix infix postfix);

    # All pairs agree exactly when every form agrees with the first one.
    my $direction = $moz_hash{$form_keys[0]}[13];
    my $same_direction =
      !grep { !($moz_hash{$_}[13] eq $direction) } @form_keys;

    if (!$same_direction) {
      $nb_errors++;
      print $errors_fh
        "error: operator has a stretchy form, but all forms";
      print $errors_fh
        " have not the same direction\n";
      foreach my $form_key (@form_keys) {
        my $entry = generateEntry($form_key, @{ $moz_hash{$form_key} });
        print $errors_fh $entry;
      }
      print $errors_fh "\n";
    }

    # Remove every form of this operator so it is examined only once.
    delete @moz_hash{@form_keys};
  }

  close($errors_fh) || die ("Couldn't write $FILE_SYNTAX_ERRORS!");
  print "\n";
  if ($nb_errors > 0 || $nb_warnings > 0) {
    print "$nb_errors error(s) found\n";
    print "$nb_warnings warning(s) found\n";
    print "See output file $FILE_SYNTAX_ERRORS.\n\n";
  } else {
    print "No error found.\n\n";
  }

  exit 0;
}
################################################################################
# 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY

print "loading $WG_DICTIONARY...\n";
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($WG_DICTIONARY);

print "building dictionary...\n";
@wg_keys = ();

foreach my $entry ($doc->findnodes('/root/entry')) {
  my $unicode = $entry->getAttribute("unicode");

  # Skip non-BMP Arabic characters that are handled specially.
  # This must be a string comparison: with "==" both sides evaluate to the
  # number 0, the test is true for every entry and nothing is ever loaded.
  if ($unicode eq "U1EEF0" || $unicode eq "U1EEF1") {
    next;
  }

  # 3.1) build the key: "operator" followed by one \uNNNN per code point and
  # the form. The attribute is a "-"-separated list of code points; the loop
  # peels them off one by one, dropping the optional "U" and one leading "0".
  my $key = "operator.";
  my $remaining = "$unicode-";
  while ($remaining =~ m/^U?0(\w*)-(.*)$/) {
    # Concatenate .\uNNNN
    $key = "$key\\u$1";
    $remaining = $2;
  }

  my $form = $entry->getAttribute("form");
  $key = "$key.$form";

  # 3.2) build the array
  my @value = ();
  $value[0] = lc($entry->getAttribute("description"));
  # lspace/rspace default to 5 when the attribute is missing or empty.
  $value[1] = $entry->getAttribute("lspace");
  if ($value[1] eq "") { $value[1] = "5"; }
  $value[2] = $entry->getAttribute("rspace");
  if ($value[2] eq "") { $value[2] = "5"; }

  # Boolean properties: true when the word occurs in the "properties" list.
  my $properties = $entry->getAttribute("properties");
  $value[4] = ($properties =~ m/largeop/);
  $value[5] = ($properties =~ m/movablelimits/);
  $value[6] = ($properties =~ m/stretchy/);
  $value[7] = ($properties =~ m/separator/);
  $value[9] = ($properties =~ m/fence/);
  $value[10] = ($properties =~ m/symmetric/);

  # not stored in the WG dictionary
  $value[8] = ""; # accent
  $value[13] = ""; # direction

  # 3.3) save the key and value
  push(@wg_keys, $key);
  $wg_hash{$key} = [ @value ];
}

# Step 4 pops keys off the end of @wg_keys; reversing here makes it visit the
# entries in document order.
@wg_keys = reverse(@wg_keys);
################################################################################
# 4) Compare the two dictionaries and output the result
#
# Writes a human-readable report to $FILE_DIFFERENCES and a merged dictionary
# to $FILE_NEW_DICTIONARY, prints a summary and quits.

print "comparing dictionaries...\n";
open(my $differences_fh, '>', $FILE_DIFFERENCES) ||
  die ("Couldn't open $FILE_DIFFERENCES!");
open(my $new_dictionary_fh, '>', $FILE_NEW_DICTIONARY) ||
  die ("Couldn't open $FILE_NEW_DICTIONARY!");

my ($conflicting, $conflicting_stretching) = (0, 0);
my ($new, $new_stretching) = (0, 0);
my ($obsolete, $obsolete_stretching) = (0, 0);
my $unchanged = 0;

# 4.1) look to the entries of the WG dictionary
while (defined(my $key = pop(@wg_keys))) {

  my @wg = @{ $wg_hash{$key} };
  delete $wg_hash{$key};
  my $wg_value = generateCommon(@wg);

  if (exists($moz_hash{$key})) {
    # entry is in both dictionary
    my @moz = @{ $moz_hash{$key} };
    # Removing it leaves only our private entries in %moz_hash for step 4.2.
    delete $moz_hash{$key};
    my $moz_value = generateCommon(@moz);

    if ($moz_value ne $wg_value) {
      # conflicting entry
      print $differences_fh "[conflict]";
      $conflicting++;
      # The stretchy flags differ when exactly one of them is true.
      if ($moz[6] xor $wg[6]) {
        print $differences_fh "[stretching]";
        $conflicting_stretching++;
      }
      print $differences_fh " - $key ($wg[0])\n";
      print $differences_fh "-$moz_value\n+$wg_value\n\n";
    } else {
      # unchanged entry
      $unchanged++;
    }

    # Either way the new dictionary takes the WG values, completed with our
    # private data.
    my $line = completeCommon($wg_value, $key, @moz, @wg);
    print $new_dictionary_fh $line;
  } else {
    # we don't have this entry in our dictionary yet
    print $differences_fh "[new entry]";
    $new++;
    if ($wg[6]) {
      print $differences_fh "[stretching]";
      $new_stretching++;
    }
    print $differences_fh " - $key ($wg[0])\n";
    print $differences_fh "-\n+$wg_value\n\n";
    my $line = completeCommon($wg_value, $key, (), @wg);
    print $new_dictionary_fh $line;
  }
}

print $new_dictionary_fh
  "\n# Entries below are not part of the official MathML dictionary\n\n";

# 4.2) look in our dictionary the remaining entries, in sorted key order
foreach my $key (sort(keys %moz_hash)) {
  my @moz = @{ $moz_hash{$key} };
  my $moz_value = generateCommon(@moz);
  print $differences_fh "[obsolete entry]";
  $obsolete++;
  if ($moz[6]) {
    print $differences_fh "[stretching]";
    $obsolete_stretching++;
  }
  print $differences_fh " - $key ($moz[0])\n";
  print $differences_fh "-$moz_value\n+\n\n";
  my $line = completeCommon($moz_value, $key, (), @moz);
  print $new_dictionary_fh $line;
}

# Buffered write errors only show up when the handles are closed.
close($differences_fh) || die ("Couldn't write $FILE_DIFFERENCES!");
close($new_dictionary_fh) || die ("Couldn't write $FILE_NEW_DICTIONARY!");

print "\n";
print "- $obsolete obsolete entries ";
print "($obsolete_stretching of them are related to stretching)\n";
print "- $unchanged unchanged entries\n";
print "- $conflicting conflicting entries ";
print "($conflicting_stretching of them are related to stretching)\n";
print "- $new new entries ";
print "($new_stretching of them are related to stretching)\n";
print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
# The two halves used to be joined without a space and named the script
# without its ".pl" extension.
print "After having modified the dictionary, please run ";
print "./updateOperatorDictionary.pl check\n\n";
exit 0;
################################################################################
sub usage {
  # Print the command-line syntax of each supported mode on standard output,
  # then terminate the script with exit status 0.
  my @synopsis = (
    "usage:\n",
    " ./updateOperatorDictionary.pl download [unicode.xml]\n",
    " ./updateOperatorDictionary.pl compare [dictionary.xml]\n",
    " ./updateOperatorDictionary.pl check\n",
    " ./updateOperatorDictionary.pl clean\n",
  );
  print join("", @synopsis);
  exit 0;
}
sub generateCommon {
  # Helper function to generate the string of data shared by both
  # dictionaries.
  #
  # Arguments: one dictionary record passed as a flat list (index 1 = lspace,
  # 2 = rspace, 4 = largeop, 5 = movablelimits, 6 = stretchy, 7 = separator,
  # 9 = fence, 10 = symmetric).
  # Returns: "lspace:N rspace:N" followed by the name of every property whose
  # value is true, in the order above, separated by single spaces.
  #
  # The string is assembled in lexical variables only, so the caller's own
  # $entry is never overwritten as a side effect.
  my(@v) = @_;

  my @words = ("lspace:$v[1]", "rspace:$v[2]");

  # [record index, property name], in output order.
  my @properties = (
    [4,  "largeop"],
    [5,  "movablelimits"],
    [6,  "stretchy"],
    [7,  "separator"],
    [9,  "fence"],
    [10, "symmetric"],
  );
  foreach my $property (@properties) {
    my ($index, $name) = @{ $property };
    push(@words, $name) if $v[$index];
  }

  return join(" ", @words);
}
sub completeCommon {
  # Helper to add key and private data to the output of generateCommon.
  #
  # Arguments:
  #   $entry - property string produced by generateCommon
  #   $key   - dictionary key, as in mathfont.properties
  #   then one or two dictionary records flattened into the argument list:
  #   our record followed by the WG record, or a single record.
  # Returns: "<key> = <entry>[ accent][ direction:<dir>] # <comment>\n".
  #
  # Perl flattens the argument list, so two array parameters cannot be
  # unpacked directly: the first array would take everything and the second
  # would always be empty. Every record built by this script ends at index 13,
  # i.e. holds 14 elements, so the list is split at that length instead.
  my($entry, $key, @v_moz) = @_;
  my $RECORD_LENGTH = 14;
  my @v_wg = ();
  if (@v_moz > $RECORD_LENGTH) {
    @v_wg = splice(@v_moz, $RECORD_LENGTH);
  }

  $entry = "$key = $entry";

  # accent and direction are private data, absent from the WG dictionary.
  if ($v_moz[8]) { $entry = "$entry accent"; }
  if ($v_moz[13]) { $entry = "$entry direction:$v_moz[13]"; }

  # Keep our previous comment; otherwise use the description given by the WG
  # (an empty comment when neither record provides one).
  my $comment = $v_moz[0] ? $v_moz[0] : $v_wg[0];
  $comment = "" unless defined($comment);

  return "$entry # $comment\n";
}
sub generateEntry {
  # Helper function to generate an entry of our operator dictionary.
  #
  # Arguments: the dictionary key, then one of our records as a flat list.
  # Returns: the complete line for that key, as produced by completeCommon
  # when the same record supplies both the private data and the description.
  #
  # The intermediate string is kept in a lexical variable rather than in the
  # package-wide $entry, which callers also use for their own purposes.
  my($key, @moz) = @_;
  my $common = generateCommon(@moz);
  return completeCommon($common, $key, @moz, @moz);
}