2 # ***** BEGIN LICENSE BLOCK *****
3 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 # The contents of this file are subject to the Mozilla Public License Version
6 # 1.1 (the "License"); you may not use this file except in compliance with
7 # the License. You may obtain a copy of the License at
8 # http://www.mozilla.org/MPL/
10 # Software distributed under the License is distributed on an "AS IS" basis,
11 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 # for the specific language governing rights and limitations under the
15 # The Original Code is Mozilla Communicator.
17 # The Initial Developer of the Original Code is
18 # Jungshik Shin <jshin@mailaps.org>.
19 # Portions created by the Initial Developer are Copyright (C) 2002, 2003
20 # the Initial Developer. All Rights Reserved.
24 # Alternatively, the contents of this file may be used under the terms of
25 # either the GNU General Public License Version 2 or later (the "GPL"), or
26 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 # in which case the provisions of the GPL or the LGPL are applicable instead
28 # of those above. If you wish to allow use of your version of this file only
29 # under the terms of either the GPL or the LGPL, and not to allow others to
30 # use your version of this file under the terms of the MPL, indicate your
31 # decision by deleting the provisions above and replace them with the notice
32 # and other provisions required by the GPL or the LGPL. If you do not delete
33 # the provisions above, a recipient may use your version of this file under
34 # the terms of any one of the MPL, the GPL or the LGPL.
36 # ***** END LICENSE BLOCK *****
38 # This script is used to generate precompiled CCMap files.
39 # See bug 180266 for details.
41 # Revised to support extended CCMaps for non-BMP characters : 2003-09-19 (bug 205387)
42 # Revised to support the automatic generation of a macro defining the size
43 # of a CCMap in terms of PRUint16 : 2003-12-11 (bug 224337)
48 use vars
qw($fill_fmt $fu_sz);
49 use vars qw($e_mid_offset $e_pg_offset);
51 (@ARGV < 1 ) and usage();
55 my ($ifh, $variable, $class);
56 open $ifh , "< $ifn" or die "Cannot open $ifn";
61 "$0:\n\t VARIABLE $variable is specified in the command line.\n" .
62 "\t The variable name spec. in the input file will be ignored.\n";
68 "$0:\n\t CLASS $class is specified in the command line.\n" .
69 "\t The class spec. in the input file will be ignored.\n";
72 use constant N_PLANES => 17; # BMP + 16 non-BMP planes
73 use constant PLANE_SZ => 0x10000;
74 use constant MID_SZ => PLANE_SZ / 16;
75 use constant PG_SZ => MID_SZ / 16;
77 # Unlike FillInfo() method in Mozilla, let's use 16bit integer
78 # to pack the character coverage/representability. This way,
79 # we can just copy fillinfo to fill up page maps later.
81 FILL_SZ => PLANE_SZ / 16,
82 MID_FILL_SZ => MID_SZ / 16,
83 PG_FILL_SZ => PG_SZ / 16
88 # network byte order short. actually, byte order doesn't matter.
90 $fu_sz = length(pack $fill_fmt, 0); # fillinfo unit size in byte (size of short)
100 my $planes = &read_input(\@fillinfo,$ifh,\%comments);
102 if (!defined($variable) && !defined($comments{'VARIABLE'}))
104 printf STDERR "Variable name is not specified in the cmd line. " .
105 "Neither is it found in the input file.\n\n" ;
109 $variable = $comments{'VARIABLE'} if (! defined($variable));
111 if (!defined($class) && !defined($comments{'CLASS'}))
113 printf STDERR "Class name is not specified in the cmd line. " .
114 "Neither is it found in the input file.\n\n" ;
118 $class = $comments{'CLASS'} if (! defined($class));
120 my $have_non_bmp = 0;
122 # add the non_bmp flag and the bmp ccmap size (default to 0)
123 # at the very beginning if there are non-bmp characters.
124 if ($planes & 0x1fe) {
129 my $plane_idx_offset;
130 foreach my $plane (0 .. ($have_non_bmp ? 16 : 0))
132 my @plane_ccmap = add_plane(\@ccmap, \@fillinfo, $plane);
133 my $size = @plane_ccmap;
134 push @ccmap, @plane_ccmap;
135 if ($plane == 0 && $have_non_bmp) {
137 # add 2 for non-BMP flag and BMP plane size
138 # that have negative indices in C++.
139 $plane_idx_offset = $size + 2;
141 # 'Flag' the offset as holding the plane indices (any negative
143 $pg_flags{$plane_idx_offset} = -1;
144 $pg_flags{$plane_idx_offset + 16} = -1;
146 # plane indices are 16 PRUint32's(not 16 PRUint16's).
147 # In Perl, we assign each PRUint32 two slots in @ccmap (in BE order)
148 my $e_plane_offset = $size + 16 * 2;
150 # set plane indices to the empty plane by default
151 foreach my $i (1 .. 16) {
152 # split PRUint32 into two PRUint16's in BE
153 push @ccmap, $e_plane_offset >> 16;
154 push @ccmap, $e_plane_offset & 0xffff;
156 # add 'the' empty plane;
157 push @ccmap, (0) x 16;
161 # split PRUint32 into two PRUint16's in BE.
162 # subtract 2 for non-BMP flag and BMP plane size
163 # that have negative indices in C++.
164 $ccmap[$plane_idx_offset + ($plane - 1) * 2] = (@ccmap - $size - 2) >> 16;
165 $ccmap[$plane_idx_offset + ($plane - 1) * 2 + 1] = (@ccmap - $size -2) & 0xffff;
170 &print_ccmap(\@ccmap, \%pg_flags, $variable, $class, \%comments, $have_non_bmp);
178 print STDERR <<USAGE;
179 Usage: $0 input_file [variable [class]]
181 The output file "class.ccmap" will be generated with
182 all three cases LE(16/32/64bit)/BE(16bit), BE(32bit), and BE(64bit)
183 put together. 'variable' will be used to name two macros, one for
184 dimensioning the size of a PRUin16[] and the other for the array
187 When 'variable' is omitted, it has to be specified in the input file with
188 the following syntax.
192 When 'class' is omitted, it has to be specified in the input file with
193 the following syntax.
# read_input (body only: the `sub` line, the read-loop header, a few
# statements and the return are not part of this extract).
#
# Parses the character list from the input and fills in the coverage
# bitmap and the comment table.
#
#   $fillinfo_p - reference to the coverage array; reset here to
#                 FILL_SZ * N_PLANES zeros. Each entry covers 16 consecutive
#                 code points, one bit per code point.
#   $input      - the caller passes its input file handle here; it is not
#                 referenced in the lines visible in this extract.
#   $comments_p - reference to a hash that receives the VARIABLE, CLASS,
#                 DESC and FILE metadata as well as one 0X%06X-formatted
#                 entry per accepted code point.
#
# NOTE(review): the caller stores the result in $planes, so the plane
# bitfield built below is presumably returned - confirm against the full file.
204 my($fillinfo_p, $input, $comments_p) = @_;
205 @
$fillinfo_p = (0) x
(FILL_SZ
* N_PLANES
);
207 # init bitfield for plane flags (17bits : BMP + 16 non-BMP planes)
# Metadata lines (VARIABLE::, CLASS::, DESCRIPTION::, FILE::) are stored in
# the comment hash with their keyword prefix removed.
214 /^\s*VARIABLE::\s*([a-zA-Z][a-zA-Z0-9_]*)$/ and
215 $comments_p->{'VARIABLE'} = $1,
218 ($comments_p->{'CLASS'} = $_) =~ s/^\s*CLASS::\s*([a-zA-Z0-9_]+).*$/$1/,
220 /^\s*DESCRIPTION::/ and
221 ($comments_p->{'DESC'} = $_) =~ s/^\s*DESCRIPTION::\s*//, next;
223 ($comments_p->{'FILE'} = $_) =~ s/^\s*FILE::\s*//, next;
# Any other line is ignored unless it starts (after optional whitespace)
# with 0x or 0X followed by at least four hex digits.
225 next unless /^\s*0[Xx][0-9A-Fa-f]{4}/;
# $1 comes from a match that is not visible in this extract. Its first
# whitespace-separated field is the code point, the rest an optional comment.
228 my ($u, $comment) = split /\s+/, $1, 2;
# NOTE(review): only lower-case hex digits are accepted after the 0x prefix,
# so $u has presumably been lower-cased on a line not shown - confirm.
231 next if $u =~ /^0x.*[^0-9a-f]+.*/;
234 if ( 0xd800 <= $usv && $usv <= 0xdfff || # surrogate code points
236 printf STDERR
"Invalid input $u at %4d\n", $lc;
# One 16-bit word per 16 code points: the word index is $usv >> 4 and the
# bit within the word is $usv & 0x0f.
# NOTE(review): $usv is assigned on a line not shown; presumably it is the
# numeric value of $u - confirm.
239 $fillinfo_p->[($usv >> 4)] |= (1 << ($usv & 0x0f));
240 # printf STDERR "input %s(%04x) \@line %d : put %04x @ %04x\n",
241 # $u,$usv, $lc, (1 << ($usv & 0x0f)), ($usv >> 4) & 0xfff;
243 # turn on plane flags
# Bit N of $planes is set once any code point of plane N has been seen.
244 $planes |= (1 << ($usv >> 16));
# Every accepted code point gets an entry keyed by its zero-padded,
# upper-case hex value; the value stays empty when the line has no comment.
246 my $key = sprintf("0X%06X", $usv);
247 $comments_p->{$key} = "";
249 # Remove '/*' and '*/' (C style comment) or '//' (C++ style comment)
250 # or ':' and store only the textual content of the comment.
# NOTE(review): the capture group in the pattern below is [^*]+, i.e. one or
# more non-asterisk characters (whitespace included), not non-white-space
# characters as its inline comment says.
251 if (defined($comment)) {
252 ($comments_p->{$key} = $comment)
254 (?
:/\*|//|:)? # '/*', '//' or ':' or NULL. Do not store.
255 \s* # zero or more of white space(s)
256 ([^*]+) # one or more of non-white space(s).Store it
257 # in $1 for the reference in replace part.
258 \s* # zero or more of white space(s)
259 (?:\*/)? # '*/' or NONE. Do not store
260 !$1!sx # replace the whole match with $1 stored above.
# add_full_mid (body only: the `sub` line and the closing braces are not
# part of this extract).
#
# Appends a full mid-pointer array to the CCMap being built, adding the
# full page first when none exists yet.
#
#   $ccmap_p     - reference to the CCMap array; modified in place.
#   $f_pg_offset - offset of an already-added full page; a false value
#                  means no full page has been added so far.
#
# Returns the list ($f_mid_offset, $f_pg_offset): the offset of the new
# full mid-pointer array and the offset of the full page it points to.
269 my($ccmap_p, $f_pg_offset) = @_;
270 # add a full page if not yet added.
271 if (! $f_pg_offset) {
# The new page starts at the current end of the array, i.e. at its length.
272 $f_pg_offset = @$ccmap_p;
# A full page is 16 entries of 0xffff (all 16 bits set in every entry).
273 push @$ccmap_p, (0xffff) x 16;
275 # add the full mid-pointer array with all the pointers pointing to the full page.
276 my $f_mid_offset = @$ccmap_p;
277 push @$ccmap_p, ($f_pg_offset) x 16;
278 return ($f_mid_offset, $f_pg_offset);
# add_new_mid (body only: the `sub` line, anything after the push and the
# closing brace are not part of this extract).
#
# Appends a new mid-pointer array of 16 entries to the CCMap and stores its
# offset in upper-pointer slot $mid.
#
#   $ccmap_p - reference to the CCMap array; modified in place.
#   $mid     - index of the upper-pointer slot to redirect to the new array.
#
# NOTE(review): the caller uses the return value as the offset of the new
# array, so a return of $mid_offset presumably follows - confirm against the
# full file.
283 my($ccmap_p, $mid) = @_;
# The new array starts at the current end of the CCMap, i.e. at its length.
284 my $mid_offset = @$ccmap_p;
285 $ccmap_p->[$mid] = $mid_offset;
286 #by default, all mid-pointers point to the empty page.
# $e_pg_offset is a package variable declared with use vars, not a parameter.
287 push @$ccmap_p, ($e_pg_offset) x 16;
# add_plane (body only: the `sub` line, several statements, the closing
# braces and the return are not part of this extract).
#
# Builds the CCMap fragment for one plane in the local array @ccmap.
#
#   $full_ccmap_p - reference to the CCMap assembled so far; only its length
#                   is read in the visible lines, to form absolute keys for
#                   the page-flag hash.
#   $fillinfo_p   - reference to the coverage words of all remaining planes;
#                   the first FILL_SZ entries are spliced off (consumed), so
#                   planes have to be processed in order.
#   $plane        - plane number; a false value (0) selects the BMP case.
#
# Outline of the visible logic:
#   * The coverage words are packed into a string so that an all-zero or
#     all-ones range can be detected with a single string comparison.
#   * An all-zero plane adds 16 zero entries for the BMP only (the smallest,
#     self-referring folded CCMap); for a non-BMP plane nothing is added and
#     the default empty plane is relied upon.
#   * Otherwise (presumably after an early return that is not visible here)
#     16 upper pointers aimed at the empty mid, the empty mid-pointer array
#     aimed at the empty page, and the empty page itself are laid down.
#   * Each of the 16 mids is then examined: empty mids are skipped, full mids
#     share one full mid array (add_full_mid), and mixed mids get their own
#     array (add_new_mid) whose 16 pages are handled the same way - empty
#     pages are skipped, full pages share one page of 0xffff entries, and
#     mixed pages are appended after recording their offset in the page-flag
#     hash as the start of a page holding actual data.
#
# NOTE(review): the caller assigns the result to an array, so @ccmap is
# presumably returned - confirm against the full file.
# NOTE(review): $f_pg_offset is used below but its declaration is not
# visible here. $fill_fmt, $fu_sz, $e_mid_offset and $e_pg_offset are package
# variables declared with use vars; the page-flag hash is not declared in the
# visible lines of this block.
293 my ($full_ccmap_p, $fillinfo_p, $plane) = @_;
294 # my @ccmap = @$ccmap_p;
295 my @ccmap = (); # plane ccmap
# Consume this plane's share of the coverage words from the caller's array.
296 my(@fillinfo) = splice @$fillinfo_p, 0, FILL_SZ;
297 # convert 4096(FILL_SZ) 16bit integers to a string of 4096 * $fu_sz
299 my($plane_str) = pack $fill_fmt x FILL_SZ, @fillinfo;
# A string consisting only of NUL bytes means the plane has no characters.
302 if ($plane_str eq "\0" x ($fu_sz * FILL_SZ)) {
303 # for non-BMP plane, the default empty plane ccmap would work.
304 # for BMP, we need 'self
-referring
' folded CCMap (the smallest CCMap)
305 push @ccmap, (0) x 16 if (!$plane);
309 #get all upper pointers to point at empty mid pointers
310 push @ccmap, ($e_mid_offset) x 16;
311 #get all mid-pointers to point at empty page.
312 push @ccmap, ($e_pg_offset) x 16;
313 push @ccmap, (0) x 16; # empty pg
315 my $f_mid_offset = 0;
318 foreach my $mid (0 .. 15)
320 my(@mid_fill) = splice @fillinfo, 0, MID_FILL_SZ;
321 # convert 256(MID_FILL_SZ) 16bit integers to a string of 256 * $fu_sz
323 my($mid_str) = pack $fill_fmt x MID_FILL_SZ, @mid_fill;
325 # for an empty mid, upper-pointer is already pointing to the empty mid.
326 next if ($mid_str eq "\0" x ($fu_sz * MID_FILL_SZ));
328 # for a full mid, add full mid if necessary.
329 if ($mid_str eq "\xff" x ($fu_sz * MID_FILL_SZ)) {
330 ($f_mid_offset, $f_pg_offset) =
331 add_full_mid(\@ccmap, $f_pg_offset) unless ($f_mid_offset);
332 $ccmap[$mid] = $f_mid_offset;
336 my $mid_offset = add_new_mid(\@ccmap,$mid);
338 foreach my $pg (0 .. 15) {
339 my(@pg_fill) = splice @mid_fill, 0, PG_FILL_SZ;
340 my($pg_str) = pack $fill_fmt x PG_FILL_SZ, @pg_fill;
342 # for an empty pg, mid-pointer is already pointing to the empty page.
343 next if ($pg_str eq "\x0" x ($fu_sz * PG_FILL_SZ));
345 # for a full pg, add the full pg if necessary.
346 # and set the mid-pointer to the full pg offset.
347 if ($pg_str eq "\xff" x ($fu_sz * PG_FILL_SZ)) {
348 if (! $f_pg_offset) {
349 $f_pg_offset = @ccmap;
350 #for the full pg, endianess and ALU size are immaterial.
351 push @ccmap, (0xffff) x 16;
353 $ccmap[$mid_offset + $pg] = $f_pg_offset;
357 $ccmap[$mid_offset + $pg] = @ccmap;
359 # 'Flag
' the offset as the beginning of a page with actual data as
360 # opposed to pointer sections.
361 $pg_flags{(scalar @$full_ccmap_p) + (scalar @ccmap)} = @ccmap;
363 push @ccmap, @pg_fill;
371 my($ccmap_p,$pg_flags_p, $variable, $class, $comments_p, $is_ext) = @_;
374 my $ofn = $class . ($is_ext ? ".x-ccmap" : ".ccmap");
376 open OUT, "> $ofn" or
377 die "cannot open $ofn for output\n";
379 print OUT print_preamble($variable, $class);
382 # defined ($comments_p->{'CLASS'}) and
383 # print OUT " CLASS:: $comments_p->{'CLASS'}\n";
384 print OUT " VARIABLE:: $variable\n";
385 print OUT " CLASS:: $class\n";
386 defined ($comments_p->{'DESC
'}) and
387 print OUT " DESCRIPTION:: $comments_p->{'DESC
'}\n";
388 defined ($comments_p->{'FILE
'}) and
389 print OUT " FILE:: $comments_p->{'FILE
'}\n";
393 for my $key (sort keys %$comments_p) {
394 next if ($key !~ /^0X/);
395 printf OUT " %s : %s\n", $key, $comments_p->{$key};
401 my(@idxlist, @int16toint32);
403 # When CCMap is accessed, (PRUint16 *) is cast to
404 # the pointer type of the ALU of a machine.
405 # For little endian machines, the size of the ALU
406 # doesn't matter (16, 32, 64). For Big endian
407 # machines with 32/64 bit ALU, two/four 16bit words
408 # have to be rearranged to be interpreted correctly
409 # as 32bit or 64bit integers with the 16bit word
410 # at the lowest address taking the highest place value.
411 # This shuffling is NOT necessary for the upper pointer section
412 # and mid-pointer sections.
414 # If non-BMP characters are present, 16 plane indices
415 # (32bit integers stored in two 16bit shorts in
416 # BE order) have to be treated differently based on
417 # the endianness as well.
419 # For BMP-only CCMap, 16BE CCMap is identical to LE CCMaps.
420 # With non-BMP characters present, to avoid the misalignment on 64bit
421 # machines, we add two 16-bit units of 0-padding before the ccmap flag
422 # (indicating whether the map is extended or not) and the BMP map size
423 # (bug 225340, bug 445626).
424 my @fmts = $is_ext ?
("64LE", "LE", "16BE", "32BE", "64BE") : ("LE", "32BE", "64BE") ;
425 foreach my $fmt (@fmts)
431 @idxlist = (0, 1, 2, 3);
432 @int16toint32 = (1, 0, 3, 2);
433 print OUT
"#if (defined(IS_LITTLE_ENDIAN) && ALU_SIZE == 64)\n" .
434 "// Precompiled CCMap for Little Endian(64bit)\n";
435 printf OUT
"#define ${variable}_SIZE %d\n", scalar @
$ccmap_p + 2;
436 printf OUT
"#define ${variable}_INITIALIZER \\\n";
437 printf OUT
"/* EXTFLG */ 0x0000,0x0000,0x%04X,0x%04X, \\\n",
438 $ccmap_p->[0], $ccmap_p->[1];
442 @idxlist = (0, 1, 2, 3);
443 @int16toint32 = (1, 0, 3, 2);
445 "#elif defined(IS_LITTLE_ENDIAN)\n" .
446 "// Precompiled CCMap for Little Endian(16/32bit) \n" :
447 "#if (defined(IS_LITTLE_ENDIAN) || ALU_SIZE == 16)\n" .
448 "// Precompiled CCMap for Little Endian(16/32/64bit)\n" .
449 "// and Big Endian(16bit)\n";
450 printf OUT
"#define ${variable}_SIZE %d\n", scalar @
$ccmap_p;
451 printf OUT
"#define ${variable}_INITIALIZER \\\n";
453 printf OUT
"/* EXTFLG */ 0x%04X,0x%04X, \\\n",
454 $ccmap_p->[0], $ccmap_p->[1];
459 @idxlist = (0, 1, 2, 3);
460 @int16toint32 = (0, 1, 2, 3);
461 print OUT
"#elif (ALU_SIZE == 16)\n" .
462 "// Precompiled CCMap for Big Endian(16bit)\n";
463 printf OUT
"#define ${variable}_SIZE %d\n", scalar @
$ccmap_p;
464 printf OUT
"#define ${variable}_INITIALIZER \\\n";
465 printf OUT
"/* EXTFLG */ 0x%04X,0x%04X, \\\n",
466 $ccmap_p->[0], $ccmap_p->[1];
470 @idxlist = (1, 0, 3, 2);
471 @int16toint32 = (0, 1, 2, 3);
472 print OUT
"#elif (ALU_SIZE == 32)\n" .
473 "// Precompiled CCMap for Big Endian(32bit)\n";
474 printf OUT
"#define ${variable}_SIZE %d\n", scalar @
$ccmap_p;
475 printf OUT
"#define ${variable}_INITIALIZER \\\n";
477 printf OUT
"/* EXTFLG */ 0x%04X,0x%04X, \\\n",
478 $ccmap_p->[0], $ccmap_p->[1];
483 @idxlist = (3, 2, 1, 0);
484 @int16toint32 = (0, 1, 2, 3);
485 print OUT
"#elif (ALU_SIZE == 64)\n" .
486 "// Precompiled CCMap for Big Endian(64bit)\n";
487 printf OUT
"#define ${variable}_SIZE %d\n", scalar @
$ccmap_p +
489 printf OUT
"#define ${variable}_INITIALIZER \\\n";
491 printf OUT
"/* EXTFLG */ 0x0000,0x0000,0x%04X,0x%04X, \\\n",
492 $ccmap_p->[0], $ccmap_p->[1];
498 $offset = $is_ext ?
2 : 0;
500 while ($offset < @
$ccmap_p) {
501 printf OUT
"/* %06x */ ", $offset - ($is_ext ?
2 : 0);
503 for my $j (defined($pg_flags_p->{$offset}) ?
504 ($pg_flags_p->{$offset} > 0 ?
505 @idxlist : @int16toint32) : (0,1,2,3)) {
506 printf OUT
"0x%04X,", $ccmap_p->[$offset + $i * 4 + $j];
508 print OUT
" \\\n " if $i==1;
510 if ($offset + 16 < @
$ccmap_p) {print OUT
" \\\n"; }
518 #error "We don't support this architecture."
529 my($variable, $class) = @_;
531 /* ***** BEGIN LICENSE BLOCK *****
532 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
534 * The contents of this file are subject to the Mozilla Public License Version
535 * 1.1 (the "License"); you may not use this file except in compliance with
536 * the License. You may obtain a copy of the License at
537 * http://www.mozilla.org/MPL/
539 * Software distributed under the License is distributed on an "AS IS" basis,
540 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
541 * for the specific language governing rights and limitations under the
544 * The Original Code is mozilla.org code.
546 * The Initial Developer of the Original Code is
547 * Jungshik Shin <jshin\@mailaps.org>
548 * Portions created by the Initial Developer are Copyright (C) 2003
549 * the Initial Developer. All Rights Reserved.
553 * Alternatively, the contents of this file may be used under the terms of
554 * either the GNU General Public License Version 2 or later (the "GPL"), or
555 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
556 * in which case the provisions of the GPL or the LGPL are applicable instead
557 * of those above. If you wish to allow use of your version of this file only
558 * under the terms of either the GPL or the LGPL, and not to allow others to
559 * use your version of this file under the terms of the MPL, indicate your
560 * decision by deleting the provisions above and replace them with the notice
561 * and other provisions required by the GPL or the LGPL. If you do not delete
562 * the provisions above, a recipient may use your version of this file under
563 * the terms of any one of the MPL, the GPL or the LGPL.
565 * ***** END LICENSE BLOCK ***** */
567 /*========================================================
568 This file contains a precompiled CCMap for a class of Unicode
569 characters ($class) to be identified quickly by Mozilla.
570 It was generated by ccmapbin.pl which you can find under
571 mozilla/intl/unicharutil/tools.
573 Enumerated below are characters included in the precompiled CCMap
574 which is human-readable but not so human-friendly. If you
575 needs to modify the list of characters belonging to "$class",
576 you have to make a new file (with the name of your choice)
577 listing characters (one character per line) you want to put
578 into "$class" in the format
582 In addition, the input file can have the following optional lines that
587 DESCRIPTION:: description of a character class
588 FILE:: mozilla source file to include the output file
591 Then, run the following in the current directory.
593 perl ccmapbin.pl input_file [$variable [$class]]
595 which will generate $class.ccmap (or $class.x-ccmap if the ccmap
596 includes non-BMP characters.). $variable is used as the prefix
597 in macros for the array initializer and the array size.
599 (see bug 180266, bug 167136, and bug 224337)