1 # chartables.pl - A perl program to generate tables for use by the
4 # Copyright (C) 1998, 1999 Red Hat, Inc.
6 # This file is part of libjava.
8 # This software is copyrighted work licensed under the terms of the
9 # Libjava License. Please consult the file "LIBJAVA_LICENSE" for
12 # This program requires a `unidata.txt' file of the form distributed
13 # on the Unicode 2.0 CD ROM. Or, get it more conveniently here:
14 # ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData-Latest.txt
15 # Version `2.1.8' of this file was last used to update the Character class.
17 # Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
18 # "The Java Language Specification", ISBN 0-201-63451-1
19 # plus online API docs for JDK 1.2 beta from http://www.javasoft.com.
21 # Usage: perl chartables.pl [-n] UnicodeData-VERSION.txt
22 # If this exits with nonzero status, then you must investigate the
23 # cause of the problem.
24 # Diagnostics and other information to stderr.
25 # This creates the new include/java-chartables.h and
26 # include/java-chardecomp.h files directly.
27 # With -n, the files are not created, but all processing
30 # Fields in the table.
43 $TAMIL_DIGIT_ONE = 0x0be7;
44 $TAMIL_DIGIT_NINE = 0x0bef;
46 # These are endpoints of legitimate gaps in the tables.
47 $CJK_IDEOGRAPH_END = 0x9fa5;
49 $HIGH_SURROGATE_END = 0xdb7f;
50 $PRIVATE_HIGH_SURROGATE_END = 0xdbff;
51 $LOW_SURROGATE_END = 0xdfff;
52 $PRIVATE_END = 0xf8ff;
84 # There are a few characters which actually need two attributes.
85 # These are special-cased.
86 $ROMAN_START = 0x2160;
88 %second_attributes = ();
95 'Mn' => 'NON_SPACING_MARK',
96 'Mc' => 'COMBINING_SPACING_MARK',
97 'Me' => 'ENCLOSING_MARK',
98 'Nd' => 'DECIMAL_DIGIT_NUMBER',
99 'Nl' => 'LETTER_NUMBER',
100 'No' => 'OTHER_NUMBER',
101 'Zs' => 'SPACE_SEPARATOR',
102 'Zl' => 'LINE_SEPARATOR',
103 'Zp' => 'PARAGRAPH_SEPARATOR',
107 'Co' => 'PRIVATE_USE',
108 'Cn' => 'UNASSIGNED',
109 'Lu' => 'UPPERCASE_LETTER',
110 'Ll' => 'LOWERCASE_LETTER',
111 'Lt' => 'TITLECASE_LETTER',
112 'Lm' => 'MODIFIER_LETTER',
113 'Lo' => 'OTHER_LETTER',
114 'Pc' => 'CONNECTOR_PUNCTUATION',
115 'Pd' => 'DASH_PUNCTUATION',
116 'Ps' => 'START_PUNCTUATION',
117 'Pe' => 'END_PUNCTUATION',
118 'Pi' => 'START_PUNCTUATION',
119 'Pf' => 'END_PUNCTUATION',
120 'Po' => 'OTHER_PUNCTUATION',
121 'Sm' => 'MATH_SYMBOL',
122 'Sc' => 'CURRENCY_SYMBOL',
123 'Sk' => 'MODIFIER_SYMBOL',
124 'So' => 'OTHER_SYMBOL'
127 # These map characters to their decompositions.
128 %canonical_decomposition = ();
129 %full_decomposition = ();
132 # Handle `-n' and open output files.
133 local ($f1, $f2) = ('include/java-chartables.h',
134 'include/java-chardecomp.h');
135 if ($ARGV[0] eq '-n')
142 open (CHARTABLE
, "> $f1");
143 open (DECOMP
, "> $f2");
145 # Process the Unicode file.
149 # Specify a limit for split so that we pick up trailing fields.
150 # We make the limit larger than we need, to catch the case where
151 # there are extra fields.
152 @fields = split (';', $_, 30);
153 # Convert code to number.
154 $ncode = hex ($fields[$CODE]);
158 print STDERR
("Entry for \\u", $fields[$CODE],
159 " has wrong number of fields: ", $#fields, "\n");
162 $name{$fields[$CODE]} = $fields[$NAME];
164 # If we've found a gap in the table, fill it in.
165 if ($ncode != $prevcode + 1)
167 &process_gap
(*fields
, $prevcode, $ncode);
170 &process_char
(*fields
, $ncode);
175 if ($prevcode != 0xffff)
177 # Setting of `fields' parameter doesn't matter here.
178 &process_gap
(*fields
, $prevcode, 0x10000);
181 print CHARTABLE
"// java-chartables.h - Character tables for java.lang.Character -*- c++ -*-\n\n";
182 print CHARTABLE
"#ifndef __JAVA_CHARTABLES_H__\n";
183 print CHARTABLE
"#define __JAVA_CHARTABLES_H__\n\n";
184 print CHARTABLE
"// These tables are automatically generated by the chartables.pl\n";
185 print CHARTABLE
"// script. DO NOT EDIT the tables. Instead, fix the script\n";
186 print CHARTABLE
"// and run it again.\n\n";
187 print CHARTABLE
"// This file should only be included by natCharacter.cc\n\n";
192 # Titlecase mapping tables.
193 if ($#title_to_lower != $#title_to_upper)
195 # If this fails we need to reimplement toTitleCase.
196 print STDERR
"titlecase mappings have different sizes\n";
199 # Also ensure that the tables are entirely parallel.
200 foreach $key (sort keys %title_to_lower)
202 if (! defined $title_to_upper{$key})
204 print STDERR
"titlecase mappings have different entries\n";
208 &print_single_map
("title_to_lower_table", %title_to_lower);
209 &print_single_map
("title_to_upper_table", %title_to_upper);
211 print CHARTABLE
"#ifdef COMPACT_CHARACTER\n\n";
213 printf CHARTABLE
"#define TAMIL_DIGIT_ONE 0x%04x\n\n", $TAMIL_DIGIT_ONE;
215 # All numeric values.
219 &print_block
("digit_table", *digit_start
, *digit_end
);
222 &print_block
("space_table", *space_start
, *space_end
);
224 # Letters. We used to generate a separate letter table. But this
225 # doesn't really seem worthwhile. Simply using `all_table' saves us
226 # about 800 bytes, and only adds 3 table probes to isLetter.
227 # &print_block ("letter_table", *letter_start, *letter_end);
230 &print_case_table
("upper", *upper_start
, *upper_end
, *upper_map
, *upper_anom
);
231 &print_case_table
("lower", *lower_start
, *lower_end
, *lower_map
, *lower_anom
);
234 &print_all_block
(*all_start
, *all_end
, *all_cats
);
236 print CHARTABLE
"#else /* COMPACT_CHARACTER */\n\n";
238 printf CHARTABLE
"#define ROMAN_START 0x%04x\n", $ROMAN_START;
239 printf CHARTABLE
"#define ROMAN_END 0x%04x\n\n", $ROMAN_END;
241 &print_fast_tables
(*all_start
, *all_end
, *all_cats
,
242 *attributes
, *second_attributes
);
244 print CHARTABLE
"#endif /* COMPACT_CHARACTER */\n\n";
246 print CHARTABLE
"#endif /* __JAVA_CHARTABLES_H__ */\n";
248 printf STDERR
"Approximately %d bytes of data generated (compact case)\n",
252 # Now generate decomposition tables.
253 printf DECOMP
"// java-chardecomp.h - Decomposition character tables -*- c++ -*-\n\n";
254 printf DECOMP
"#ifndef __JAVA_CHARDECOMP_H__\n";
255 printf DECOMP
"#define __JAVA_CHARDECOMP_H__\n\n";
256 print DECOMP
"// These tables are automatically generated by the chartables.pl\n";
257 print DECOMP
"// script. DO NOT EDIT the tables. Instead, fix the script\n";
258 print DECOMP
"// and run it again.\n\n";
259 print DECOMP
"// This file should only be included by natCollator.cc\n\n";
261 print DECOMP
"struct decomp_entry\n{\n";
262 print DECOMP
" jchar key;\n";
263 print DECOMP
" const char *value;\n";
264 print DECOMP
"};\n\n";
266 &write_decompositions
;
268 printf DECOMP
"#endif /* __JAVA_CHARDECOMP_H__ */\n";
277 # Process a gap in the space.
280 local (*fields
, $prevcode, $ncode) = @_;
281 local (@gap_fields, $i);
283 if ($ncode == $CJK_IDEOGRAPH_END
284 || $ncode == $HANGUL_END
285 || $ncode == $HIGH_SURROGATE_END
286 || $ncode == $PRIVATE_HIGH_SURROGATE_END
287 || $ncode == $LOW_SURROGATE_END
288 || $ncode == $PRIVATE_END)
290 # The characters in the gap we just found are known to
291 # have the same properties as the character at the end of
293 @gap_fields = @fields;
297 # This prints too much to be enabled.
298 # print STDERR "Gap found at \\u", $fields[$CODE], "\n";
299 @gap_fields = ('', '', 'Cn', '', '', '', '', '', '', '', '',
303 for ($i = $prevcode + 1; $i < $ncode; ++$i)
305 $gap_fields[$CODE] = sprintf ("%04x", $i);
306 $gap_fields[$NAME] = "CHARACTER " . $gap_fields[$CODE];
307 &process_char
(*gap_fields
, $i);
311 # Process a single character.
#
# Arguments (from @_):
#   *fields - typeglob whose array holds the split fields of one entry of
#             the Unicode data file (indexed by $CODE, $NAME, $CATEGORY,
#             $UPPERCASE, $LOWERCASE, $DECOMPOSITION, $DECIMAL, $DIGIT,
#             $NUMERIC).
#   $ncode  - the character code as a number.
#
# Feeds the character into every table this script builds: decomposition
# maps, titlecase maps, upper/lower case blocks, numeric values, the digit
# and space blocks, and the `all' category block.  Inconsistencies are
# reported on STDERR.
314 local (*fields
, $ncode) = @_;
316 if ($fields[$DECOMPOSITION] ne '')
318 &add_decomposition
($ncode, $fields[$DECOMPOSITION]);
321 # If this is a titlecase character, mark it.
322 if ($fields[$CATEGORY] eq 'Lt')
324 $title_to_upper{$fields[$CODE]} = $fields[$UPPERCASE];
325 $title_to_lower{$fields[$CODE]} = $fields[$LOWERCASE];
329 # For upper and lower case mappings, we try to build compact
330 # tables that map range onto range. We specifically want to
331 # avoid titlecase characters. Java specifies a range check to
332 # make sure the character is not between 0x2000 and 0x2fff.
333 # We avoid that here because we need to generate table entries
334 # -- toLower and toUpper still work in that range.
335 if ($fields[$UPPERCASE] eq ''
336 && ($fields[$LOWERCASE] ne ''
337 || $fields[$NAME] =~ /CAPITAL (LETTER|LIGATURE)/))
339 if ($fields[$LOWERCASE] ne '')
341 &update_case_block
(*upper_start
, *upper_end
, *upper_map
,
342 $fields[$CODE], $fields[$LOWERCASE]);
343 &set_attribute
($ncode, hex ($fields[$LOWERCASE]));
347 $upper_anom{$fields[$CODE]} = 1;
350 elsif ($fields[$LOWERCASE] ne '')
352 print STDERR
("Java missed upper case char \\u",
353 $fields[$CODE], "\n");
355 elsif ($fields[$CATEGORY] eq 'Lu')
357 # This case is for letters which are marked as upper case
358 # but for which there is no lower case equivalent. For
359 # instance, LATIN LETTER YR.
362 if ($fields[$LOWERCASE] eq ''
363 && ($fields[$UPPERCASE] ne ''
364 || $fields[$NAME] =~ /SMALL (LETTER|LIGATURE)/))
366 if ($fields[$UPPERCASE] ne '')
368 &update_case_block
(*lower_start
, *lower_end
, *lower_map
,
369 $fields[$CODE], $fields[$UPPERCASE]);
370 &set_attribute
($ncode, hex ($fields[$UPPERCASE]));
374 $lower_anom{$fields[$CODE]} = 1;
377 elsif ($fields[$UPPERCASE] ne '')
379 print STDERR
("Java missed lower case char \\u",
380 $fields[$CODE], "\n");
382 elsif ($fields[$CATEGORY] eq 'Ll')
384 # This case is for letters which are marked as lower case
385 # but for which there is no upper case equivalent. For
386 # instance, FEMININE ORDINAL INDICATOR.
391 # If we have a non-decimal numeric value, add it to the list.
392 if ($fields[$CATEGORY] eq 'Nd'
393 && ($ncode < 0x2000 || $ncode > 0x2fff)
394 && $fields[$NAME] =~ /DIGIT/)
396 # This is a digit character that is handled elsewhere.
398 elsif ($fields[$DIGIT] ne '' || $fields[$NUMERIC] ne '')
401 if ($fields[$DECIMAL] ne '')
403 # This catches bugs in an earlier implementation of
404 # chartables.pl. Now it is here for historical interest
406 # print STDERR ("Character \\u", $fields[$CODE],
407 # " would have been missed as digit\n");
# Prefer the digit field; fall back to the numeric field when it is empty.
410 local ($val) = $fields[$DIGIT];
411 $val = $fields[$NUMERIC] if $val eq '';
414 # If we have a value which is not a non-negative integer, then we
415 # set the value to -2 to make life easier for
416 # Character.getNumericValue.
417 if ($val !~ m/^[0-9]+$/)
419 if ($fields[$CATEGORY] ne 'Nl'
420 && $fields[$CATEGORY] ne 'No')
422 # This shows a few errors in the Unicode table. These
423 # characters have a missing Numeric field, and the `N'
424 # for the mirrored field shows up there instead. I
425 # reported these characters to errata@unicode.org on
426 # Thu Sep 10 1998. They said it will be fixed in the
427 # 2.1.6 release of the tables.
# The backslash is doubled so that a literal `\u' is printed; a bare
# "\u" in a double-quoted string is Perl's uppercase-next-char escape.
428 print STDERR
("Character \\u", $fields[$CODE],
429 " has value but is not numeric; val = '",
439 $numerics{$fields[$CODE]} = $val;
440 &set_attribute
($ncode, $val);
444 # We build a table that lists ranges of ordinary decimal values.
445 # At each step we make sure that the digits are in the correct
446 # order, with no holes, as this is assumed by Character. If this
447 # fails, reimplementation is required. This implementation
448 # dovetails nicely with the Java Spec, which has strange rules for
449 # what constitutes a decimal value. In particular the Unicode
450 # name must contain the word `DIGIT'. The spec doesn't directly
451 # say that digits must have type `Nd' (or that their value must be an
452 # integer), but that can be inferred from the list of digits in
453 # the book(s). Currently the only Unicode characters whose name
454 # includes `DIGIT' which would not fit are the Tibetan "half"
456 if ($fields[$CATEGORY] eq 'Nd')
458 if (($ncode < 0x2000 || $ncode > 0x2fff)
459 && $fields[$NAME] =~ /DIGIT/)
461 &update_digit_block
(*digit_start
, *digit_end
, $fields[$CODE],
463 &set_attribute
($ncode, $fields[$DECIMAL]);
467 # If this fails then Character.getType will fail. We
468 # assume that things in `digit_table' are the only
469 # category `Nd' characters.
# Doubled backslash: print a literal `\u' (see the note further up).
470 print STDERR
("Character \\u", $fields[$CODE],
471 " is class Nd but not in digit table\n");
476 # Keep track of space characters.
477 if ($fields[$CATEGORY] =~ /Z[slp]/)
479 &update_block
(*space_start
, *space_end
, $fields[$CODE]);
482 # Keep track of letters.
483 # if ($fields[$CATEGORY] =~ /L[ultmo]/)
485 # &update_letter_block (*letter_start, *letter_end, $fields[$CODE],
486 # $fields[$CATEGORY]);
489 # Keep track of all characters. You might think we wouldn't have
490 # to do this for uppercase letters, or other characters we already
491 # "classify". The problem is that this classification is
492 # different. E.g., \u216f is uppercase by Java rules, but is a
493 # LETTER_NUMBER here.
494 &update_all_block
(*all_start
, *all_end
, *all_cats
,
495 $fields[$CODE], $fields[$CATEGORY]);
499 # Called to add a new decomposition.
#
# Arguments (from @_):
#   $ncode - numeric character code; used as the key of the result hashes.
#   $value - the raw decomposition field: whitespace-separated hex codes.
#
# The result is stored, packed, in %full_decomposition and/or
# %canonical_decomposition.
# NOTE(review): the logic choosing between the two hashes is not visible
# in this listing -- presumably driven by $is_full; confirm.
500 sub add_decomposition
502 local ($ncode, $value) = @_;
503 local ($is_full) = 0;
505 local (@decomp) = ();
# split (' ', ...) splits on runs of whitespace and drops leading blanks.
507 foreach (split (' ', $value))
# A first token of the form <...> is a tag, not a hex code.
# NOTE(review): the handling of the tag is elided here -- presumably it
# sets $is_full; confirm against the full file.
509 if ($first && /^\<.*\>$/)
515 push (@decomp, hex ($_));
520 # We pack the value into a string because this means we can stick
521 # with Perl 4 features.
522 local ($s) = pack "I*", @decomp;
525 $full_decomposition{$ncode} = $s;
529 $canonical_decomposition{$ncode} = $s;
533 # Write a single decomposition table.
#
# Arguments (from @_):
#   $name     - prefix of the emitted C array, `${name}_decomposition'.
#   $is_canon - flag passed by the caller (1 for canonical, 0 for full);
#               not referenced in the lines visible here.
#   %table    - map from numeric character code to a decomposition packed
#               with pack "I*".
#
# Output goes to the DECOMP filehandle.
534 sub write_single_decomposition
536 local ($name, $is_canon, %table) = @_;
538 printf DECOMP
"static const decomp_entry ${name}_decomposition[] =\n{\n";
540 local ($key, @expansion, $char);
541 local ($first_line) = 1;
# Walk the code points in ascending numeric order so the emitted table is
# sorted by key; codes without an entry are skipped.
543 for ($key = 0; $key <= 65535; ++$key)
545 next if ! defined $table{$key};
551 printf DECOMP
" { 0x%04x, \"", $key;
553 # We represent the expansion as a series of bytes, terminated
554 # with a double nul. This is ugly, but relatively
555 # space-efficient. Most expansions are short, but there are a
556 # few that are very long (e.g. \uFDFA). This means that if we
557 # chose a fixed-space representation we would waste a lot of
559 @expansion = unpack "I*", $table{$key};
# Each character is written as two \x escapes: high byte, then low byte.
560 foreach $char (@expansion)
562 printf DECOMP
"\\x%02x\\x%02x", ($char / 256), ($char % 256);
565 printf DECOMP
"\" }";
568 printf DECOMP
"\n};\n\n";
# Write both decomposition tables to DECOMP: first `canonical' (from
# %canonical_decomposition), then `full' (from %full_decomposition).
# Takes no arguments of its own.
571 sub write_decompositions
573 &write_single_decomposition
('canonical', 1, %canonical_decomposition);
574 &write_single_decomposition
('full', 0, %full_decomposition);
577 # We represent a block of characters with a pair of lists. This
578 # function updates the pair to account for the new character. Returns
579 # 1 if we added to the old block, 0 otherwise.
582 local (*start
, *end
, $char) = @_;
584 local ($nchar) = hex ($char);
585 local ($count) = $#end;
586 if ($count >= 0 && $end[$count] == $nchar - 1)
594 $start[$count] = $nchar;
595 $end[$count] = $nchar;
600 # Return true if we will be appending this character to the end of the
604 local (*end
, $char) = @_;
605 return $#end >= 0 && $end[$#end] == $char - 1;
608 # This updates the digit block. This table is much like an ordinary
609 # block, but it has an extra constraint.
#
# Arguments (from @_):
#   *start, *end - typeglobs for the parallel range-start / range-end arrays.
#   $char        - the character code as a hex string.
#   $value       - the decimal value this digit is expected to have.
#
# A violation of the digit-ordering constraint is reported on STDERR.
610 sub update_digit_block
612 local (*start
, *end
, $char, $value) = @_;
# Hand the typeglobs themselves to update_block, exactly as
# update_letter_block does.  update_block unpacks its arguments with
# `local (*start, *end, $char) = @_', so passing the scalars $start and
# $end would not alias the caller's arrays.
614 &update_block
(*start
, *end
, $char);
615 local ($nchar) = hex ($char);
617 # We want to make sure that the new digit's value is correct for
618 # its place in the block. However, we special-case Tamil digits,
619 # since Tamil does not have a digit `0'.
# $count indexes the most recent range; the digit's offset from that
# range's start must equal its value.
620 local ($count) = $#start;
621 if (($nchar < $TAMIL_DIGIT_ONE || $nchar > $TAMIL_DIGIT_NINE)
622 && $nchar - $start[$count] != $value)
624 # If this fails then Character.digit_value will be wrong.
625 print STDERR
"Character \\u", $char, " violates digit constraint\n";
630 # Update letter table. We could be smart about avoiding upper or
631 # lower case letters, but it is much simpler to just track them all.
#
# Arguments (from @_):
#   *start, *end - typeglobs for the parallel range arrays.
#   $char        - the character code as a hex string.
#   $category    - the character's category; accepted but not used in the
#                  lines visible here.
#
# Simply forwards to update_block.  The only call visible in this listing
# is commented out.
632 sub update_letter_block
634 local (*start
, *end
, $char, $category) = @_;
636 &update_block
(*start
, *end
, $char);
639 # Update `all' table. This table holds all the characters we don't
640 # already categorize for other reasons. FIXME: if a given type has
641 # very few characters, we should just inline the code. E.g., there is
642 # only one paragraph separator.
645 local (*start
, *end
, *cats
, $char, $category) = @_;
647 local ($nchar) = hex ($char);
648 local ($count) = $#end;
650 && $end[$count] == $nchar - 1
651 && $cats[$count] eq $category)
658 $start[$count] = $nchar;
659 $end[$count] = $nchar;
660 $cats[$count] = $category;
664 # Update a case table. We handle case tables specially because we
665 # want to map (e.g.) a block of uppercase characters directly onto the
666 # corresponding block of lowercase characters. Therefore we generate
667 # a new entry when the block would no longer map directly.
#
# Arguments (from @_):
#   *start, *end - typeglobs for the parallel range-start / range-end arrays.
#   *map         - typeglob for the array holding, per range, the character
#                  that the range's first character maps to.
#   $char        - the character code as a hex string.
#   $mapchar     - the code of the mapped-to character, as a hex string.
668 sub update_case_block
670 local (*start
, *end
, *map, $char, $mapchar) = @_;
672 local ($nchar) = hex ($char);
673 local ($nmap) = hex ($mapchar);
# $count indexes the most recent range (-1 when there is none yet).
675 local ($count) = $#end;
# The last range can absorb this character only if the character directly
# follows the range's end and its offset from the range start equals the
# offset of its mapping from the range's mapped start.
677 && $end[$count] == $nchar - 1
678 && $nchar - $start[$count] == $nmap - $map[$count])
# NOTE(review): the branch structure around the assignments below is
# elided in this listing; presumably they initialise a newly started
# range -- confirm against the full file.
685 $start[$count] = $nchar;
686 $end[$count] = $nchar;
687 $map[$count] = $nmap;
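691 # Set the attribute value for the character. Each character can have
692 # only one attribute, except characters in the range $ROMAN_START ..
# $ROMAN_END, whose second attribute is kept in %second_attributes.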
695 local ($ncode, $attr) = @_;
697 if ($attributes{$ncode} ne '' && $attributes{$ncode} ne $attr)
699 if ($ncode >= $ROMAN_START && $ncode <= $ROMAN_END)
701 $second_attributes{$ncode} = $attr;
705 printf STDERR
"character \\u%04x already has attribute\n", $ncode;
708 # Attributes can be interpreted as unsigned in some situations,
709 # so we check against 65535. This could cause errors -- we need
710 # to check the interpretation here.
711 elsif ($attr < -32768 || $attr > 65535)
713 printf STDERR
"attribute out of range for character \\u%04x\n", $ncode;
717 $attributes{$ncode} = $attr;
722 # Print a block table.
725 local ($title, *start
, *end
) = @_;
727 print CHARTABLE
"static const jchar ", $title, "[][2] =\n";
728 print CHARTABLE
" {\n";
731 while ($i <= $#start)
733 print CHARTABLE
" { ";
734 &print_char
($start[$i]);
735 print CHARTABLE
", ";
736 &print_char
($end[$i]);
737 print CHARTABLE
" }";
738 print CHARTABLE
"," if ($i != $#start);
739 print CHARTABLE
"\n";
741 $bytes += 4; # Two bytes per char.
744 print CHARTABLE
" };\n\n";
747 # Print the numerics table.
750 local ($i, $key, $count, @keys);
753 @keys = sort keys %numerics;
756 print CHARTABLE
"static const jchar numeric_table[] =\n";
757 print CHARTABLE
" { ";
760 &print_char
(hex ($key));
762 print CHARTABLE
", " if $i < $count;
764 print CHARTABLE
"\n " if ($i % 5 == 0);
765 $bytes += 2; # One character.
767 print CHARTABLE
" };\n\n";
769 print CHARTABLE
"static const jshort numeric_value[] =\n";
770 print CHARTABLE
" { ";
774 print CHARTABLE
$numerics{$key};
775 if ($numerics{$key} > 32767 || $numerics{$key} < -32768)
777 # This means our generated type info is incorrect. We
778 # could just detect and work around this here, but I'm
780 print STDERR
"numeric value won't fit in a short\n";
784 print CHARTABLE
", " if $i < $count;
786 print CHARTABLE
"\n " if ($i % 10 == 0);
787 $bytes += 2; # One short.
789 print CHARTABLE
" };\n\n";
792 # Print a table that maps one single letter onto another. It assumes
793 # the map is indexed by char code.
796 local ($title, %map) = @_;
798 local (@keys) = sort keys %map;
800 print CHARTABLE
"static const jchar ", $title, "[][2] =\n";
801 print CHARTABLE
" {\n";
805 print CHARTABLE
" { ";
806 &print_char
(hex ($key));
807 print CHARTABLE
", ";
808 &print_char
(hex ($map{$key}));
809 print CHARTABLE
" }";
819 print CHARTABLE
" // ", $name{$key}, "\n";
820 $bytes += 4; # Two bytes per char.
822 print CHARTABLE
" };\n\n";
825 # Print the `all' block.
828 local (*start
, *end
, *cats
) = @_;
830 &print_block
("all_table", *start
, *end
);
834 while ($i <= $#start)
836 $sum += $end[$i] - $start[$i] + 1;
839 # We do this computation just to make sure it isn't cheaper to
840 # simply list all the characters individually.
841 printf STDERR
("all_table encodes %d characters in %d entries\n",
844 print CHARTABLE
"static const jbyte category_table[] =\n";
845 print CHARTABLE
" { ";
850 if ($i > 0 && $cats[$i] eq $cats[$i - 1])
852 # This isn't an error. We can have a duplicate because
853 # two ranges are not adjacent while the intervening
854 # characters are left out of the table for other reasons.
855 # We could exploit this to make the table a little smaller.
856 # printf STDERR "Duplicate all entry at \\u%04x\n", $start[$i];
858 print CHARTABLE
'java::lang::Character::', $category_map{$cats[$i]};
859 print CHARTABLE
", " if ($i < $#cats);
861 print CHARTABLE
"\n ";
864 print CHARTABLE
" };\n\n";
870 local ($title, *start
, *end
, *map, *anomalous
) = @_;
872 &print_block
($title . '_case_table', *start
, *end
);
874 print CHARTABLE
"static const jchar ", $title, "_case_map_table[] =\n";
875 print CHARTABLE
" { ";
880 &print_char
($map[$i]);
881 print CHARTABLE
", " if $i < $#map;
883 print CHARTABLE
"\n " if $i % 5 == 0;
886 print CHARTABLE
" };\n";
890 @keys = sort keys %anomalous;
892 if ($title eq 'upper')
896 # If these are found we need to change Character.isUpperCase.
897 print STDERR
"Found anomalous upper case characters\n";
903 print CHARTABLE
"\n";
904 print CHARTABLE
"static const jchar ", $title, "_anomalous_table[] =\n";
905 print CHARTABLE
" { ";
909 &print_char
(hex ($key));
910 print CHARTABLE
", " if $i < $#keys;
912 print CHARTABLE
"\n " if $i % 5 == 0;
915 print CHARTABLE
" };\n";
918 print CHARTABLE
"\n";
921 # Print the type table and attributes table for the fast version.
#
# Arguments (from @_), all typeglobs:
#   *start, *end - parallel arrays of range start / end codes.
#   *cats        - parallel array with the category code of each range.
#   *atts        - hash from numeric character code to its attribute.
#   *second_atts - hash from numeric character code to a second attribute.
#
# Emits three C arrays on CHARTABLE: type_table, attribute_table and
# secondary_attribute_table.
922 sub print_fast_tables
924 local (*start
, *end
, *cats
, *atts
, *second_atts
) = @_;
926 print CHARTABLE
"static const jbyte type_table[] =\n{ ";
# One type_table entry is written per character of every range, so the
# table is expanded rather than range-compressed.
929 for ($i = 0; $i <= $#cats; ++$i)
931 for ($j = $start[$i]; $j <= $end[$i]; ++$j)
933 print CHARTABLE
'java::lang::Character::', $category_map{$cats[$i]};
# A comma follows every entry except the very last one of the last range.
934 print CHARTABLE
"," if ($i < $#cats || $j < $end[$i]);
935 print CHARTABLE
"\n ";
938 print CHARTABLE
"\n };\n\n";
940 print CHARTABLE
"static const jshort attribute_table[] =\n{ ";
# One entry for each of the 65536 character codes.
941 for ($i = 0; $i <= 0xffff; ++$i)
# Characters without an attribute get 0.  Note that the default is stored
# back into the hash that was passed in by typeglob.
943 $atts{$i} = 0 if ! defined $atts{$i};
944 print CHARTABLE
$atts{$i};
945 print CHARTABLE
", " if $i < 0xffff;
946 print CHARTABLE
"\n " if $i % 5 == 1;
948 print CHARTABLE
"\n };\n\n";
950 print CHARTABLE
"static const jshort secondary_attribute_table[] =\n{ ";
# The secondary table covers only $ROMAN_START .. $ROMAN_END.
# NOTE(review): unlike attribute_table there is no default here, so a code
# in this range without a second attribute would print as an empty entry --
# confirm that every code in the range has one.
951 for ($i = $ROMAN_START; $i <= $ROMAN_END; ++$i)
953 print CHARTABLE
$second_atts{$i};
954 print CHARTABLE
", " if $i < $ROMAN_END;
955 print CHARTABLE
"\n " if $i % 5 == 1;
957 print CHARTABLE
"\n };\n\n";
960 # Print a character constant.
964 printf CHARTABLE
"0x%04x", $ncode;