2 # unicode-to-chartables.pl -- generate Unicode database for java.lang.Character
3 # Copyright (C) 1998, 2002, 2004, 2006 Free Software Foundation, Inc.
5 # This file is part of GNU Classpath.
7 # GNU Classpath is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
12 # GNU Classpath is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with GNU Classpath; see the file COPYING. If not, write to the
19 # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 # Linking this library statically or dynamically with other modules is
23 # making a combined work based on this library. Thus, the terms and
24 # conditions of the GNU General Public License cover the whole
27 # As a special exception, the copyright holders of this library give you
28 # permission to link this library with independent modules to produce an
29 # executable, regardless of the license terms of these independent
30 # modules, and to copy and distribute the resulting executable under
31 # terms of your choice, provided that you also meet, for each linked
32 # independent module, the terms and conditions of the license of that
33 # module. An independent module is a module which is not derived from
34 # or based on this library. If you modify this library, you may extend
35 # this exception to your version of the library, but you are not
36 # obligated to do so. If you do not wish to do so, delete this
37 # exception statement from your version.
39 # Code for reading UnicodeData-4.0.0.txt and SpecialCasing-4.0.0.txt to generate
40 # the code for java-chartables.h. The relevant files can be found here:
42 # http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt
43 # http://www.unicode.org/Public/4.0-Update/SpecialCasing-4.0.0.txt
45 # Inspired by code from Jochen Hoenicke.
46 # author Eric Blake <ebb9@email.byu.edu>
47 # Unicode 4.0.0 support by Anthony Balkissoon <abalkiss@redhat.com>
49 # Usage: ./unicode-to-chartables.pl <UnicodeData> <SpecialCasing> <tables>
50 # where <UnicodeData.txt> is obtained from www.unicode.org (named
51 # UnicodeData-4.0.0.txt for Unicode version 4.0.0), <SpecialCasing>
52 # is obtained from www.unicode.org too (named SpecialCasing-4.0.0.txt for Unicode
53 # version 4.0.0), and <tables> is the final location for the header file
54 # java-chartables.h. As of JDK 1.5, use Unicode version 4.0.0
59 ## Return the given variable interpreted as a 16 bit signed number.
# Reinterpret the low 16 bits of $char as a signed short.  The pack and
# unpack templates must both be 16 bits wide ("S" then "s"): packing with
# "I" and reading back with "s" only yields the low half on little-endian
# hosts and returns the high half on big-endian ones.
return unpack "s", pack "S", $char;
67 ## Convert the text UnicodeData file from www.unicode.org into a header file
68 ## interface with arrays holding the compressed information.
# Lookup tables and flag bits used when encoding each character's attributes.
# Stage 1 searches @TYPECODES for a character's general category and uses the
# matching index as its numeric type; it searches @DIRCODES for the bidi
# class in the same way to obtain the direction code.
# NOTE(review): SKIPPED looks like a placeholder that keeps the following
# codes at their expected indices -- confirm against java.lang.Character.
70 my @TYPECODES = qw(Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf
71 SKIPPED Co Cs Pd Ps Pe Pc Po Sm Sc Sk So Pi Pf);
72 my @DIRCODES = qw(L R AL EN ES ET AN CS NSM BN B S WS ON LRE LRO RLE RLO PDF);
# Flag bits OR'ed into the type value: the first is set when the
# decomposition field contains "noBreak", the second when the mirrored field
# contains "Y".
74 my $NOBREAK_FLAG = 32;
75 my $MIRRORED_FLAG = 64;
79 # infoArray is an array where each element is a list of character information
80 # for characters in a plane. The index of each list is equal to the plane
81 # that it corresponds to even though most of these lists will currently be
82 # empty. This is done so that this script can be easily modified to
83 # accommodate future versions of Unicode.
# One array reference per Unicode plane (0 .. 0x10).  The backslash applied
# to a parenthesised list of empty lists, \((), (), ...), evaluates to an
# empty list, so the per-plane references are created explicitly here rather
# than being left to autovivification on first use.
my @infoArray = map { [] } 0 .. 0x10;
87 # info is a reference to one of the lists in infoArray, depending on which
88 # plane we're currently parsing.
91 # largeNums is an array of numerical values that are too large to fit
92 # into the 16 bit char where most numerical values are stored.
93 # What is stored in the char then is a number N such that (-N - 3) is
94 # the index into largeNums where the numerical value can be found.
101 die "Usage: $0 <UnicodeData.txt> <SpecialCasing.txt> <java-chartables.h>"
104 print "GNU Classpath Unicode Attribute Database Generator 2.1\n";
105 print "Copyright (C) 1998, 2002 Free Software Foundation, Inc.\n";
108 ################################################################################
109 ################################################################################
110 # Stage 0: Parse the special casing file
111 print "Parsing special casing file\n";
# Three-argument open keeps the read mode separate from the user-supplied
# path, so unusual characters or surrounding whitespace in the file name
# cannot change how the file is opened.
open(SPECIAL, '<', $ARGV[1]) || die "Can't open special casing file: $!\n";
115 my ($ch, undef, undef, $upper) = split / *; */;
117 # This grabs only the special casing for multi-char uppercase. Note that
118 # there are no multi-char lowercase, and that Sun ignores multi-char
119 # titlecase rules. This script omits 3 special cases in Unicode 3.0.0,
120 # which must be hardcoded in java.lang.String:
121 # \u03a3 (Sun ignores this special case)
122 # \u0049 - lowercases to \u0131, but only in Turkish locale
123 # \u0069 - uppercases to \u0130, but only in Turkish locale
124 next unless defined $upper and $upper =~ / /;
125 $special{hex $ch} = [map {hex} split ' ', $upper];
131 ################################################################################
132 ################################################################################
133 ## Stage 1: Parse the attribute file
134 print "Parsing attributes file";
# Three-argument open keeps the read mode separate from the user-supplied
# path, so unusual characters or surrounding whitespace in the file name
# cannot change how the file is opened.
open(UNICODE, '<', $ARGV[0]) || die "Can't open Unicode attribute file: $!\n";
137 print "." unless $count++ % 1000;
140 my ($ch, $name, $category, undef, $bidir, $decomp, undef, undef, $numeric,
141 $mirrored, undef, undef, $upcase, $lowcase, $title) = split ';';
144 # plane tells us which Unicode code plane we're currently in and is an
145 # index into infoArray.
146 my $plane = int($ch / 0x10000);
147 my $planeBase = $plane * 0x10000;
148 $info = \@
{$infoArray[$plane]};
150 my ($type, $numValue, $upperchar, $lowerchar, $direction);
153 while ($category !~ /^$TYPECODES[$type]$/) {
154 if (++$type == @TYPECODES) {
155 die "$ch: Unknown type: $category";
158 $type |= $NOBREAK_FLAG if ($decomp =~ /noBreak/);
159 $type |= $MIRRORED_FLAG if ($mirrored =~ /Y/);
161 if ($numeric =~ /^[0-9]+$/) {
162 $numValue = $numeric;
163 # If numeric takes more than 16 bits to store we want to store that
164 # number in a separate array and store a number N in numValue such
165 # that (-N - 3) is the offset into the separate array containing the
166 # large numerical value.
167 if ($numValue >= 0x7fff) {
168 $numValue = -3 - @largeNums;
169 push @largeNums, $numeric;
171 } elsif ($numeric eq "") {
172 # Special case sequences of 'a'-'z'
173 if ($ch >= 0x0041 && $ch <= 0x005a) {
174 $numValue = $ch - 0x0037;
175 } elsif ($ch >= 0x0061 && $ch <= 0x007a) {
176 $numValue = $ch - 0x0057;
177 } elsif ($ch >= 0xff21 && $ch <= 0xff3a) {
178 $numValue = $ch - 0xff17;
179 } elsif ($ch >= 0xff41 && $ch <= 0xff5a) {
180 $numValue = $ch - 0xff37;
188 $upperchar = $upcase ?
hex($upcase) - $ch : 0;
189 $lowerchar = $lowcase ?
hex($lowcase) - $ch : 0;
190 if ($title ne $upcase) {
191 my $titlechar = $title ?
hex($title) : $ch;
192 $titlecase .= pack("n2", $ch, $titlechar);
196 while ($bidir !~ /^$DIRCODES[$direction]$/) {
197 if (++$direction == @DIRCODES) {
203 $direction += $#{$special{$ch}} if defined $special{$ch};
206 die "Expecting end of range at $ch\n" unless $name =~ /Last>$/;
207 for ($range + 1 .. $ch - 1) {
208 $info->[$_ - $planeBase] = pack("n5", $type, $numValue, $upperchar,
209 $lowerchar, $direction);
212 } elsif ($name =~ /First>$/) {
215 # Store all this parsed information into the element in infoArray that info
217 $info->[$ch - $planeBase] = pack("n5", $type, $numValue, $upperchar, $lowerchar,
223 ################################################################################
224 ################################################################################
225 ## Stage 2: Compress the data structures
226 printf "\nCompressing data structures";
229 # data is a String that will be used to create the DATA String containing
230 # character information and offsets into the attribute tables.
233 # charhashArray is an array of hashtables used so that we can reuse character
234 # attributes when characters share the same attributes ... this makes our
235 # attribute tables smaller. charhash is a pointer into this array.
236 my @charhashArray = ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
239 # charinfoArray is an array of arrays, one per plane, for storing character
240 # information. charinfo is a pointer into this array.
# One array reference per Unicode plane (0 .. 0x10).  The backslash applied
# to a parenthesised list of empty lists, \((), (), ...), evaluates to an
# empty list, so the per-plane references are created explicitly here rather
# than being left to autovivification on first use.
my @charinfoArray = map { [] } 0 .. 0x10;
245 # charlen is an array, one element per plane, that tells us how many unique
246 # character attributes there are for that plane.
249 for my $plane (0 .. 0x10) {
250 $info = \@
{$infoArray[$plane]};
251 my $planeBase = $plane * 0x10000;
252 $charhash = \
%{$charhashArray[$plane]};
253 $charinfo = \@
{$charinfoArray[$plane]};
255 for my $ch ($planeBase .. $planeBase + 0xffff) {
256 my $index = $ch - $planeBase;
257 print "." unless $count++ % 0x1000;
258 $info->[$index] = pack("n5", 0, -1, 0, 0, -4) unless defined $info->[$index];
260 my ($type, $numVal, $upper, $lower, $direction) = unpack("n5", $info->[$index]);
261 if (! exists $charhash->{$info->[$index]}) {
262 # If we entered this loop that means the character we're looking at
263 # now has attributes that are unique from those that we've looked
264 # at so far for this plane. So we push its attributes into charinfo
265 # and store in charhash the offset into charinfo where these
266 # attributes can later be found.
267 push @
{$charinfo}, [ $numVal, $upper, $lower, $direction ];
268 $charhash->{$info->[$index]} = @
{$charinfo} - 1;
269 # When the file is generated, the number we just stored in charhash
270 # will be the upper 9 bits in the DATA String that are an offset
271 # into the attribute tables.
# The attribute-table offset occupies the upper 9 bits of each 16-bit data
# entry and the type (with its flag bits) the lower 7.  Fail loudly if a
# plane has more unique attribute sets than 9 bits can address, instead of
# letting pack("n") silently truncate the value and corrupt the table.
my $attrIndex = $charhash->{$info->[$index]};
die "Too many unique character attributes in plane $plane: index $attrIndex exceeds 511\n"
    if $attrIndex > 0x1ff;
$data[$plane] .= pack("n", ($attrIndex << 7) | $type);
275 $charlen[$plane] = scalar(@
{$charinfoArray[$plane]});
278 # the shift that results in the best compression of the table. This is an array
279 # because different shifts are better for the different tables for each plane.
283 my $bestest = 1000000;
287 for my $plane (0 .. 0x10) {
288 print "\n\nplane: $plane\n";
289 print "Unique character entries: $charlen[$plane]\n";
292 my $blksize = 1 << $i;
298 for ($j = 0; $j < 0x10000; $j += $blksize) {
299 my $blkkey = substr $data[$plane], 2 * $j, 2 * $blksize;
300 if (! exists $blocks{$blkkey}) {
301 push @blkarray, $blkkey;
302 $blocks{$blkkey} = $#blkarray;
306 my $blknum = @blkarray;
307 my $blocklen = $blknum * $blksize;
308 printf " before %5d", $blocklen;
310 # Now we try to pack the blkarray as tight as possible by finding matching
312 for ($j = $blksize - 1; $j > 0; $j--) {
314 for $k (0 .. $#blkarray) {
315 next unless defined $blkarray[$k];
316 my $len = length $blkarray[$k];
317 my $tail = substr $blkarray[$k], $len - $j * 2;
318 if (exists $tails{$tail}) {
319 push @
{$tails{$tail}}, $k;
321 $tails{$tail} = [ $k ];
325 # tails are calculated, now calculate the heads and merge.
327 for $k (0 .. $#blkarray) {
328 next unless defined $blkarray[$k];
331 my $head = substr($blkarray[$tomerge], 0, $j * 2);
332 my $entry = $tails{$head};
333 next BLOCK
unless defined $entry;
335 my $other = shift @
{$entry};
336 if ($other == $tomerge) {
338 push @
{$entry}, $other;
339 $other = shift @
{$entry};
341 push @
{$entry}, $other;
345 if (@
{$entry} == 0) {
346 delete $tails{$head};
350 my $merge = $blkarray[$other]
351 . substr($blkarray[$tomerge], $j * 2);
355 if ($other < $tomerge) {
356 $blkarray[$tomerge] = undef;
357 $blkarray[$other] = $merge;
358 my $len = length $merge;
359 my $tail = substr $merge, $len - $j * 2;
360 $tails{$tail} = [ map { $_ == $tomerge ?
$other : $_ }
364 $blkarray[$tomerge] = $merge;
365 $blkarray[$other] = undef;
370 for $k (0 .. $#blkarray) {
371 $blockstr .= $blkarray[$k] if defined $blkarray[$k];
374 die "Unexpected $blocklen" if length($blockstr) != 2 * $blocklen;
375 my $estimate = 2 * $blocklen + (0x20000 >> $i);
377 printf " after merge %5d: %6d bytes\n", $blocklen, $estimate;
378 if ($estimate < $bestest) {
379 $bestest = $estimate;
380 $bestshift[$plane] = $i;
381 $bestblkstr[$plane] = $blockstr;
384 $blksize[$plane] = 1 << $bestshift[$plane];
385 print "best shift: ", $bestshift[$plane];
386 print " blksize: ", $blksize[$plane];
# One array reference per Unicode plane (0 .. 0x10).  The backslash applied
# to a parenthesised list of empty lists, \((), (), ...), evaluates to an
# empty list, so the per-plane references are created explicitly here rather
# than being left to autovivification on first use.
my @blocksArray = map { [] } 0 .. 0x10;
391 for my $plane (0 .. 0x10) {
392 for (my $j = 0; $j < 0x10000; $j += $blksize[$plane]) {
393 my $blkkey = substr $data[$plane], 2 * $j, 2 * $blksize[$plane];
394 my $index = index $bestblkstr[$plane], $blkkey;
396 die "not found: $j" if $index == -1;
397 $index = index $bestblkstr[$plane], $blkkey, $index + 1;
399 push @
{$blocksArray[$plane]}, ($index / 2 - $j) & 0xffff;
404 ################################################################################
405 ################################################################################
406 ## Stage 3: Generate the file
407 for my $plane (0 .. 0x10) {
408 die "UTF-8 limit of blocks may be exceeded for plane $plane: " . scalar(@
{$blocksArray[$plane]}) . "\n"
409 if @
{$blocksArray[$plane]} > 0xffff / 3;
410 die "UTF-8 limit of data may be exceeded for plane $plane: " . length($bestblkstr[$plane]) . "\n"
411 if length($bestblkstr[$plane]) > 0xffff / 3;
415 print "\nGenerating $ARGV[2].";
# Three-argument open keeps the write mode separate from the user-supplied
# path, so unusual characters or surrounding whitespace in the file name
# cannot change how the file is opened.
open(OUTPUT, '>', $ARGV[2]) or die "Failed creating output file: $!\n";
420 /* java-chartables.h -- Character tables for java.lang.Character -*- c++ -*-
421 Copyright (C) 2002, 2006 Free Software Foundation, Inc.
422 *** This file is generated by scripts/unicode-to-chartables.pl ***
424 This file is part of GNU Classpath.
426 GNU Classpath is free software; you can redistribute it and/or modify
427 it under the terms of the GNU General Public License as published by
428 the Free Software Foundation; either version 2, or (at your option)
431 GNU Classpath is distributed in the hope that it will be useful, but
432 WITHOUT ANY WARRANTY; without even the implied warranty of
433 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
434 General Public License for more details.
436 You should have received a copy of the GNU General Public License
437 along with GNU Classpath; see the file COPYING. If not, write to the
438 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
441 Linking this library statically or dynamically with other modules is
442 making a combined work based on this library. Thus, the terms and
443 conditions of the GNU General Public License cover the whole
446 As a special exception, the copyright holders of this library give you
447 permission to link this library with independent modules to produce an
448 executable, regardless of the license terms of these independent
449 modules, and to copy and distribute the resulting executable under
450 terms of your choice, provided that you also meet, for each linked
451 independent module, the terms and conditions of the license of that
452 module. An independent module is a module which is not derived from
453 or based on this library. If you modify this library, you may extend
454 this exception to your version of the library, but you are not
455 obligated to do so. If you do not wish to do so, delete this
456 exception statement from your version. */
458 #ifndef __JAVA_CHARTABLES_H__
459 #define __JAVA_CHARTABLES_H__
461 // These tables are automatically generated by scripts/unicode_to_chartables.pl.
462 // The Unicode data comes from www.unicode.org; this header is based on
463 // UnicodeData-4.0.0.txt. JDK 1.5 uses Unicode version 4.0.0.
464 // DO NOT EDIT the tables. Instead, fix the upstream scripts and run
467 // The data is stored in C style arrays of the appropriate CNI types, to
468 // guarantee that the data is constant and non-relocatable. The field
469 // <code>blocks</code> stores the offset of a block of 2<sup>SHIFT</sup>
470 // characters within <code>data</code>. The data field, in turn, stores
471 // information about each character in the low order bits, and an offset
472 // into the attribute tables <code>upper</code>, <code>lower</code>,
473 // <code>numValue</code>, and <code>direction</code>. Notice that the
474 // attribute tables are much smaller than 0xffff entries; as many characters
475 // in Unicode share common attributes. Finally, there is a listing for
476 // <code>title</code> exceptions (most characters just have the same title
477 // case as upper case).
479 // This file should only be included by natCharacter.cc
482 * The array containing the numeric values that are too large to be stored as
483 * chars in NUM_VALUE. NUM_VALUE in this case will contain a negative integer
484 * N such that LARGENUMS[-N - 3] contains the correct numeric value.
487 print OUTPUT
"static const jint largenums[] = {\n ";
488 for ($i = 0; $i < @largeNums; $i++) {
489 print OUTPUT
$largeNums[$i], ", ";
496 * The character shift amount to look up the block offset. In other words,
497 * <code>(char) (blocks[p][off >> SHIFT[p]] + off)</code> is the index where
498 * <code>ch</code> is described in <code>data</code>, where <code>off</code>
499 * is ch & 0xffff and <code>p</code> is the plane the character belongs to.
502 print OUTPUT
"static const int shift[] = {\n ";
503 for ($i = 0; $i < @bestshift; $i++) {
504 print OUTPUT
$bestshift[$i], ", ";
511 * The mapping of character blocks to their location in <code>data</code>.
512 * Each entry has been adjusted so that a modulo 16 sum with the desired
513 * character gives the actual index into <code>data</code>.
516 for ($plane = 0; $plane <= 0x10; $plane++) {
517 # The following if statement handles the cases of unassigned planes
518 # specially so we don't waste space with unused Strings. As of
519 # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used. If
520 # you are updating this script to work with a later version of
521 # Unicode you may have to alter this if statement.
522 next if ($plane > 2 && $plane != 14) ;
524 print OUTPUT
"static const jchar blocks", $plane, "[] = {\n";
525 for ($i = 0; $i < @
{$blocksArray[$plane]} / 10; $i++) {
528 last if @
{$blocksArray[$plane]} <= $i * 10 + $j;
529 my $val = $blocksArray[$plane]->[$i * 10 + $j];
530 print OUTPUT
$val, ", ";
534 print OUTPUT
"};\n\n";
536 print OUTPUT
"static const int blocks_length[] = {\n ";
537 for ($plane = 0; $plane <= 0x10; $plane++) {
538 if ($plane > 2 && $plane != 14){
542 print OUTPUT
scalar(@
{$blocksArray[$plane]}), ", ";
547 static const jchar* blocks[] = {
548 blocks0, blocks1, blocks2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
549 NULL, NULL, NULL, NULL, blocks14, NULL, NULL};
552 * Information about each character. The low order 5 bits form the
553 * character type, the next bit is a flag for non-breaking spaces, and the
554 * next bit is a flag for mirrored directionality. The high order 9 bits
555 * form the offset into the attribute tables. Note that this limits the
556 * number of unique character attributes per plane to 512, which is not a
557 * problem as of Unicode version 4.0.0, but may soon become one.
560 for ($plane = 0; $plane <= 0x10; $plane++) {
561 # The following if statement handles the cases of unassigned planes
562 # specially so we don't waste space with unused Strings. As of
563 # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used. If
564 # you are updating this script to work with a later version of
565 # Unicode you may have to alter this if statement.
566 next if ($plane > 2 && $plane != 14);
568 print OUTPUT
"static const jchar data", $plane, "[] = {\n";
569 my $len = length($bestblkstr[$plane]) / 2;
570 for ($i = 0; $i < $len / 10; $i++) {
573 last if $len <= $i * 10 + $j;
574 my $val = unpack "n", substr($bestblkstr[$plane], 2 * ($i * 10 + $j), 2);
575 print OUTPUT
$val, ", ";
579 print OUTPUT
"};\n\n";
581 print OUTPUT
"static const int data_length[] = {\n ";
582 for ($plane = 0; $plane <= 0x10; $plane++) {
583 if ($plane > 2 && $plane != 14){
587 print OUTPUT
length($bestblkstr[$plane]) / 2, ", ";
592 static const jchar* data[] = {
593 data0, data1, data2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
594 NULL, NULL, NULL, NULL, data14, NULL, NULL};
598 * This is the attribute table for computing the numeric value of a
599 * character. The value is -1 if Unicode does not define a value, -2
600 * if the value is not a positive integer, otherwise it is the value.
603 for ($plane = 0; $plane <= 0x10; $plane++) {
604 # The following if statement handles the cases of unassigned planes
605 # specially so we don't waste space with unused Strings. As of
606 # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used. If
607 # you are updating this script to work with a later version of
608 # Unicode you may have to alter this if statement.
609 next if ($plane > 2 && $plane != 14);
611 print OUTPUT
"static const jshort numValue", $plane, "[] = {\n";
612 $len = @
{$charinfoArray[$plane]};
613 for ($i = 0; $i < $len / 13; $i++) {
616 last if $len <= $i * 13 + $j;
617 my $val = $charinfoArray[$plane]->[$i * 13 + $j][0];
618 print OUTPUT cShort
($val), ", ";
622 print OUTPUT
"};\n\n";
624 print OUTPUT
"static const int numValue_length[] = {\n ";
625 for ($plane = 0; $plane <= 0x10; $plane++) {
626 if ($plane > 2 && $plane != 14){
630 print OUTPUT
scalar(@
{$charinfoArray[$plane]}), ", ";
635 static const jshort* numValue[] = {
636 numValue0, numValue1, numValue2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
637 NULL, NULL, NULL, NULL, numValue14, NULL, NULL};
642 * This is the attribute table for computing the uppercase representation
643 * of a character. The value is the difference between the character and
644 * its uppercase version.
647 for ($plane = 0; $plane <= 0x10; $plane++) {
648 # The following if statement handles the cases of unassigned planes
649 # specially so we don't waste space with unused Strings. As of
650 # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used. If
651 # you are updating this script to work with a later version of
652 # Unicode you may have to alter this if statement.
653 next if ($plane > 2 && $plane != 14);
655 print OUTPUT
"static const jshort upper", $plane, "[] = {\n";
656 $len = @
{$charinfoArray[$plane]};
657 for ($i = 0; $i < $len / 13; $i++) {
660 last if $len <= $i * 13 + $j;
661 my $val = $charinfoArray[$plane]->[$i * 13 + $j][1];
662 print OUTPUT cShort
($val), ", ";
666 print OUTPUT
"};\n\n";
668 print OUTPUT
"static const int upper_length[] = {\n ";
669 for ($plane = 0; $plane <= 0x10; $plane++) {
670 if ($plane > 2 && $plane != 14){
674 print OUTPUT
scalar(@
{$charinfoArray[$plane]}), ", ";
679 static const jshort* upper[] = {
680 upper0, upper1, upper2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
681 NULL, NULL, NULL, NULL, upper14, NULL, NULL};
685 * This is the attribute table for computing the lowercase representation
686 * of a character. The value is the difference between the character and
687 * its lowercase version.
690 for ($plane = 0; $plane <= 0x10; $plane++) {
691 # The following if statement handles the cases of unassigned planes
692 # specially so we don't waste space with unused Strings. As of
693 # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used. If
694 # you are updating this script to work with a later version of
695 # Unicode you may have to alter this if statement.
696 next if ($plane > 2 && $plane != 14);
698 print OUTPUT
"static const jshort lower", $plane, "[] = {\n";
699 $len = @
{$charinfoArray[$plane]};
700 for ($i = 0; $i < $len / 13; $i++) {
703 last if $len <= $i * 13 + $j;
704 my $val = $charinfoArray[$plane]->[$i * 13 + $j][2];
705 print OUTPUT cShort
($val), ", ";
709 print OUTPUT
"};\n\n";
711 print OUTPUT
"static const int lower_length[] = {\n ";
712 for ($plane = 0; $plane <= 0x10; $plane++) {
713 if ($plane > 2 && $plane != 14){
717 print OUTPUT
scalar(@
{$charinfoArray[$plane]}), ", ";
722 static const jshort* lower[] = {
723 lower0, lower1, lower2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
724 NULL, NULL, NULL, NULL, lower14, NULL, NULL};
728 * This is the attribute table for computing the directionality class
729 * of a character. At present, the value is in the range 0 - 18 if the
730 * character has a direction, otherwise it is -1.
733 for ($plane = 0; $plane <= 0x10; $plane++) {
734 # The following if statement handles the cases of unassigned planes
735 # specially so we don't waste space with unused Strings. As of
736 # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used. If
737 # you are updating this script to work with a later version of
738 # Unicode you may have to alter this if statement.
739 next if ($plane > 2 && $plane != 14);
741 print OUTPUT
"static const jbyte direction", $plane, "[] = {\n";
742 $len = @
{$charinfoArray[$plane]};
743 for ($i = 0; $i < $len / 19; $i++) {
746 last if $len <= $i * 19 + $j;
747 my $val = $charinfoArray[$plane]->[$i * 19 + $j][3];
749 if ($val < 0 || $val > 18){
752 print OUTPUT cShort
($val), ", ";
756 print OUTPUT
"};\n\n";
758 print OUTPUT
"static const int direction_length[] = {\n ";
759 for ($plane = 0; $plane <= 0x10; $plane++) {
760 if ($plane > 2 && $plane != 14){
764 print OUTPUT
scalar(@
{$charinfoArray[$plane]}), ", ";
769 static const jbyte* direction[] = {
770 direction0, direction1, direction2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
771 NULL, NULL, NULL, NULL, direction14, NULL, NULL};
775 * This is the listing of titlecase special cases (all other character
776 * can use <code>upper</code> to determine their titlecase). The listing
777 * is a sequence of character pairs; converting the first character of the
778 * pair to titlecase produces the second character.
780 static const jchar title[] = {
783 $len = length($titlecase) / 2;
784 for ($i = 0; $i < $len / 10; $i++) {
785 print OUTPUT
$i ?
"\n " : " ";
787 last if $len <= $i * 10 + $j;
788 my $val = unpack "n", substr($titlecase, 2 * ($i * 10 + $j), 2);
789 print OUTPUT
$val, ", ";
793 print OUTPUT
"\n };";
794 print OUTPUT
"\n/** Length of title. */\nstatic const int title_length = ", $len;
798 #endif /* __JAVA_CHARTABLES_H__ */