3 # Reads the Unicode 2.0 "unidata2.txt" file and selects encodings
4 # for case pairs, then builds an include file "casemap.h" for the
5 # case conversion routines.
# Name of the Unicode data file to read.
$INFILE = "unidata2.txt";

# Open the data file ...
# Each open below is the one-argument form: Perl takes the file name (and any
# mode prefix it carries) from the package scalar that has the same name as
# the handle, i.e. $INFILE, $OUT and $TEST respectively.
# NOTE(review): $OUT and $TEST are not assigned in the visible part of this
# script -- confirm they are set before this point.
open INFILE or die "Can't open input file $INFILE!\n";
open OUT or die "Can't open output file $OUT!\n";
# On failure, report the name the TEST handle was actually opened from.
open TEST or die "Can't open output file $TEST!\n";
18 #Initialize the upper and lower hashes
# Main read loop: pull one record at a time from INFILE into $line.
24 while ($line = <INFILE
> )
26 # Decode the fields ...
# The record is split on ';' and the pieces are assigned by position. Of the
# fields, only $code, $upper and $lower are used by the visible statements
# of this loop.
27 ($code, $name, $cat, $comb, $bidi,
28 $decomp, $dec, $dig, $num, $mirror,
29 $oldname, $comment, $upper, $lower, $title) = split /;/, $line;
31 #Get the high byte of the code
# $high is the first two characters of $code; hex($high) is the index used
# for the per-block name arrays @low and @upr below.
32 $high = substr $code, 0, 2;
# Store the name of the lowercase block table ("lblk" followed by $high) and
# the lowercase mapping for this code.
# NOTE(review): any condition guarding these assignments is not visible in
# this view -- confirm against the complete file.
34 $low[hex $high] = "lblk" . $high;
35 $lwrtable{$code} = $lower;
# Same bookkeeping for the uppercase side ("ublk" followed by $high).
38 $upr[hex $high] = "ublk" . $high;
39 $uprtable{$code} = $upper;
41 #Write everything to the test file
# One output line per record: code, uppercase mapping, lowercase mapping.
# An empty mapping is written as "0000".
42 printf TEST
"%s %s %s\n", $code,
43 $upper ne "" ?
$upper : "0000",
44 $lower ne "" ?
$lower : "0000";
50 #Generate the header file
# Each print writes one line of banner text to OUT. The lines begin with
# " * ", so they presumably sit inside a C block comment whose opening and
# closing lines are not visible in this view -- confirm.
52 print OUT
" * Automatically generated file -- do not edit!\n";
53 print OUT
" * (Use tools/unimap.pl for generation)\n";
55 print OUT
" * Mapping tables for Unicode case conversion\n";
58 #Write out the non-trivial mappings
# Lowercase side: walk every possible high byte (0 .. 255).
59 for ($high = 0; $high < 256; $high++) {
60 #Check whether the table is needed
# The read loop stores names of the form "lblk" followed by the two-character
# high byte, so an entry shorter than 6 characters presumably means no name
# was stored for this high byte.
# NOTE(review): the action taken inside this "if" is not visible here.
61 if (length $low[$high] < 6) {
# Emit a C comment naming the code range, then the opening of the block
# array for this high byte.
# NOTE(review): the argument list of the first printf is not visible here.
64 printf OUT
"/* Lowercase mappings %02X00 - %02XFF */\n",
66 printf OUT
"static const WCHAR lblk%02X[256] = {\n", $high;
# Emit the block in rows of 8 entries; $low is the low byte of the first
# entry in the row and $i the offset within the row.
67 for ($low = 0; $low < 256; $low += 8) {
69 for ($i = 0; $i < 8; $i++) {
# Build the %lwrtable key: high byte and low byte as four uppercase hex digits.
70 $code = sprintf "%02X%02X", $high, $low + $i;
71 $map = $lwrtable{$code};
# Store the entry with a "0x" prefix for the row printf below.
# NOTE(review): any handling of codes without a mapping is not visible here.
75 $patch[$i] = "0x" . $map;
# One output row: a tab followed by eight comma-separated entries.
77 printf OUT
"\t%s, %s, %s, %s, %s, %s, %s, %s,\n",
82 print OUT
"static const WCHAR * const lwrtable[256] = {\n";
# The statement above opens the per-high-byte pointer table. The loop below
# fills it from @low, eight block names per row.
83 for ($i = 0; $i < 256; $i += 8) {
84 @patch = @low[$i+0 .. $i+7];
# %06s formats each entry with a minimum width of 6 and the zero flag.
85 printf OUT
"\t%06s, %06s, %06s, %06s, %06s, %06s, %06s, %06s,\n",
# Uppercase side: walk every possible high byte (0 .. 255).
90 for ($high = 0; $high < 256; $high++) {
91 #Check whether the table is needed
# The read loop stores names of the form "ublk" followed by the two-character
# high byte, so an entry shorter than 6 characters presumably means no name
# was stored for this high byte.
# NOTE(review): the action taken inside this "if" is not visible here.
92 if (length $upr[$high] < 6) {
# Emit a C comment naming the code range, then the opening of the block
# array for this high byte.
# NOTE(review): the argument list of the first printf is not visible here.
95 printf OUT
"/* Uppercase mappings %02X00 - %02XFF */\n",
97 printf OUT
"static const WCHAR ublk%02X[256] = {\n", $high;
# Emit the block in rows of 8 entries; $low is the low byte of the first
# entry in the row and $i the offset within the row.
98 for ($low = 0; $low < 256; $low += 8) {
100 for ($i = 0; $i < 8; $i++) {
# Build the %uprtable key: high byte and low byte as four uppercase hex digits.
101 $code = sprintf "%02X%02X", $high, $low + $i;
102 $map = $uprtable{$code};
# Store the entry with a "0x" prefix for the row printf below.
# NOTE(review): any handling of codes without a mapping is not visible here.
106 $patch[$i] = "0x" . $map;
# One output row: a tab followed by eight comma-separated entries.
108 printf OUT
"\t%s, %s, %s, %s, %s, %s, %s, %s,\n",
113 print OUT
"static const WCHAR * const uprtable[256] = {\n";
# The statement above opens the per-high-byte pointer table. The loop below
# fills it from @upr, eight block names per row.
114 for ($i = 0; $i < 256; $i += 8) {
115 @patch = @upr[$i+0 .. $i+7];
# %06s formats each entry with a minimum width of 6 and the zero flag.
116 printf OUT
"\t%06s, %06s, %06s, %06s, %06s, %06s, %06s, %06s,\n",