3 # Copyright (C) 2000 Free Software Foundation
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2, or (at your option)
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 # gen-table.pl - Generate tables for gcj from Unicode data.
21 # Usage: perl gen-table.pl DATA-FILE
23 # Names of fields in Unicode data table.
27 $COMBINING_CLASSES = 3;
40 # Start of special-cased gaps in Unicode data table.
44 0xd800 => "Unassigned High Surrogate",
45 0xdb80 => "Private Use High Surrogate",
46 0xdc00 => "Low Surrogate",
47 0xe000 => "Private Use"
50 # This lists control characters which are also considered whitespace.
51 # This is a somewhat odd list, taken from the JCL definition of
52 # Character.isIdentifierIgnorable.
53 %whitespace_controls =
66 open (INPUT
, "< $ARGV[0]") || exit 1;
72 @fields = split (';', $_, 30);
75 print STDERR
"Entry for $fields[$CODE] has wrong number of fields\n";
78 $code = hex ($fields[$CODE]);
79 if ($code > $last_code + 1)
82 if (defined $gaps{$code})
84 # Fill the gap with the last character read.
89 # The gap represents undefined characters. Only the type
91 @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
94 for (++$last_code; $last_code < $code; ++$last_code)
96 $gfields{$CODE} = sprintf ("%04x", $last_code);
97 &process_one
($last_code, @gfields);
100 &process_one
($code, @fields);
106 @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
108 for (++$last_code; $last_code < 0x10000; ++$last_code)
110 $gfields{$CODE} = sprintf ("%04x", $last_code);
111 &process_one
($last_code, @gfields);
113 --$last_code; # Want last to be 0xFFFF.
115 &print_tables
($last_code);
119 # Process a single character.
122 my ($code, @fields) = @_;
125 my $type = $fields[$CATEGORY];
127 # See if the character is a valid identifier start.
128 if ($type =~ /L./ # Letter
129 || $type eq 'Pc' # Connecting punctuation
130 || $type eq 'Sc') # Currency symbol
132 $value = 'LETTER_START';
135 # See if the character is a valid identifier member.
136 if ($type =~ /L./ # Letter
137 || $type eq 'Pc' # Connecting punctuation
138 || $type eq 'Sc' # Currency symbol
139 || $type =~ /N[dl]/ # Number: decimal or letter
140 || $type =~ /M[nc]/ # Mark: non-spacing or combining
141 || ($type eq 'Cc' # Certain controls
142 && ! defined $whitespace_controls{$code})
143 || ($code >= 0x200c # Join controls
145 || ($code >= 0x202a # Bidi controls -- note that there
146 # is a typo in the JCL where these are
149 || ($code >= 0x206a # Format controls
151 || $code == 0xfeff) # ZWNBSP
155 $value = 'LETTER_PART';
159 $value = 'LETTER_PART | ' . $value;
169 $value = '(' . $value . ')';
172 $map[$code] = $value;
179 local ($bytes_out) = 0;
181 open (OUT
, "> chartables.h");
183 print OUT
"/* This file is automatically generated. DO NOT EDIT!\n";
184 print OUT
" Instead, edit gen-table.pl and re-run. */\n\n";
186 print OUT
"#ifndef GCC_CHARTABLES_H\n";
187 print OUT
"#define GCC_CHARTABLES_H\n\n";
189 print OUT
"#define LETTER_START 1\n";
190 print OUT
"#define LETTER_PART 2\n\n";
192 for ($count = 0; $count <= $last; $count += 256)
194 $row[$count / 256] = &print_row
($count, '(char *) ', 'char', 1,
198 print OUT
"static char *type_table[256] = {\n";
199 for ($count = 0; $count <= $last; $count += 256)
201 print OUT
",\n" if $count > 0;
202 print OUT
" ", $row[$count / 256];
205 print OUT
"\n};\n\n";
207 print OUT
"#endif /* ! GCC_CHARTABLES_H */\n";
211 printf "Generated %d bytes\n", $bytes_out;
214 # Print a single "row" of a two-level table.
217 my ($start, $def_pfx, $typname, $typsize, $name) = @_;
223 for ($off = 0; $off < 256; ++$off)
225 $values[$off] = $map[$off + $start];
226 if ($values[$off] ne $values[0])
233 return $def_pfx . $values[0];
236 printf OUT
"static %s %s%d[256] = {\n ", $typname, $name, $start / 256;
238 for ($i = $start; $i < $start + 256; ++$i)
242 my ($text) = $values[$i - $start];
243 if (length ($text) + $column + 2 > 78)
249 $column += length ($text) + 2;
251 print OUT
"\n};\n\n";
253 $bytes_out += 256 * $typsize;
255 return sprintf "%s%d", $name, $start / 256;