* c-decl.c (duplicate_decls): Copy DECL_SAVED_TREE.
[official-gcc.git] / gcc / java / gen-table.pl
blob44bdc2a4cee4323a3a1875cce16d8906c89a8898
1 #! /usr/bin/perl
3 # Copyright (C) 2000 Free Software Foundation
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2, or (at your option)
8 # any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18 # 02111-1307, USA.
20 # gen-table.pl - Generate tables for gcj from Unicode data.
21 # Usage: perl gen-table.pl DATA-FILE
23 # Names of fields in Unicode data table.
24 $CODE = 0;
25 $NAME = 1;
26 $CATEGORY = 2;
27 $COMBINING_CLASSES = 3;
28 $BIDI_CATEGORY = 4;
29 $DECOMPOSITION = 5;
30 $DECIMAL_VALUE = 6;
31 $DIGIT_VALUE = 7;
32 $NUMERIC_VALUE = 8;
33 $MIRRORED = 9;
34 $OLD_NAME = 10;
35 $COMMENT = 11;
36 $UPPER = 12;
37 $LOWER = 13;
38 $TITLE = 14;
40 # Start of special-cased gaps in Unicode data table.
41 %gaps = (
42 0x4e00 => "CJK",
43 0xac00 => "Hangul",
44 0xd800 => "Unassigned High Surrogate",
45 0xdb80 => "Private Use High Surrogate",
46 0xdc00 => "Low Surrogate",
47 0xe000 => "Private Use"
50 # This lists control characters which are also considered whitespace.
51 # This is a somewhat odd list, taken from the JCL definition of
52 # Character.isIdentifierIgnorable.
53 %whitespace_controls =
55 0x0009 => 1,
56 0x000a => 1,
57 0x000b => 1,
58 0x000c => 1,
59 0x000d => 1,
60 0x001c => 1,
61 0x001d => 1,
62 0x001e => 1,
63 0x001f => 1
66 open (INPUT, "< $ARGV[0]") || exit 1;
68 $last_code = -1;
69 while (<INPUT>)
71 chop;
72 @fields = split (';', $_, 30);
73 if ($#fields != 14)
75 print STDERR "Entry for $fields[$CODE] has wrong number of fields\n";
78 $code = hex ($fields[$CODE]);
79 if ($code > $last_code + 1)
81 # Found a gap.
82 if (defined $gaps{$code})
84 # Fill the gap with the last character read.
85 @gfields = @fields;
87 else
89 # The gap represents undefined characters. Only the type
90 # matters.
91 @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
92 '', '', '', '');
94 for (++$last_code; $last_code < $code; ++$last_code)
96 $gfields{$CODE} = sprintf ("%04x", $last_code);
97 &process_one ($last_code, @gfields);
100 &process_one ($code, @fields);
101 $last_code = $code;
104 close (INPUT);
106 @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
107 '', '', '', '');
108 for (++$last_code; $last_code < 0x10000; ++$last_code)
110 $gfields{$CODE} = sprintf ("%04x", $last_code);
111 &process_one ($last_code, @gfields);
113 --$last_code; # Want last to be 0xFFFF.
115 &print_tables ($last_code);
117 exit 0;
119 # Process a single character.
120 sub process_one
122 my ($code, @fields) = @_;
124 my $value = '';
125 my $type = $fields[$CATEGORY];
127 # See if the character is a valid identifier start.
128 if ($type =~ /L./ # Letter
129 || $type eq 'Pc' # Connecting punctuation
130 || $type eq 'Sc') # Currency symbol
132 $value = 'LETTER_START';
135 # See if the character is a valid identifier member.
136 if ($type =~ /L./ # Letter
137 || $type eq 'Pc' # Connecting punctuation
138 || $type eq 'Sc' # Currency symbol
139 || $type =~ /N[dl]/ # Number: decimal or letter
140 || $type =~ /M[nc]/ # Mark: non-spacing or combining
141 || ($type eq 'Cc' # Certain controls
142 && ! defined $whitespace_controls{$code})
143 || ($code >= 0x200c # Join controls
144 && $code <= 0x200f)
145 || ($code >= 0x202a # Bidi controls -- note that there
146 # is a typo in the JCL where these are
147 # concerned.
148 && $code <= 0x202e)
149 || ($code >= 0x206a # Format controls
150 && $code <= 0x206f)
151 || $code == 0xfeff) # ZWNBSP
153 if ($value eq '')
155 $value = 'LETTER_PART';
157 else
159 $value = 'LETTER_PART | ' . $value;
163 if ($value eq '')
165 $value = '0';
167 else
169 $value = '(' . $value . ')';
172 $map[$code] = $value;
175 sub print_tables
177 my ($last) = @_;
179 local ($bytes_out) = 0;
181 open (OUT, "> chartables.h");
183 print OUT "/* This file is automatically generated. DO NOT EDIT!\n";
184 print OUT " Instead, edit gen-table.pl and re-run. */\n\n";
186 print OUT "#ifndef GCC_CHARTABLES_H\n";
187 print OUT "#define GCC_CHARTABLES_H\n\n";
189 print OUT "#define LETTER_START 1\n";
190 print OUT "#define LETTER_PART 2\n\n";
192 for ($count = 0; $count <= $last; $count += 256)
194 $row[$count / 256] = &print_row ($count, '(char *) ', 'char', 1,
195 'page');
198 print OUT "static char *type_table[256] = {\n";
199 for ($count = 0; $count <= $last; $count += 256)
201 print OUT ",\n" if $count > 0;
202 print OUT " ", $row[$count / 256];
203 $bytes_out += 4;
205 print OUT "\n};\n\n";
207 print OUT "#endif /* ! GCC_CHARTABLES_H */\n";
209 close (OUT);
211 printf "Generated %d bytes\n", $bytes_out;
214 # Print a single "row" of a two-level table.
215 sub print_row
217 my ($start, $def_pfx, $typname, $typsize, $name) = @_;
219 my ($i);
220 my (@values);
221 my ($flag) = 1;
222 my ($off);
223 for ($off = 0; $off < 256; ++$off)
225 $values[$off] = $map[$off + $start];
226 if ($values[$off] ne $values[0])
228 $flag = 0;
231 if ($flag)
233 return $def_pfx . $values[0];
236 printf OUT "static %s %s%d[256] = {\n ", $typname, $name, $start / 256;
237 my ($column) = 2;
238 for ($i = $start; $i < $start + 256; ++$i)
240 print OUT ", "
241 if $i > $start;
242 my ($text) = $values[$i - $start];
243 if (length ($text) + $column + 2 > 78)
245 print OUT "\n ";
246 $column = 2;
248 print OUT $text;
249 $column += length ($text) + 2;
251 print OUT "\n};\n\n";
253 $bytes_out += 256 * $typsize;
255 return sprintf "%s%d", $name, $start / 256;