2013-03-05 Richard Biener <rguenther@suse.de>
[official-gcc.git] / libcpp / makeucnid.c
blob4e3f76d551dd9210c973479b34a530a23f2eb80b
1 /* Make ucnid.h from various sources.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3, or (at your option) any
7 later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
/* Run this program as
   ./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \
       > ucnid.h
*/
23 #include <stdio.h>
24 #include <string.h>
25 #include <ctype.h>
26 #include <stdbool.h>
27 #include <stdlib.h>
/* Per-code-point property bits stored in flags[].  Each constant is a
   distinct bit so that they can be combined with '|'.  */
enum {
  C99           = 1 << 0,  /* Listed in the [C99] section of ucnid.tab.  */
  CXX           = 1 << 1,  /* Listed in the [CXX] section of ucnid.tab.  */
  digit         = 1 << 2,  /* General category starts with 'N'.  */
  not_NFC       = 1 << 3,  /* NFC_QC is N.  */
  not_NFKC      = 1 << 4,  /* NFKC_QC is N.  */
  maybe_not_NFC = 1 << 5   /* NFC_QC is M.  */
};

/* Tables indexed by code point; only U+0000..U+FFFF are covered.  */

/* Bitwise OR of the enum values above.  */
static unsigned flags[65536];

/* Canonical decomposition of each code point (at most two code points);
   a zero first element means that none was recorded.  */
static unsigned short decomp[65536][2];

/* Canonical combining class of each code point.  */
static unsigned char combining_value[65536];
/* Report the fatal error S on stderr, followed by a newline, and
   terminate the program with exit status 1.  Does not return.  */

static void
fail (const char *s)
{
  fputs (s, stderr);
  fputc ('\n', stderr);
  exit (1);
}
51 /* Read ucnid.tab and set the C99 and CXX flags in header[]. */
53 static void
54 read_ucnid (const char *fname)
56 FILE *f = fopen (fname, "r");
57 unsigned fl = 0;
59 if (!f)
60 fail ("opening ucnid.tab");
61 for (;;)
63 char line[256];
65 if (!fgets (line, sizeof (line), f))
66 break;
67 if (strcmp (line, "[C99]\n") == 0)
68 fl = C99;
69 else if (strcmp (line, "[CXX]\n") == 0)
70 fl = CXX;
71 else if (isxdigit (line[0]))
73 char *l = line;
74 while (*l)
76 unsigned long start, end;
77 char *endptr;
78 start = strtoul (l, &endptr, 16);
79 if (endptr == l || (*endptr != '-' && ! isspace (*endptr)))
80 fail ("parsing ucnid.tab [1]");
81 l = endptr;
82 if (*l != '-')
83 end = start;
84 else
86 end = strtoul (l + 1, &endptr, 16);
87 if (end < start)
88 fail ("parsing ucnid.tab, end before start");
89 l = endptr;
90 if (! isspace (*l))
91 fail ("parsing ucnid.tab, junk after range");
93 while (isspace (*l))
94 l++;
95 if (end > 0xFFFF)
96 fail ("parsing ucnid.tab, end too large");
97 while (start <= end)
98 flags[start++] |= fl;
102 if (ferror (f))
103 fail ("reading ucnid.tab");
104 fclose (f);
107 /* Read UnicodeData.txt and set the 'digit' flag, and
108 also fill in the 'decomp' table to be the decompositions of
109 characters for which both the character decomposed and all the code
110 points in the decomposition are either C99 or CXX. */
112 static void
113 read_table (char *fname)
115 FILE * f = fopen (fname, "r");
117 if (!f)
118 fail ("opening UnicodeData.txt");
119 for (;;)
121 char line[256];
122 unsigned long codepoint, this_decomp[4];
123 char *l;
124 int i;
125 int decomp_useful;
127 if (!fgets (line, sizeof (line), f))
128 break;
129 codepoint = strtoul (line, &l, 16);
130 if (l == line || *l != ';')
131 fail ("parsing UnicodeData.txt, reading code point");
132 if (codepoint > 0xffff || ! (flags[codepoint] & (C99 | CXX)))
133 continue;
135 do {
136 l++;
137 } while (*l != ';');
138 /* Category value; things starting with 'N' are numbers of some
139 kind. */
140 if (*++l == 'N')
141 flags[codepoint] |= digit;
143 do {
144 l++;
145 } while (*l != ';');
146 /* Canonical combining class; in NFC/NFKC, they must be increasing
147 (or zero). */
148 if (! isdigit (*++l))
149 fail ("parsing UnicodeData.txt, combining class not number");
150 combining_value[codepoint] = strtoul (l, &l, 10);
151 if (*l++ != ';')
152 fail ("parsing UnicodeData.txt, junk after combining class");
154 /* Skip over bidi value. */
155 do {
156 l++;
157 } while (*l != ';');
159 /* Decomposition mapping. */
160 decomp_useful = flags[codepoint];
161 if (*++l == '<') /* Compatibility mapping. */
162 continue;
163 for (i = 0; i < 4; i++)
165 if (*l == ';')
166 break;
167 if (!isxdigit (*l))
168 fail ("parsing UnicodeData.txt, decomposition format");
169 this_decomp[i] = strtoul (l, &l, 16);
170 decomp_useful &= flags[this_decomp[i]];
171 while (isspace (*l))
172 l++;
174 if (i > 2) /* Decomposition too long. */
175 fail ("parsing UnicodeData.txt, decomposition too long");
176 if (decomp_useful)
177 while (--i >= 0)
178 decomp[codepoint][i] = this_decomp[i];
180 if (ferror (f))
181 fail ("reading UnicodeData.txt");
182 fclose (f);
185 /* Read DerivedNormalizationProps.txt and set the flags that say whether
186 a character is in NFC, NFKC, or is context-dependent. */
188 static void
189 read_derived (const char *fname)
191 FILE * f = fopen (fname, "r");
193 if (!f)
194 fail ("opening DerivedNormalizationProps.txt");
195 for (;;)
197 char line[256];
198 unsigned long start, end;
199 char *l;
200 bool not_NFC_p, not_NFKC_p, maybe_not_NFC_p;
202 if (!fgets (line, sizeof (line), f))
203 break;
204 not_NFC_p = (strstr (line, "; NFC_QC; N") != NULL);
205 not_NFKC_p = (strstr (line, "; NFKC_QC; N") != NULL);
206 maybe_not_NFC_p = (strstr (line, "; NFC_QC; M") != NULL);
207 if (! not_NFC_p && ! not_NFKC_p && ! maybe_not_NFC_p)
208 continue;
210 start = strtoul (line, &l, 16);
211 if (l == line)
212 fail ("parsing DerivedNormalizationProps.txt, reading start");
213 if (start > 0xffff)
214 continue;
215 if (*l == '.' && l[1] == '.')
216 end = strtoul (l + 2, &l, 16);
217 else
218 end = start;
220 while (start <= end)
221 flags[start++] |= ((not_NFC_p ? not_NFC : 0)
222 | (not_NFKC_p ? not_NFKC : 0)
223 | (maybe_not_NFC_p ? maybe_not_NFC : 0)
226 if (ferror (f))
227 fail ("reading DerivedNormalizationProps.txt");
228 fclose (f);
231 /* Write out the table.
232 The table consists of two words per entry. The first word is the flags
233 for the unicode code points up to and including the second word. */
235 static void
236 write_table (void)
238 unsigned i;
239 unsigned last_flag = flags[0];
240 bool really_safe = decomp[0][0] == 0;
241 unsigned char last_combine = combining_value[0];
243 for (i = 1; i <= 65536; i++)
244 if (i == 65536
245 || (flags[i] != last_flag && ((flags[i] | last_flag) & (C99 | CXX)))
246 || really_safe != (decomp[i][0] == 0)
247 || combining_value[i] != last_combine)
249 printf ("{ %s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
250 last_flag & C99 ? "C99" : " 0",
251 last_flag & digit ? "DIG" : " 0",
252 last_flag & CXX ? "CXX" : " 0",
253 really_safe ? "CID" : " 0",
254 last_flag & not_NFC ? " 0" : "NFC",
255 last_flag & not_NFKC ? " 0" : "NKC",
256 last_flag & maybe_not_NFC ? "CTX" : " 0",
257 combining_value[i - 1],
258 i - 1);
259 last_flag = flags[i];
260 last_combine = combining_value[0];
261 really_safe = decomp[i][0] == 0;
/* Print the copyright notice that heads the generated file: the FSF
   notice followed by the Unicode, Inc. terms, as one C comment on
   stdout.  puts appends one further newline after the text.
   NOTE(review): the text below has no blank lines between paragraphs
   and no indentation; confirm that this matches the intended output.  */

static void
write_copyright (void)
{
  static const char notice[] =
    "/* Unicode characters and various properties.\n"
    "Copyright (C) 2003-2013 Free Software Foundation, Inc.\n"
    "This program is free software; you can redistribute it and/or modify it\n"
    "under the terms of the GNU General Public License as published by the\n"
    "Free Software Foundation; either version 3, or (at your option) any\n"
    "later version.\n"
    "This program is distributed in the hope that it will be useful,\n"
    "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
    "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
    "GNU General Public License for more details.\n"
    "You should have received a copy of the GNU General Public License\n"
    "along with this program; see the file COPYING3. If not see\n"
    "<http://www.gnu.org/licenses/>.\n"
    "Copyright (C) 1991-2005 Unicode, Inc. All rights reserved.\n"
    "Distributed under the Terms of Use in\n"
    "http://www.unicode.org/copyright.html.\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of the Unicode data files and any associated\n"
    "documentation (the \"Data Files\") or Unicode software and any\n"
    "associated documentation (the \"Software\") to deal in the Data Files\n"
    "or Software without restriction, including without limitation the\n"
    "rights to use, copy, modify, merge, publish, distribute, and/or\n"
    "sell copies of the Data Files or Software, and to permit persons to\n"
    "whom the Data Files or Software are furnished to do so, provided\n"
    "that (a) the above copyright notice(s) and this permission notice\n"
    "appear with all copies of the Data Files or Software, (b) both the\n"
    "above copyright notice(s) and this permission notice appear in\n"
    "associated documentation, and (c) there is clear notice in each\n"
    "modified Data File or in the Software as well as in the\n"
    "documentation associated with the Data File(s) or Software that the\n"
    "data or software has been modified.\n"
    "THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY\n"
    "OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE\n"
    "WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE\n"
    "COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR\n"
    "ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY\n"
    "DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\n"
    "WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\n"
    "ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\n"
    "OF THE DATA FILES OR SOFTWARE.\n"
    "Except as contained in this notice, the name of a copyright holder\n"
    "shall not be used in advertising or otherwise to promote the sale,\n"
    "use or other dealings in these Data Files or Software without prior\n"
    "written authorization of the copyright holder. */\n";

  puts (notice);
}
/* Main program.  Takes exactly three arguments, in order: the paths of
   ucnid.tab, UnicodeData.txt and DerivedNormalizationProps.txt.  Reads
   them in that order, then writes the copyright notice and the table to
   stdout.  Returns 0 on success; every error exits with status 1 through
   fail.  */

int
main (int argc, char **argv)
{
  if (argc != 4)
    fail ("usage: makeucnid ucnid.tab UnicodeData.txt"
          " DerivedNormalizationProps.txt > ucnid.h");
  read_ucnid (argv[1]);
  read_table (argv[2]);
  read_derived (argv[3]);

  write_copyright ();
  write_table ();
  /* stdout is normally redirected to ucnid.h; report a short write
     rather than leaving a truncated header behind a zero exit status.  */
  if (fflush (stdout) != 0 || ferror (stdout))
    fail ("writing ucnid.h");
  return 0;
}