2013-11-15 Paolo Carlini <paolo.carlini@oracle.com>
[official-gcc.git] / libcpp / makeucnid.c
blobda06065595367a110974a9eddf516a7c05276ec9
1 /* Make ucnid.h from various sources.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3, or (at your option) any
7 later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
/* Run this program as
     ./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \
         > ucnid.h
*/
23 #include <stdio.h>
24 #include <string.h>
25 #include <ctype.h>
26 #include <stdbool.h>
27 #include <stdlib.h>
/* Property bits stored per code point in flags[].  */
enum {
  C99           = 1 << 0,  /* Listed in a [C99] or [C99DIG] section.  */
  CXX           = 1 << 1,  /* Listed in a [CXX] section.  */
  digit         = 1 << 2,  /* Listed in a [C99DIG] section.  */
  not_NFC       = 1 << 3,  /* NFC_QC is N.  */
  not_NFKC      = 1 << 4,  /* NFKC_QC is N.  */
  maybe_not_NFC = 1 << 5   /* NFC_QC is M.  */
};

/* One slot per 16-bit code point.  */

/* Bitwise OR of the property bits above.  */
static unsigned flags[65536];

/* Canonical decomposition (at most two code points); decomp[c][0] stays
   zero when no decomposition was recorded for C.  */
static unsigned short decomp[65536][2];

/* Canonical combining class as read from UnicodeData.txt.  */
static unsigned char combining_value[65536];
/* Print the message S followed by a newline on stderr, then terminate
   the program with exit status 1.  Never returns.  */

static void
fail (const char *s)
{
  fputs (s, stderr);
  fputc ('\n', stderr);
  exit (1);
}
51 /* Read ucnid.tab and set the C99 and CXX flags in header[]. */
53 static void
54 read_ucnid (const char *fname)
56 FILE *f = fopen (fname, "r");
57 unsigned fl = 0;
59 if (!f)
60 fail ("opening ucnid.tab");
61 for (;;)
63 char line[256];
65 if (!fgets (line, sizeof (line), f))
66 break;
67 if (strcmp (line, "[C99]\n") == 0)
68 fl = C99;
69 if (strcmp (line, "[C99DIG]\n") == 0)
70 fl = C99|digit;
71 else if (strcmp (line, "[CXX]\n") == 0)
72 fl = CXX;
73 else if (isxdigit (line[0]))
75 char *l = line;
76 while (*l)
78 unsigned long start, end;
79 char *endptr;
80 start = strtoul (l, &endptr, 16);
81 if (endptr == l || (*endptr != '-' && ! isspace (*endptr)))
82 fail ("parsing ucnid.tab [1]");
83 l = endptr;
84 if (*l != '-')
85 end = start;
86 else
88 end = strtoul (l + 1, &endptr, 16);
89 if (end < start)
90 fail ("parsing ucnid.tab, end before start");
91 l = endptr;
92 if (! isspace (*l))
93 fail ("parsing ucnid.tab, junk after range");
95 while (isspace (*l))
96 l++;
97 if (end > 0xFFFF)
98 fail ("parsing ucnid.tab, end too large");
99 while (start <= end)
100 flags[start++] |= fl;
104 if (ferror (f))
105 fail ("reading ucnid.tab");
106 fclose (f);
109 /* Read UnicodeData.txt and fill in the 'decomp' table to be the
110 decompositions of characters for which both the character
111 decomposed and all the code points in the decomposition are either
112 C99 or CXX. */
114 static void
115 read_table (char *fname)
117 FILE * f = fopen (fname, "r");
119 if (!f)
120 fail ("opening UnicodeData.txt");
121 for (;;)
123 char line[256];
124 unsigned long codepoint, this_decomp[4];
125 char *l;
126 int i;
127 int decomp_useful;
129 if (!fgets (line, sizeof (line), f))
130 break;
131 codepoint = strtoul (line, &l, 16);
132 if (l == line || *l != ';')
133 fail ("parsing UnicodeData.txt, reading code point");
134 if (codepoint > 0xffff || ! (flags[codepoint] & (C99 | CXX)))
135 continue;
137 do {
138 l++;
139 } while (*l != ';');
140 /* Category value. */
141 do {
142 l++;
143 } while (*l != ';');
144 /* Canonical combining class; in NFC/NFKC, they must be increasing
145 (or zero). */
146 if (! isdigit (*++l))
147 fail ("parsing UnicodeData.txt, combining class not number");
148 combining_value[codepoint] = strtoul (l, &l, 10);
149 if (*l++ != ';')
150 fail ("parsing UnicodeData.txt, junk after combining class");
152 /* Skip over bidi value. */
153 do {
154 l++;
155 } while (*l != ';');
157 /* Decomposition mapping. */
158 decomp_useful = flags[codepoint];
159 if (*++l == '<') /* Compatibility mapping. */
160 continue;
161 for (i = 0; i < 4; i++)
163 if (*l == ';')
164 break;
165 if (!isxdigit (*l))
166 fail ("parsing UnicodeData.txt, decomposition format");
167 this_decomp[i] = strtoul (l, &l, 16);
168 decomp_useful &= flags[this_decomp[i]];
169 while (isspace (*l))
170 l++;
172 if (i > 2) /* Decomposition too long. */
173 fail ("parsing UnicodeData.txt, decomposition too long");
174 if (decomp_useful)
175 while (--i >= 0)
176 decomp[codepoint][i] = this_decomp[i];
178 if (ferror (f))
179 fail ("reading UnicodeData.txt");
180 fclose (f);
183 /* Read DerivedNormalizationProps.txt and set the flags that say whether
184 a character is in NFC, NFKC, or is context-dependent. */
186 static void
187 read_derived (const char *fname)
189 FILE * f = fopen (fname, "r");
191 if (!f)
192 fail ("opening DerivedNormalizationProps.txt");
193 for (;;)
195 char line[256];
196 unsigned long start, end;
197 char *l;
198 bool not_NFC_p, not_NFKC_p, maybe_not_NFC_p;
200 if (!fgets (line, sizeof (line), f))
201 break;
202 not_NFC_p = (strstr (line, "; NFC_QC; N") != NULL);
203 not_NFKC_p = (strstr (line, "; NFKC_QC; N") != NULL);
204 maybe_not_NFC_p = (strstr (line, "; NFC_QC; M") != NULL);
205 if (! not_NFC_p && ! not_NFKC_p && ! maybe_not_NFC_p)
206 continue;
208 start = strtoul (line, &l, 16);
209 if (l == line)
210 fail ("parsing DerivedNormalizationProps.txt, reading start");
211 if (start > 0xffff)
212 continue;
213 if (*l == '.' && l[1] == '.')
214 end = strtoul (l + 2, &l, 16);
215 else
216 end = start;
218 while (start <= end)
219 flags[start++] |= ((not_NFC_p ? not_NFC : 0)
220 | (not_NFKC_p ? not_NFKC : 0)
221 | (maybe_not_NFC_p ? maybe_not_NFC : 0)
224 if (ferror (f))
225 fail ("reading DerivedNormalizationProps.txt");
226 fclose (f);
229 /* Write out the table.
230 The table consists of two words per entry. The first word is the flags
231 for the unicode code points up to and including the second word. */
233 static void
234 write_table (void)
236 unsigned i;
237 unsigned last_flag = flags[0];
238 bool really_safe = decomp[0][0] == 0;
239 unsigned char last_combine = combining_value[0];
241 for (i = 1; i <= 65536; i++)
242 if (i == 65536
243 || (flags[i] != last_flag && ((flags[i] | last_flag) & (C99 | CXX)))
244 || really_safe != (decomp[i][0] == 0)
245 || combining_value[i] != last_combine)
247 printf ("{ %s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
248 last_flag & C99 ? "C99" : " 0",
249 last_flag & digit ? "DIG" : " 0",
250 last_flag & CXX ? "CXX" : " 0",
251 really_safe ? "CID" : " 0",
252 last_flag & not_NFC ? " 0" : "NFC",
253 last_flag & not_NFKC ? " 0" : "NKC",
254 last_flag & maybe_not_NFC ? "CTX" : " 0",
255 combining_value[i - 1],
256 i - 1);
257 last_flag = flags[i];
258 last_combine = combining_value[0];
259 really_safe = decomp[i][0] == 0;
/* Emit the copyright and licence banner that heads the generated file,
   followed by one extra newline.  */

static void
write_copyright (void)
{
  static const char copyright[] =
    "/* Unicode characters and various properties.\n"
    "Copyright (C) 2003-2013 Free Software Foundation, Inc.\n"
    "This program is free software; you can redistribute it and/or modify it\n"
    "under the terms of the GNU General Public License as published by the\n"
    "Free Software Foundation; either version 3, or (at your option) any\n"
    "later version.\n"
    "This program is distributed in the hope that it will be useful,\n"
    "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
    "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
    "GNU General Public License for more details.\n"
    "You should have received a copy of the GNU General Public License\n"
    "along with this program; see the file COPYING3. If not see\n"
    "<http://www.gnu.org/licenses/>.\n"
    "Copyright (C) 1991-2005 Unicode, Inc. All rights reserved.\n"
    "Distributed under the Terms of Use in\n"
    "http://www.unicode.org/copyright.html.\n"
    "Permission is hereby granted, free of charge, to any person\n"
    "obtaining a copy of the Unicode data files and any associated\n"
    "documentation (the \"Data Files\") or Unicode software and any\n"
    "associated documentation (the \"Software\") to deal in the Data Files\n"
    "or Software without restriction, including without limitation the\n"
    "rights to use, copy, modify, merge, publish, distribute, and/or\n"
    "sell copies of the Data Files or Software, and to permit persons to\n"
    "whom the Data Files or Software are furnished to do so, provided\n"
    "that (a) the above copyright notice(s) and this permission notice\n"
    "appear with all copies of the Data Files or Software, (b) both the\n"
    "above copyright notice(s) and this permission notice appear in\n"
    "associated documentation, and (c) there is clear notice in each\n"
    "modified Data File or in the Software as well as in the\n"
    "documentation associated with the Data File(s) or Software that the\n"
    "data or software has been modified.\n"
    "THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY\n"
    "OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE\n"
    "WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE\n"
    "COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR\n"
    "ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY\n"
    "DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\n"
    "WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\n"
    "ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\n"
    "OF THE DATA FILES OR SOFTWARE.\n"
    "Except as contained in this notice, the name of a copyright holder\n"
    "shall not be used in advertising or otherwise to promote the sale,\n"
    "use or other dealings in these Data Files or Software without prior\n"
    "written authorization of the copyright holder. */\n";

  fputs (copyright, stdout);
  putchar ('\n');
}
/* Main program.  Takes exactly three arguments (ucnid.tab,
   UnicodeData.txt and DerivedNormalizationProps.txt, in that order) and
   writes the generated table to stdout.  Returns 0 on success; every
   failure goes through fail ().  */

int
main (int argc, char **argv)
{
  if (argc != 4)
    fail ("usage: makeucnid ucnid.tab UnicodeData.txt"
          " DerivedNormalizationProps.txt > ucnid.h");

  /* Order matters: read_table consults the C99/CXX bits set by
     read_ucnid, and ANDs flags[] together before read_derived adds the
     normalization bits.  */
  read_ucnid (argv[1]);
  read_table (argv[2]);
  read_derived (argv[3]);

  write_copyright ();
  write_table ();

  /* stdout is the generated header; a short write must not pass
     silently.  */
  if (fflush (stdout) != 0 || ferror (stdout))
    fail ("writing ucnid.h");
  return 0;
}