1 /* Make ucnid.h from various sources.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3, or (at your option) any
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
18 /* Run this program as
19 ./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \
38 static unsigned flags
[65536];
39 static unsigned short decomp
[65536][2];
40 static unsigned char combining_value
[65536];
47 fprintf (stderr
, "%s\n", s
);
51 /* Read ucnid.tab and set the C99 and CXX flags in flags[]. */
54 read_ucnid (const char *fname
)
56 FILE *f
= fopen (fname
, "r");
60 fail ("opening ucnid.tab");
65 if (!fgets (line
, sizeof (line
), f
))
67 if (strcmp (line
, "[C99]\n") == 0)
69 if (strcmp (line
, "[C99DIG]\n") == 0)
71 else if (strcmp (line
, "[CXX]\n") == 0)
73 else if (isxdigit (line
[0]))
78 unsigned long start
, end
;
80 start
= strtoul (l
, &endptr
, 16);
81 if (endptr
== l
|| (*endptr
!= '-' && ! isspace (*endptr
)))
82 fail ("parsing ucnid.tab [1]");
88 end
= strtoul (l
+ 1, &endptr
, 16);
90 fail ("parsing ucnid.tab, end before start");
93 fail ("parsing ucnid.tab, junk after range");
98 fail ("parsing ucnid.tab, end too large");
100 flags
[start
++] |= fl
;
105 fail ("reading ucnid.tab");
109 /* Read UnicodeData.txt and fill in the 'decomp' table to be the
110 decompositions of characters for which both the character
111 decomposed and all the code points in the decomposition are either
115 read_table (char *fname
)
117 FILE * f
= fopen (fname
, "r");
120 fail ("opening UnicodeData.txt");
124 unsigned long codepoint
, this_decomp
[4];
129 if (!fgets (line
, sizeof (line
), f
))
131 codepoint
= strtoul (line
, &l
, 16);
132 if (l
== line
|| *l
!= ';')
133 fail ("parsing UnicodeData.txt, reading code point");
134 if (codepoint
> 0xffff || ! (flags
[codepoint
] & (C99
| CXX
)))
140 /* Category value. */
144 /* Canonical combining class; in NFC/NFKC, they must be increasing
146 if (! isdigit (*++l
))
147 fail ("parsing UnicodeData.txt, combining class not number");
148 combining_value
[codepoint
] = strtoul (l
, &l
, 10);
150 fail ("parsing UnicodeData.txt, junk after combining class");
152 /* Skip over bidi value. */
157 /* Decomposition mapping. */
158 decomp_useful
= flags
[codepoint
];
159 if (*++l
== '<') /* Compatibility mapping. */
161 for (i
= 0; i
< 4; i
++)
166 fail ("parsing UnicodeData.txt, decomposition format");
167 this_decomp
[i
] = strtoul (l
, &l
, 16);
168 decomp_useful
&= flags
[this_decomp
[i
]];
172 if (i
> 2) /* Decomposition too long. */
173 fail ("parsing UnicodeData.txt, decomposition too long");
176 decomp
[codepoint
][i
] = this_decomp
[i
];
179 fail ("reading UnicodeData.txt");
183 /* Read DerivedNormalizationProps.txt and set the flags that say whether
184 a character is in NFC, NFKC, or is context-dependent. */
187 read_derived (const char *fname
)
189 FILE * f
= fopen (fname
, "r");
192 fail ("opening DerivedNormalizationProps.txt");
196 unsigned long start
, end
;
198 bool not_NFC_p
, not_NFKC_p
, maybe_not_NFC_p
;
200 if (!fgets (line
, sizeof (line
), f
))
202 not_NFC_p
= (strstr (line
, "; NFC_QC; N") != NULL
);
203 not_NFKC_p
= (strstr (line
, "; NFKC_QC; N") != NULL
);
204 maybe_not_NFC_p
= (strstr (line
, "; NFC_QC; M") != NULL
);
205 if (! not_NFC_p
&& ! not_NFKC_p
&& ! maybe_not_NFC_p
)
208 start
= strtoul (line
, &l
, 16);
210 fail ("parsing DerivedNormalizationProps.txt, reading start");
213 if (*l
== '.' && l
[1] == '.')
214 end
= strtoul (l
+ 2, &l
, 16);
219 flags
[start
++] |= ((not_NFC_p
? not_NFC
: 0)
220 | (not_NFKC_p
? not_NFKC
: 0)
221 | (maybe_not_NFC_p
? maybe_not_NFC
: 0)
225 fail ("reading DerivedNormalizationProps.txt");
229 /* Write out the table.
230 Each entry has three fields: the flags and the combining class for a run
231 of Unicode code points, and the last code point that the run covers. */
237 unsigned last_flag
= flags
[0];
238 bool really_safe
= decomp
[0][0] == 0;
239 unsigned char last_combine
= combining_value
[0];
241 for (i
= 1; i
<= 65536; i
++)
243 || (flags
[i
] != last_flag
&& ((flags
[i
] | last_flag
) & (C99
| CXX
)))
244 || really_safe
!= (decomp
[i
][0] == 0)
245 || combining_value
[i
] != last_combine
)
247 printf ("{ %s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
248 last_flag
& C99
? "C99" : " 0",
249 last_flag
& digit
? "DIG" : " 0",
250 last_flag
& CXX
? "CXX" : " 0",
251 really_safe
? "CID" : " 0",
252 last_flag
& not_NFC
? " 0" : "NFC",
253 last_flag
& not_NFKC
? " 0" : "NKC",
254 last_flag
& maybe_not_NFC
? "CTX" : " 0",
255 combining_value
[i
- 1],
257 last_flag
= flags
[i
];
258 last_combine
= combining_value
[0];
259 really_safe
= decomp
[i
][0] == 0;
263 /* Print out the huge copyright notice. */
266 write_copyright (void)
268 static const char copyright
[] = "\
269 /* Unicode characters and various properties.\n\
270 Copyright (C) 2003-2013 Free Software Foundation, Inc.\n\
272 This program is free software; you can redistribute it and/or modify it\n\
273 under the terms of the GNU General Public License as published by the\n\
274 Free Software Foundation; either version 3, or (at your option) any\n\
277 This program is distributed in the hope that it will be useful,\n\
278 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
279 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
280 GNU General Public License for more details.\n\
282 You should have received a copy of the GNU General Public License\n\
283 along with this program; see the file COPYING3. If not see\n\
284 <http://www.gnu.org/licenses/>.\n\
287 Copyright (C) 1991-2005 Unicode, Inc. All rights reserved.\n\
288 Distributed under the Terms of Use in\n\
289 http://www.unicode.org/copyright.html.\n\
291 Permission is hereby granted, free of charge, to any person\n\
292 obtaining a copy of the Unicode data files and any associated\n\
293 documentation (the \"Data Files\") or Unicode software and any\n\
294 associated documentation (the \"Software\") to deal in the Data Files\n\
295 or Software without restriction, including without limitation the\n\
296 rights to use, copy, modify, merge, publish, distribute, and/or\n\
297 sell copies of the Data Files or Software, and to permit persons to\n\
298 whom the Data Files or Software are furnished to do so, provided\n\
299 that (a) the above copyright notice(s) and this permission notice\n\
300 appear with all copies of the Data Files or Software, (b) both the\n\
301 above copyright notice(s) and this permission notice appear in\n\
302 associated documentation, and (c) there is clear notice in each\n\
303 modified Data File or in the Software as well as in the\n\
304 documentation associated with the Data File(s) or Software that the\n\
305 data or software has been modified.\n\
307 THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY\n\
308 OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE\n\
309 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n\
310 NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE\n\
311 COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR\n\
312 ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY\n\
313 DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\n\
314 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\n\
315 ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\n\
316 OF THE DATA FILES OR SOFTWARE.\n\
318 Except as contained in this notice, the name of a copyright holder\n\
319 shall not be used in advertising or otherwise to promote the sale,\n\
320 use or other dealings in these Data Files or Software without prior\n\
321 written authorization of the copyright holder. */\n";
329 main(int argc
, char ** argv
)
332 fail ("too few arguments to makeucn");
333 read_ucnid (argv
[1]);
334 read_table (argv
[2]);
335 read_derived (argv
[3]);