1 /* Make ucnid.h from various sources.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3, or (at your option) any
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
18 /* Run this program as
19 ./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \
/* Per-code-point tables, each indexed by code point value (0 through
   0xffff).  */

/* Bit set of properties for each code point; the readers OR in bits
   such as C99, CXX, digit, not_NFC, not_NFKC and maybe_not_NFC.  */
38 static unsigned flags
[65536];
/* Decomposition recorded for each code point, at most two code points
   long.  The table-writing code tests decomp[c][0] == 0 to decide
   whether an entry was recorded for c.  */
39 static unsigned short decomp
[65536][2];
/* Canonical combining class of each code point, as parsed from the
   combining-class field of UnicodeData.txt.  */
40 static unsigned char combining_value
[65536];
47 fprintf (stderr
, "%s\n", s
);
51 /* Read ucnid.tab and set the C99 and CXX flags in flags[]. */
54 read_ucnid (const char *fname
)
56 FILE *f
= fopen (fname
, "r");
60 fail ("opening ucnid.tab");
65 if (!fgets (line
, sizeof (line
), f
))
67 if (strcmp (line
, "[C99]\n") == 0)
69 else if (strcmp (line
, "[CXX]\n") == 0)
71 else if (isxdigit (line
[0]))
76 unsigned long start
, end
;
78 start
= strtoul (l
, &endptr
, 16);
79 if (endptr
== l
|| (*endptr
!= '-' && ! isspace (*endptr
)))
80 fail ("parsing ucnid.tab [1]");
86 end
= strtoul (l
+ 1, &endptr
, 16);
88 fail ("parsing ucnid.tab, end before start");
91 fail ("parsing ucnid.tab, junk after range");
96 fail ("parsing ucnid.tab, end too large");
103 fail ("reading ucnid.tab");
107 /* Read UnicodeData.txt; set the 'digit' flag and the combining class,
108 and fill in the 'decomp' table with the decompositions of those
109 characters for which both the character being decomposed and all the
110 code points in its decomposition are either C99 or CXX. */
113 read_table (char *fname
)
115 FILE * f
= fopen (fname
, "r");
118 fail ("opening UnicodeData.txt");
122 unsigned long codepoint
, this_decomp
[4];
127 if (!fgets (line
, sizeof (line
), f
))
129 codepoint
= strtoul (line
, &l
, 16);
130 if (l
== line
|| *l
!= ';')
131 fail ("parsing UnicodeData.txt, reading code point");
132 if (codepoint
> 0xffff || ! (flags
[codepoint
] & (C99
| CXX
)))
138 /* Category value; things starting with 'N' are numbers of some
141 flags
[codepoint
] |= digit
;
146 /* Canonical combining class; in NFC/NFKC, they must be increasing
148 if (! isdigit (*++l
))
149 fail ("parsing UnicodeData.txt, combining class not number");
150 combining_value
[codepoint
] = strtoul (l
, &l
, 10);
152 fail ("parsing UnicodeData.txt, junk after combining class");
154 /* Skip over bidi value. */
159 /* Decomposition mapping. */
160 decomp_useful
= flags
[codepoint
];
161 if (*++l
== '<') /* Compatibility mapping. */
163 for (i
= 0; i
< 4; i
++)
168 fail ("parsing UnicodeData.txt, decomposition format");
169 this_decomp
[i
] = strtoul (l
, &l
, 16);
170 decomp_useful
&= flags
[this_decomp
[i
]];
174 if (i
> 2) /* Decomposition too long. */
175 fail ("parsing UnicodeData.txt, decomposition too long");
178 decomp
[codepoint
][i
] = this_decomp
[i
];
181 fail ("reading UnicodeData.txt");
185 /* Read DerivedNormalizationProps.txt and set the flags that say whether
186 a character is in NFC, NFKC, or is context-dependent. */
189 read_derived (const char *fname
)
191 FILE * f
= fopen (fname
, "r");
194 fail ("opening DerivedNormalizationProps.txt");
198 unsigned long start
, end
;
200 bool not_NFC_p
, not_NFKC_p
, maybe_not_NFC_p
;
202 if (!fgets (line
, sizeof (line
), f
))
204 not_NFC_p
= (strstr (line
, "; NFC_QC; N") != NULL
);
205 not_NFKC_p
= (strstr (line
, "; NFKC_QC; N") != NULL
);
206 maybe_not_NFC_p
= (strstr (line
, "; NFC_QC; M") != NULL
);
207 if (! not_NFC_p
&& ! not_NFKC_p
&& ! maybe_not_NFC_p
)
210 start
= strtoul (line
, &l
, 16);
212 fail ("parsing DerivedNormalizationProps.txt, reading start");
215 if (*l
== '.' && l
[1] == '.')
216 end
= strtoul (l
+ 2, &l
, 16);
221 flags
[start
++] |= ((not_NFC_p
? not_NFC
: 0)
222 | (not_NFKC_p
? not_NFKC
: 0)
223 | (maybe_not_NFC_p
? maybe_not_NFC
: 0)
227 fail ("reading DerivedNormalizationProps.txt");
231 /* Write out the table.
232 Each entry has three fields: the flags, the combining class, and the
233 last Unicode code point (inclusive) of the range the entry describes. */
239 unsigned last_flag
= flags
[0];
240 bool really_safe
= decomp
[0][0] == 0;
241 unsigned char last_combine
= combining_value
[0];
243 for (i
= 1; i
<= 65536; i
++)
245 || (flags
[i
] != last_flag
&& ((flags
[i
] | last_flag
) & (C99
| CXX
)))
246 || really_safe
!= (decomp
[i
][0] == 0)
247 || combining_value
[i
] != last_combine
)
249 printf ("{ %s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
250 last_flag
& C99
? "C99" : " 0",
251 last_flag
& digit
? "DIG" : " 0",
252 last_flag
& CXX
? "CXX" : " 0",
253 really_safe
? "CID" : " 0",
254 last_flag
& not_NFC
? " 0" : "NFC",
255 last_flag
& not_NFKC
? " 0" : "NKC",
256 last_flag
& maybe_not_NFC
? "CTX" : " 0",
257 combining_value
[i
- 1],
259 last_flag
= flags
[i
];
260 last_combine
= combining_value
[0];
261 really_safe
= decomp
[i
][0] == 0;
265 /* Print out the huge copyright notice. */
268 write_copyright (void)
270 static const char copyright
[] = "\
271 /* Unicode characters and various properties.\n\
272 Copyright (C) 2003-2013 Free Software Foundation, Inc.\n\
274 This program is free software; you can redistribute it and/or modify it\n\
275 under the terms of the GNU General Public License as published by the\n\
276 Free Software Foundation; either version 3, or (at your option) any\n\
279 This program is distributed in the hope that it will be useful,\n\
280 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
281 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
282 GNU General Public License for more details.\n\
284 You should have received a copy of the GNU General Public License\n\
285 along with this program; see the file COPYING3. If not see\n\
286 <http://www.gnu.org/licenses/>.\n\
289 Copyright (C) 1991-2005 Unicode, Inc. All rights reserved.\n\
290 Distributed under the Terms of Use in\n\
291 http://www.unicode.org/copyright.html.\n\
293 Permission is hereby granted, free of charge, to any person\n\
294 obtaining a copy of the Unicode data files and any associated\n\
295 documentation (the \"Data Files\") or Unicode software and any\n\
296 associated documentation (the \"Software\") to deal in the Data Files\n\
297 or Software without restriction, including without limitation the\n\
298 rights to use, copy, modify, merge, publish, distribute, and/or\n\
299 sell copies of the Data Files or Software, and to permit persons to\n\
300 whom the Data Files or Software are furnished to do so, provided\n\
301 that (a) the above copyright notice(s) and this permission notice\n\
302 appear with all copies of the Data Files or Software, (b) both the\n\
303 above copyright notice(s) and this permission notice appear in\n\
304 associated documentation, and (c) there is clear notice in each\n\
305 modified Data File or in the Software as well as in the\n\
306 documentation associated with the Data File(s) or Software that the\n\
307 data or software has been modified.\n\
309 THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY\n\
310 OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE\n\
311 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n\
312 NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE\n\
313 COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR\n\
314 ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY\n\
315 DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\n\
316 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\n\
317 ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\n\
318 OF THE DATA FILES OR SOFTWARE.\n\
320 Except as contained in this notice, the name of a copyright holder\n\
321 shall not be used in advertising or otherwise to promote the sale,\n\
322 use or other dealings in these Data Files or Software without prior\n\
323 written authorization of the copyright holder. */\n";
331 main(int argc
, char ** argv
)
334 fail ("too few arguments to makeucn");
335 read_ucnid (argv
[1]);
336 read_table (argv
[2]);
337 read_derived (argv
[3]);