* sysdeps/unix/sysv/linux/kernel-features.h: Fix missing backslash
[glibc.git] / localedata / gen-unicode-ctype.c
blob1259aef872c81509c1c665ca0de49f8969ca92cb
/* Generate a Unicode conforming LC_CTYPE category from a UnicodeData file.
   Copyright (C) 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU UTF-8 Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
/* Usage example:
     $ gen-unicode /usr/local/share/Unidata/UnicodeData.txt \
                   /usr/local/share/Unidata/PropList.txt \
                   3.0
 */
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <time.h>
/* This structure represents one line in the UnicodeData.txt file.
   String fields that are absent in the file are stored as "", absent
   case mappings as NONE.  */
struct unicode_attribute
{
  const char *name;           /* Character name */
  const char *category;       /* General category */
  const char *combining;      /* Canonical combining classes */
  const char *bidi;           /* Bidirectional category */
  const char *decomposition;  /* Character decomposition mapping */
  const char *decdigit;       /* Decimal digit value */
  const char *digit;          /* Digit value */
  const char *numeric;        /* Numeric value */
  int mirrored;               /* mirrored */
  const char *oldname;        /* Old Unicode 1.0 name */
  const char *comment;        /* Comment */
  unsigned int upper;         /* Uppercase mapping */
  unsigned int lower;         /* Lowercase mapping */
  unsigned int title;         /* Titlecase mapping */
};
/* Missing fields are represented with "" for strings, and NONE for
   characters.  NONE is the all-ones unsigned int value.  */
#define NONE (~(unsigned int)0)
56 /* The entire contents of the UnicodeData.txt file. */
57 struct unicode_attribute unicode_attributes [0x10000];
59 /* Stores in unicode_attributes[i] the values from the given fields. */
60 static void
61 fill_attribute (unsigned int i,
62 const char *field1, const char *field2,
63 const char *field3, const char *field4,
64 const char *field5, const char *field6,
65 const char *field7, const char *field8,
66 const char *field9, const char *field10,
67 const char *field11, const char *field12,
68 const char *field13, const char *field14)
70 struct unicode_attribute * uni;
72 if (i >= 0x10000)
74 fprintf (stderr, "index too large\n");
75 exit (1);
77 uni = &unicode_attributes[i];
78 /* Copy the strings. */
79 uni->name = strdup (field1);
80 uni->category = (field2[0] == '\0' ? "" : strdup (field2));
81 uni->combining = (field3[0] == '\0' ? "" : strdup (field3));
82 uni->bidi = (field4[0] == '\0' ? "" : strdup (field4));
83 uni->decomposition = (field5[0] == '\0' ? "" : strdup (field5));
84 uni->decdigit = (field6[0] == '\0' ? "" : strdup (field6));
85 uni->digit = (field7[0] == '\0' ? "" : strdup (field7));
86 uni->numeric = (field8[0] == '\0' ? "" : strdup (field8));
87 uni->mirrored = (field9[0] == 'Y');
88 uni->oldname = (field10[0] == '\0' ? "" : strdup (field10));
89 uni->comment = (field11[0] == '\0' ? "" : strdup (field11));
90 uni->upper = (field12[0] =='\0' ? NONE : strtoul (field12, NULL, 16));
91 uni->lower = (field13[0] =='\0' ? NONE : strtoul (field13, NULL, 16));
92 uni->title = (field14[0] =='\0' ? NONE : strtoul (field14, NULL, 16));
/* Maximum length of a field in the UnicodeData.txt file.  */
#define FIELDLEN 120

/* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
   Reads up to (but excluding) DELIM; the delimiter itself is consumed.
   Carriage returns are dropped.
   Returns 1 when a field was successfully read, otherwise 0 (end of file
   reached before DELIM; BUFFER is then not NUL-terminated).
   Exits with status 1 when the field does not fit into BUFFER.  */
static int
getfield (FILE *stream, char *buffer, int delim)
{
  int count = 0;
  int c;

  while ((c = getc (stream)) != EOF && c != delim)
    {
      /* The original unicode.org UnicodeData.txt file happens to have
         CR/LF line terminators.  Silently convert to LF.  */
      if (c == '\r')
        continue;

      /* Put c into the buffer.  */
      if (++count >= FIELDLEN - 1)
        {
          fprintf (stderr, "field too long\n");
          exit (1);
        }
      *buffer++ = c;
    }

  if (c == EOF)
    return 0;

  *buffer = '\0';
  return 1;
}
130 /* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
131 file. */
132 static void
133 fill_attributes (const char *unicodedata_filename)
135 unsigned int i, j;
136 FILE *stream;
137 char field0[FIELDLEN];
138 char field1[FIELDLEN];
139 char field2[FIELDLEN];
140 char field3[FIELDLEN];
141 char field4[FIELDLEN];
142 char field5[FIELDLEN];
143 char field6[FIELDLEN];
144 char field7[FIELDLEN];
145 char field8[FIELDLEN];
146 char field9[FIELDLEN];
147 char field10[FIELDLEN];
148 char field11[FIELDLEN];
149 char field12[FIELDLEN];
150 char field13[FIELDLEN];
151 char field14[FIELDLEN];
152 int lineno = 0;
154 for (i = 0; i < 0x10000; i++)
155 unicode_attributes[i].name = NULL;
157 stream = fopen (unicodedata_filename, "r");
158 if (stream == NULL)
160 fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
161 exit (1);
164 for (;;)
166 int n;
168 lineno++;
169 n = getfield (stream, field0, ';');
170 n += getfield (stream, field1, ';');
171 n += getfield (stream, field2, ';');
172 n += getfield (stream, field3, ';');
173 n += getfield (stream, field4, ';');
174 n += getfield (stream, field5, ';');
175 n += getfield (stream, field6, ';');
176 n += getfield (stream, field7, ';');
177 n += getfield (stream, field8, ';');
178 n += getfield (stream, field9, ';');
179 n += getfield (stream, field10, ';');
180 n += getfield (stream, field11, ';');
181 n += getfield (stream, field12, ';');
182 n += getfield (stream, field13, ';');
183 n += getfield (stream, field14, '\n');
184 if (n == 0)
185 break;
186 if (n != 15)
188 fprintf (stderr, "short line in'%s':%d\n",
189 unicodedata_filename, lineno);
190 exit (1);
192 i = strtoul (field0, NULL, 16);
193 if (field1[0] == '<'
194 && strlen (field1) >= 9
195 && !strcmp (field1 + strlen(field1) - 8, ", First>"))
197 /* Deal with a range. */
198 lineno++;
199 n = getfield (stream, field0, ';');
200 n += getfield (stream, field1, ';');
201 n += getfield (stream, field2, ';');
202 n += getfield (stream, field3, ';');
203 n += getfield (stream, field4, ';');
204 n += getfield (stream, field5, ';');
205 n += getfield (stream, field6, ';');
206 n += getfield (stream, field7, ';');
207 n += getfield (stream, field8, ';');
208 n += getfield (stream, field9, ';');
209 n += getfield (stream, field10, ';');
210 n += getfield (stream, field11, ';');
211 n += getfield (stream, field12, ';');
212 n += getfield (stream, field13, ';');
213 n += getfield (stream, field14, '\n');
214 if (n != 15)
216 fprintf (stderr, "missing end range in '%s':%d\n",
217 unicodedata_filename, lineno);
218 exit (1);
220 if (!(field1[0] == '<'
221 && strlen (field1) >= 8
222 && !strcmp (field1 + strlen (field1) - 7, ", Last>")))
224 fprintf (stderr, "missing end range in '%s':%d\n",
225 unicodedata_filename, lineno);
226 exit (1);
228 field1[strlen (field1) - 7] = '\0';
229 j = strtoul (field0, NULL, 16);
230 for (; i <= j; i++)
231 fill_attribute (i, field1+1, field2, field3, field4, field5,
232 field6, field7, field8, field9, field10,
233 field11, field12, field13, field14);
235 else
237 /* Single character line */
238 fill_attribute (i, field1, field2, field3, field4, field5,
239 field6, field7, field8, field9, field10,
240 field11, field12, field13, field14);
243 if (ferror (stream) || fclose (stream))
245 fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
246 exit (1);
/* The combining property from the PropList.txt file, indexed by code
   point: nonzero when the character has the property.  */
char unicode_combining[0x10000];
253 /* Stores in unicode_combining[] the Combining property from the
254 PropList.txt file. */
255 static void
256 fill_combining (const char *proplist_filename)
258 unsigned int i;
259 FILE *stream;
260 char buf[100+1];
262 for (i = 0; i < 0x10000; i++)
263 unicode_combining[i] = 0;
265 stream = fopen (proplist_filename, "r");
266 if (stream == NULL)
268 fprintf (stderr, "error during fopen of '%s'\n", proplist_filename);
269 exit (1);
272 /* Search for the "Property dump for: 0x20000004 (Combining)" line. */
275 if (fscanf (stream, "%100[^\n]\n", buf) < 1)
277 fprintf (stderr, "no combining property found in '%s'\n",
278 proplist_filename);
279 exit (1);
282 while (strstr (buf, "(Combining)") == NULL);
284 for (;;)
286 unsigned int i1, i2;
288 if (fscanf (stream, "%100[^\n]\n", buf) < 1)
290 fprintf (stderr, "premature end of combining property in '%s'\n",
291 proplist_filename);
292 exit (1);
294 if (buf[0] == '*')
295 break;
296 if (strlen (buf) >= 10 && buf[4] == '.' && buf[5] == '.')
298 if (sscanf (buf, "%4X..%4X", &i1, &i2) < 2)
300 fprintf (stderr, "parse error in combining property in '%s'\n",
301 proplist_filename);
302 exit (1);
305 else if (strlen (buf) >= 4)
307 if (sscanf (buf, "%4X", &i1) < 1)
309 fprintf (stderr, "parse error in combining property in '%s'\n",
310 proplist_filename);
311 exit (1);
313 i2 = i1;
315 else
317 fprintf (stderr, "parse error in combining property in '%s'\n",
318 proplist_filename);
319 exit (1);
321 for (i = i1; i <= i2; i++)
322 unicode_combining[i] = 1;
324 if (ferror (stream) || fclose (stream))
326 fprintf (stderr, "error reading from '%s'\n", proplist_filename);
327 exit (1);
/* Character mappings.  */
333 static unsigned int
334 to_upper (unsigned int ch)
336 if (unicode_attributes[ch].name != NULL
337 && unicode_attributes[ch].upper != NONE)
338 return unicode_attributes[ch].upper;
339 else
340 return ch;
343 static unsigned int
344 to_lower (unsigned int ch)
346 if (unicode_attributes[ch].name != NULL
347 && unicode_attributes[ch].lower != NONE)
348 return unicode_attributes[ch].lower;
349 else
350 return ch;
353 static unsigned int
354 to_title (unsigned int ch)
356 if (unicode_attributes[ch].name != NULL
357 && unicode_attributes[ch].title != NONE)
358 return unicode_attributes[ch].title;
359 else
360 return ch;
/* Character class properties.  */
/* A character counts as uppercase when its lowercase mapping differs
   from the character itself.  */
static bool
is_upper (unsigned int ch)
{
  return (to_lower (ch) != ch);
}
/* A character counts as lowercase when its uppercase mapping differs
   from the character itself, plus the special case <U00DF>.  */
static bool
is_lower (unsigned int ch)
{
  return (to_upper (ch) != ch)
         /* <U00DF> is lowercase, but without simple to_upper mapping.  */
         || (ch == 0x00DF);
}
379 static bool
380 is_alpha (unsigned int ch)
382 return (unicode_attributes[ch].name != NULL
383 && (unicode_attributes[ch].category[0] == 'L'
384 /* Avoid warning for <U0345>. */
385 || (ch == 0x0345)
386 /* Avoid warnings for <U2160>..<U217F>. */
387 || (unicode_attributes[ch].category[0] == 'N'
388 && unicode_attributes[ch].category[1] == 'l')
389 /* Avoid warnings for <U24B6>..<U24E9>. */
390 || (unicode_attributes[ch].category[0] == 'S'
391 && unicode_attributes[ch].category[1] == 'o'
392 && strstr (unicode_attributes[ch].name, " LETTER ")
393 != NULL)
394 /* Consider all the non-ASCII digits as alphabetic.
395 ISO C 99 forbids us to have them in category "digit",
396 but we want iswalnum to return true on them. */
397 || (unicode_attributes[ch].category[0] == 'N'
398 && unicode_attributes[ch].category[1] == 'd'
399 && !(ch >= 0x0030 && ch <= 0x0039))));
/* Tells whether CH belongs to the "digit" class.  Only the ten ASCII
   digits qualify; the disabled branch shows the Unicode "Nd" based
   alternative.  */
static bool
is_digit (unsigned int ch)
{
#if 0
  return (unicode_attributes[ch].name != NULL
          && unicode_attributes[ch].category[0] == 'N'
          && unicode_attributes[ch].category[1] == 'd');
  /* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
     a zero.  Must add <0> in front of them by hand.  */
#else
  /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99
     takes it away:
     7.25.2.1.5:
        The iswdigit function tests for any wide character that corresponds
        to a decimal-digit character (as defined in 5.2.1).
     5.2.1:
        the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
   */
  return (ch >= 0x0030 && ch <= 0x0039);
#endif
}
/* Tells whether CH belongs to the "outdigit" class: exactly the ten
   ASCII digits <U0030>..<U0039>.  */
static bool
is_outdigit (unsigned int ch)
{
  return (ch >= 0x0030 && ch <= 0x0039);
}
430 static bool
431 is_blank (unsigned int ch)
433 return (ch == 0x0009 /* '\t' */
434 /* Category Zs without mention of "<noBreak>" */
435 || (unicode_attributes[ch].name != NULL
436 && unicode_attributes[ch].category[0] == 'Z'
437 && unicode_attributes[ch].category[1] == 's'
438 && !strstr (unicode_attributes[ch].decomposition, "<noBreak>")));
441 static bool
442 is_space (unsigned int ch)
444 /* Don't make U+00A0 a space. Non-breaking space means that all programs
445 should treat it like a punctuation character, not like a space. */
446 return (ch == 0x0020 /* ' ' */
447 || ch == 0x000C /* '\f' */
448 || ch == 0x000A /* '\n' */
449 || ch == 0x000D /* '\r' */
450 || ch == 0x0009 /* '\t' */
451 || ch == 0x000B /* '\v' */
452 /* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
453 || (unicode_attributes[ch].name != NULL
454 && unicode_attributes[ch].category[0] == 'Z'
455 && (unicode_attributes[ch].category[1] == 'l'
456 || unicode_attributes[ch].category[1] == 'p'
457 || (unicode_attributes[ch].category[1] == 's'
458 && !strstr (unicode_attributes[ch].decomposition,
459 "<noBreak>")))));
462 static bool
463 is_cntrl (unsigned int ch)
465 return (unicode_attributes[ch].name != NULL
466 && (!strcmp (unicode_attributes[ch].name, "<control>")
467 /* Categories Zl and Zp */
468 || (unicode_attributes[ch].category[0] == 'Z'
469 && (unicode_attributes[ch].category[1] == 'l'
470 || unicode_attributes[ch].category[1] == 'p'))));
/* Tells whether CH belongs to the "xdigit" class: the ASCII digits and
   the ASCII letters a-f and A-F.  The disabled branch shows the variant
   built on top of is_digit.  */
static bool
is_xdigit (unsigned int ch)
{
#if 0
  return is_digit (ch)
         || (ch >= 0x0041 && ch <= 0x0046)
         || (ch >= 0x0061 && ch <= 0x0066);
#else
  /* SUSV2 gives us some freedom for the "xdigit" category, but ISO C 99
     takes it away:
     7.25.2.1.12:
        The iswxdigit function tests for any wide character that corresponds
        to a hexadecimal-digit character (as defined in 6.4.4.1).
     6.4.4.1:
        hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
   */
  return (ch >= 0x0030 && ch <= 0x0039)
         || (ch >= 0x0041 && ch <= 0x0046)
         || (ch >= 0x0061 && ch <= 0x0066);
#endif
}
495 static bool
496 is_graph (unsigned int ch)
498 return (unicode_attributes[ch].name != NULL
499 && strcmp (unicode_attributes[ch].name, "<control>")
500 && !is_space (ch));
503 static bool
504 is_print (unsigned int ch)
506 return (unicode_attributes[ch].name != NULL
507 && strcmp (unicode_attributes[ch].name, "<control>")
508 /* Categories Zl and Zp */
509 && !(unicode_attributes[ch].name != NULL
510 && unicode_attributes[ch].category[0] == 'Z'
511 && (unicode_attributes[ch].category[1] == 'l'
512 || unicode_attributes[ch].category[1] == 'p')));
/* Tells whether CH belongs to the "punct" class.  The disabled branch
   shows the Unicode category "P" based alternative.  */
static bool
is_punct (unsigned int ch)
{
#if 0
  return (unicode_attributes[ch].name != NULL
          && unicode_attributes[ch].category[0] == 'P');
#else
  /* The traditional POSIX definition of punctuation is every graphic,
     non-alphanumeric character.  */
  return (is_graph (ch) && !is_alpha (ch) && !is_digit (ch));
#endif
}
528 static bool
529 is_combining (unsigned int ch)
531 return (unicode_attributes[ch].name != NULL
532 && unicode_combining[ch] != 0);
535 static bool
536 is_combining_level3 (unsigned int ch)
538 return is_combining (ch)
539 && !(unicode_attributes[ch].combining[0] != '\0'
540 && unicode_attributes[ch].combining[0] != '0'
541 && strtoul (unicode_attributes[ch].combining, NULL, 10) >= 200);
/* Output a character class (= property) table.
   Writes to STREAM one line starting with CLASSNAME, followed by the
   code points below 0x10000 for which FUNC returns true.  Runs of
   consecutive code points are written as "<Ulow>..<Uhigh>", entries are
   separated by ';', and long lines are continued with the escape
   character '/'.  */
static void
output_charclass (FILE *stream, const char *classname,
                  bool (*func) (unsigned int))
{
  char table[0x10000];
  unsigned int i;
  bool need_semicolon;
  const int max_column = 75;
  int column;

  for (i = 0; i < 0x10000; i++)
    table[i] = (int) func (i);

  fprintf (stream, "%s ", classname);
  need_semicolon = false;
  /* Start beyond max_column so that the first entry goes onto a
     continuation line.  */
  column = 1000;
  for (i = 0; i < 0x10000; )
    {
      if (!table[i])
        i++;
      else
        {
          unsigned int low, high;
          /* Both values are below 0x10000, so the longest text is
             "<UXXXX>..<UXXXX>": 16 characters plus the terminator.  */
          char buf[17];

          /* Find the end of the run of members starting at i.  */
          low = i;
          do
            i++;
          while (i < 0x10000 && table[i]);
          high = i - 1;

          if (low == high)
            sprintf (buf, "<U%04X>", low);
          else
            sprintf (buf, "<U%04X>..<U%04X>", low, high);

          if (need_semicolon)
            {
              fprintf (stream, ";");
              column++;
            }

          if (column + strlen (buf) > max_column)
            {
              /* Three spaces of indentation, matching column = 3.  */
              fprintf (stream, "/\n   ");
              column = 3;
            }

          fprintf (stream, "%s", buf);
          column += strlen (buf);
          need_semicolon = true;
        }
    }
  fprintf (stream, "\n");
}
/* Output a character mapping table.
   Writes to STREAM one line starting with MAPNAME, followed by a
   "(<Ufrom>,<Uto>)" pair for every code point below 0x10000 that FUNC
   maps to a different value.  Pairs are separated by ';', and long lines
   are continued with the escape character '/'.  */
static void
output_charmap (FILE *stream, const char *mapname,
                unsigned int (*func) (unsigned int))
{
  char table[0x10000];
  unsigned int i;
  bool need_semicolon;
  const int max_column = 75;
  int column;

  for (i = 0; i < 0x10000; i++)
    table[i] = (func (i) != i);

  fprintf (stream, "%s ", mapname);
  need_semicolon = false;
  /* Start beyond max_column so that the first entry goes onto a
     continuation line.  */
  column = 1000;
  for (i = 0; i < 0x10000; i++)
    if (table[i])
      {
        /* The mapped value comes from the input data and is not limited
           to four hex digits, so leave room for a full unsigned int and
           let snprintf bound the write.  */
        char buf[32];

        snprintf (buf, sizeof (buf), "(<U%04X>,<U%04X>)", i, func (i));

        if (need_semicolon)
          {
            fprintf (stream, ";");
            column++;
          }

        if (column + strlen (buf) > max_column)
          {
            /* Three spaces of indentation, matching column = 3.  */
            fprintf (stream, "/\n   ");
            column = 3;
          }

        fprintf (stream, "%s", buf);
        column += strlen (buf);
        need_semicolon = true;
      }
  fprintf (stream, "\n");
}
/* Output the width table.
   Currently a stub: nothing is written to STREAM.  */
static void
output_widthmap (FILE *stream)
{
  (void) stream;
}
653 /* Output the tables to the given file. */
655 static void
656 output_tables (const char *filename, const char *version)
658 FILE *stream;
659 unsigned int ch;
661 stream = fopen (filename, "w");
662 if (stream == NULL)
664 fprintf (stderr, "cannot open '%s' for writing\n", filename);
665 exit (1);
668 fprintf (stream, "escape_char /\n");
669 fprintf (stream, "comment_char %%\n");
670 fprintf (stream, "\n");
671 fprintf (stream, "%% Generated automatically by gen-unicode for Unicode %s.\n",
672 version);
673 fprintf (stream, "\n");
675 fprintf (stream, "LC_IDENTIFICATION\n");
676 fprintf (stream, "title \"Unicode %s FDCC-set\"\n", version);
677 fprintf (stream, "source \"UnicodeData.txt, PropList.txt\"\n");
678 fprintf (stream, "address \"\"\n");
679 fprintf (stream, "contact \"\"\n");
680 fprintf (stream, "email \"bug-glibc@gnu.org\"\n");
681 fprintf (stream, "tel \"\"\n");
682 fprintf (stream, "fax \"\"\n");
683 fprintf (stream, "language \"\"\n");
684 fprintf (stream, "territory \"Earth\"\n");
685 fprintf (stream, "revision \"%s\"\n", version);
687 time_t now;
688 char date[11];
689 now = time (NULL);
690 strftime (date, sizeof (date), "%Y-%m-%d", gmtime (&now));
691 fprintf (stream, "date \"%s\"\n", date);
693 fprintf (stream, "category \"unicode:2000\";LC_CTYPE\n");
694 fprintf (stream, "END LC_IDENTIFICATION\n");
695 fprintf (stream, "\n");
697 /* Verifications. */
698 for (ch = 0; ch < 0x10000; ch++)
700 /* toupper restriction: "Only characters specified for the keywords
701 lower and upper shall be specified. */
702 if (to_upper (ch) != ch && !(is_lower (ch) || is_upper (ch)))
703 fprintf (stderr,
704 "<U%04X> is not upper|lower but toupper(0x%04X) = 0x%04X\n",
705 ch, ch, to_upper (ch));
707 /* tolower restriction: "Only characters specified for the keywords
708 lower and upper shall be specified. */
709 if (to_lower (ch) != ch && !(is_lower (ch) || is_upper (ch)))
710 fprintf (stderr,
711 "<U%04X> is not upper|lower but tolower(0x%04X) = 0x%04X\n",
712 ch, ch, to_lower (ch));
714 /* alpha restriction: "Characters classified as either upper or lower
715 shall automatically belong to this class. */
716 if ((is_lower (ch) || is_upper (ch)) && !is_alpha (ch))
717 fprintf (stderr, "<U%04X> is upper|lower but not alpha\n", ch);
719 /* alpha restriction: "No character specified for the keywords cntrl,
720 digit, punct or space shall be specified." */
721 if (is_alpha (ch) && is_cntrl (ch))
722 fprintf (stderr, "<U%04X> is alpha and cntrl\n", ch);
723 if (is_alpha (ch) && is_digit (ch))
724 fprintf (stderr, "<U%04X> is alpha and digit\n", ch);
725 if (is_alpha (ch) && is_punct (ch))
726 fprintf (stderr, "<U%04X> is alpha and punct\n", ch);
727 if (is_alpha (ch) && is_space (ch))
728 fprintf (stderr, "<U%04X> is alpha and space\n", ch);
730 /* space restriction: "No character specified for the keywords upper,
731 lower, alpha, digit, graph or xdigit shall be specified."
732 upper, lower, alpha already checked above. */
733 if (is_space (ch) && is_digit (ch))
734 fprintf (stderr, "<U%04X> is space and digit\n", ch);
735 if (is_space (ch) && is_graph (ch))
736 fprintf (stderr, "<U%04X> is space and graph\n", ch);
737 if (is_space (ch) && is_xdigit (ch))
738 fprintf (stderr, "<U%04X> is space and xdigit\n", ch);
740 /* cntrl restriction: "No character specified for the keywords upper,
741 lower, alpha, digit, punct, graph, print or xdigit shall be
742 specified." upper, lower, alpha already checked above. */
743 if (is_cntrl (ch) && is_digit (ch))
744 fprintf (stderr, "<U%04X> is cntrl and digit\n", ch);
745 if (is_cntrl (ch) && is_punct (ch))
746 fprintf (stderr, "<U%04X> is cntrl and punct\n", ch);
747 if (is_cntrl (ch) && is_graph (ch))
748 fprintf (stderr, "<U%04X> is cntrl and graph\n", ch);
749 if (is_cntrl (ch) && is_print (ch))
750 fprintf (stderr, "<U%04X> is cntrl and print\n", ch);
751 if (is_cntrl (ch) && is_xdigit (ch))
752 fprintf (stderr, "<U%04X> is cntrl and xdigit\n", ch);
754 /* punct restriction: "No character specified for the keywords upper,
755 lower, alpha, digit, cntrl, xdigit or as the <space> character shall
756 be specified." upper, lower, alpha, cntrl already checked above. */
757 if (is_punct (ch) && is_digit (ch))
758 fprintf (stderr, "<U%04X> is punct and digit\n", ch);
759 if (is_punct (ch) && is_xdigit (ch))
760 fprintf (stderr, "<U%04X> is punct and xdigit\n", ch);
761 if (is_punct (ch) && (ch == 0x0020))
762 fprintf (stderr, "<U%04X> is punct\n", ch);
764 /* graph restriction: "No character specified for the keyword cntrl
765 shall be specified." Already checked above. */
767 /* print restriction: "No character specified for the keyword cntrl
768 shall be specified." Already checked above. */
770 /* graph - print relation: differ only in the <space> character.
771 How is this possible if there are more than one space character?!
772 I think susv2/xbd/locale.html should speak of "space characters",
773 not "space character". */
774 if (is_print (ch) && !(is_graph (ch) || /* ch == 0x0020 */ is_space (ch)))
775 fprintf (stderr, "<U%04X> is print but not graph|<space>\n", ch);
776 if (!is_print (ch) && (is_graph (ch) || ch == 0x0020))
777 fprintf (stderr, "<U%04X> is graph|<space> but not print\n", ch);
780 fprintf (stream, "LC_CTYPE\n");
781 output_charclass (stream, "upper", is_upper);
782 output_charclass (stream, "lower", is_lower);
783 output_charclass (stream, "alpha", is_alpha);
784 output_charclass (stream, "digit", is_digit);
785 output_charclass (stream, "outdigit", is_outdigit);
786 output_charclass (stream, "blank", is_blank);
787 output_charclass (stream, "space", is_space);
788 output_charclass (stream, "cntrl", is_cntrl);
789 output_charclass (stream, "punct", is_punct);
790 output_charclass (stream, "xdigit", is_xdigit);
791 output_charclass (stream, "graph", is_graph);
792 output_charclass (stream, "print", is_print);
793 output_charclass (stream, "class \"combining\";", is_combining);
794 output_charclass (stream, "class \"combining_level3\";", is_combining_level3);
795 output_charmap (stream, "toupper", to_upper);
796 output_charmap (stream, "tolower", to_lower);
797 output_charmap (stream, "map \"totitle\";", to_title);
798 output_widthmap (stream);
799 fprintf (stream, "END LC_CTYPE\n");
801 if (ferror (stream) || fclose (stream))
803 fprintf (stderr, "error writing to '%s'\n", filename);
804 exit (1);
/* Program entry point.  Expects exactly three arguments: the path of
   UnicodeData.txt, the path of PropList.txt, and the Unicode version
   string.  Reads both files and writes the result to the file "unicode"
   in the current directory.  Returns 0 on success; usage errors exit
   with status 1.  */
int
main (int argc, char * argv[])
{
  if (argc != 4)
    {
      fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt version\n",
               argv[0]);
      exit (1);
    }

  fill_attributes (argv[1]);
  fill_combining (argv[2]);

  output_tables ("unicode", argv[3]);

  return 0;
}