1 /* Generate a Unicode conforming LC_CTYPE category from a UnicodeData file.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU UTF-8 Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
/* Usage example:
     $ gen-unicode /usr/local/share/Unidata/UnicodeData.txt \
                   /usr/local/share/Unidata/PropList.txt \
                   VERSION
 */
/* This structure represents one line in the UnicodeData.txt file.
   Missing string fields hold "" and missing case mappings hold NONE.
   A NULL name marks a code point for which no line was read.  */
struct unicode_attribute
{
  const char *name;           /* Character name */
  const char *category;       /* General category */
  const char *combining;      /* Canonical combining classes */
  const char *bidi;           /* Bidirectional category */
  const char *decomposition;  /* Character decomposition mapping */
  const char *decdigit;       /* Decimal digit value */
  const char *digit;          /* Digit value */
  const char *numeric;        /* Numeric value */
  int mirrored;               /* mirrored: nonzero when the field starts
                                 with 'Y' */
  const char *oldname;        /* Old Unicode 1.0 name */
  const char *comment;        /* Comment */
  unsigned int upper;         /* Uppercase mapping */
  unsigned int lower;         /* Lowercase mapping */
  unsigned int title;         /* Titlecase mapping */
};
52 /* Missing fields are represented with "" for strings, and NONE for
54 #define NONE (~(unsigned int)0)
56 /* The entire contents of the UnicodeData.txt file. */
57 struct unicode_attribute unicode_attributes
[0x10000];
59 /* Stores in unicode_attributes[i] the values from the given fields. */
61 fill_attribute (unsigned int i
,
62 const char *field1
, const char *field2
,
63 const char *field3
, const char *field4
,
64 const char *field5
, const char *field6
,
65 const char *field7
, const char *field8
,
66 const char *field9
, const char *field10
,
67 const char *field11
, const char *field12
,
68 const char *field13
, const char *field14
)
70 struct unicode_attribute
* uni
;
74 fprintf (stderr
, "index too large\n");
77 uni
= &unicode_attributes
[i
];
78 /* Copy the strings. */
79 uni
->name
= strdup (field1
);
80 uni
->category
= (field2
[0] == '\0' ? "" : strdup (field2
));
81 uni
->combining
= (field3
[0] == '\0' ? "" : strdup (field3
));
82 uni
->bidi
= (field4
[0] == '\0' ? "" : strdup (field4
));
83 uni
->decomposition
= (field5
[0] == '\0' ? "" : strdup (field5
));
84 uni
->decdigit
= (field6
[0] == '\0' ? "" : strdup (field6
));
85 uni
->digit
= (field7
[0] == '\0' ? "" : strdup (field7
));
86 uni
->numeric
= (field8
[0] == '\0' ? "" : strdup (field8
));
87 uni
->mirrored
= (field9
[0] == 'Y');
88 uni
->oldname
= (field10
[0] == '\0' ? "" : strdup (field10
));
89 uni
->comment
= (field11
[0] == '\0' ? "" : strdup (field11
));
90 uni
->upper
= (field12
[0] =='\0' ? NONE
: strtoul (field12
, NULL
, 16));
91 uni
->lower
= (field13
[0] =='\0' ? NONE
: strtoul (field13
, NULL
, 16));
92 uni
->title
= (field14
[0] =='\0' ? NONE
: strtoul (field14
, NULL
, 16));
/* Maximum length of a field in the UnicodeData.txt file.
   NOTE(review): the defining line is absent from this excerpt; 120 is the
   value believed to be used upstream -- confirm.  */
#ifndef FIELDLEN
# define FIELDLEN 120
#endif

/* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
   Reads up to (but excluding) DELIM.
   Returns 1 when a field was successfully read, otherwise 0.
   A field cut short by end of file counts as not read; in that case BUFFER
   is not NUL-terminated.  Exits when the field does not fit.  */
static int
getfield (FILE *stream, char *buffer, int delim)
{
  int count = 0;
  int c;

  for (; (c = getc (stream)), (c != EOF && c != delim); )
    {
      /* The original unicode.org UnicodeData.txt file happens to have
         CR/LF line terminators.  Silently convert to LF.  */
      if (c == '\r')
        continue;

      /* Put c into the buffer.  */
      if (++count >= FIELDLEN - 1)
        {
          fprintf (stderr, "field too long\n");
          exit (1);
        }
      *buffer++ = (char) c;
    }

  if (c == EOF)
    return 0;

  *buffer = '\0';
  return 1;
}
130 /* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
133 fill_attributes (const char *unicodedata_filename
)
137 char field0
[FIELDLEN
];
138 char field1
[FIELDLEN
];
139 char field2
[FIELDLEN
];
140 char field3
[FIELDLEN
];
141 char field4
[FIELDLEN
];
142 char field5
[FIELDLEN
];
143 char field6
[FIELDLEN
];
144 char field7
[FIELDLEN
];
145 char field8
[FIELDLEN
];
146 char field9
[FIELDLEN
];
147 char field10
[FIELDLEN
];
148 char field11
[FIELDLEN
];
149 char field12
[FIELDLEN
];
150 char field13
[FIELDLEN
];
151 char field14
[FIELDLEN
];
154 for (i
= 0; i
< 0x10000; i
++)
155 unicode_attributes
[i
].name
= NULL
;
157 stream
= fopen (unicodedata_filename
, "r");
160 fprintf (stderr
, "error during fopen of '%s'\n", unicodedata_filename
);
169 n
= getfield (stream
, field0
, ';');
170 n
+= getfield (stream
, field1
, ';');
171 n
+= getfield (stream
, field2
, ';');
172 n
+= getfield (stream
, field3
, ';');
173 n
+= getfield (stream
, field4
, ';');
174 n
+= getfield (stream
, field5
, ';');
175 n
+= getfield (stream
, field6
, ';');
176 n
+= getfield (stream
, field7
, ';');
177 n
+= getfield (stream
, field8
, ';');
178 n
+= getfield (stream
, field9
, ';');
179 n
+= getfield (stream
, field10
, ';');
180 n
+= getfield (stream
, field11
, ';');
181 n
+= getfield (stream
, field12
, ';');
182 n
+= getfield (stream
, field13
, ';');
183 n
+= getfield (stream
, field14
, '\n');
188 fprintf (stderr
, "short line in'%s':%d\n",
189 unicodedata_filename
, lineno
);
192 i
= strtoul (field0
, NULL
, 16);
194 && strlen (field1
) >= 9
195 && !strcmp (field1
+ strlen(field1
) - 8, ", First>"))
197 /* Deal with a range. */
199 n
= getfield (stream
, field0
, ';');
200 n
+= getfield (stream
, field1
, ';');
201 n
+= getfield (stream
, field2
, ';');
202 n
+= getfield (stream
, field3
, ';');
203 n
+= getfield (stream
, field4
, ';');
204 n
+= getfield (stream
, field5
, ';');
205 n
+= getfield (stream
, field6
, ';');
206 n
+= getfield (stream
, field7
, ';');
207 n
+= getfield (stream
, field8
, ';');
208 n
+= getfield (stream
, field9
, ';');
209 n
+= getfield (stream
, field10
, ';');
210 n
+= getfield (stream
, field11
, ';');
211 n
+= getfield (stream
, field12
, ';');
212 n
+= getfield (stream
, field13
, ';');
213 n
+= getfield (stream
, field14
, '\n');
216 fprintf (stderr
, "missing end range in '%s':%d\n",
217 unicodedata_filename
, lineno
);
220 if (!(field1
[0] == '<'
221 && strlen (field1
) >= 8
222 && !strcmp (field1
+ strlen (field1
) - 7, ", Last>")))
224 fprintf (stderr
, "missing end range in '%s':%d\n",
225 unicodedata_filename
, lineno
);
228 field1
[strlen (field1
) - 7] = '\0';
229 j
= strtoul (field0
, NULL
, 16);
231 fill_attribute (i
, field1
+1, field2
, field3
, field4
, field5
,
232 field6
, field7
, field8
, field9
, field10
,
233 field11
, field12
, field13
, field14
);
237 /* Single character line */
238 fill_attribute (i
, field1
, field2
, field3
, field4
, field5
,
239 field6
, field7
, field8
, field9
, field10
,
240 field11
, field12
, field13
, field14
);
243 if (ferror (stream
) || fclose (stream
))
245 fprintf (stderr
, "error reading from '%s'\n", unicodedata_filename
);
/* The combining property from the PropList.txt file: unicode_combining[ch]
   is 1 when CH is listed as combining, 0 otherwise.  */
char unicode_combining[0x10000];
253 /* Stores in unicode_combining[] the Combining property from the
254 PropList.txt file. */
256 fill_combining (const char *proplist_filename
)
262 for (i
= 0; i
< 0x10000; i
++)
263 unicode_combining
[i
] = 0;
265 stream
= fopen (proplist_filename
, "r");
268 fprintf (stderr
, "error during fopen of '%s'\n", proplist_filename
);
272 /* Search for the "Property dump for: 0x20000004 (Combining)" line. */
275 if (fscanf (stream
, "%100[^\n]\n", buf
) < 1)
277 fprintf (stderr
, "no combining property found in '%s'\n",
282 while (strstr (buf
, "(Combining)") == NULL
);
288 if (fscanf (stream
, "%100[^\n]\n", buf
) < 1)
290 fprintf (stderr
, "premature end of combining property in '%s'\n",
296 if (strlen (buf
) >= 10 && buf
[4] == '.' && buf
[5] == '.')
298 if (sscanf (buf
, "%4X..%4X", &i1
, &i2
) < 2)
300 fprintf (stderr
, "parse error in combining property in '%s'\n",
305 else if (strlen (buf
) >= 4)
307 if (sscanf (buf
, "%4X", &i1
) < 1)
309 fprintf (stderr
, "parse error in combining property in '%s'\n",
317 fprintf (stderr
, "parse error in combining property in '%s'\n",
321 for (i
= i1
; i
<= i2
; i
++)
322 unicode_combining
[i
] = 1;
324 if (ferror (stream
) || fclose (stream
))
326 fprintf (stderr
, "error reading from '%s'\n", proplist_filename
);
331 /* Character mappings. */
334 to_upper (unsigned int ch
)
336 if (unicode_attributes
[ch
].name
!= NULL
337 && unicode_attributes
[ch
].upper
!= NONE
)
338 return unicode_attributes
[ch
].upper
;
344 to_lower (unsigned int ch
)
346 if (unicode_attributes
[ch
].name
!= NULL
347 && unicode_attributes
[ch
].lower
!= NONE
)
348 return unicode_attributes
[ch
].lower
;
354 to_title (unsigned int ch
)
356 if (unicode_attributes
[ch
].name
!= NULL
357 && unicode_attributes
[ch
].title
!= NONE
)
358 return unicode_attributes
[ch
].title
;
/* Character class properties.  */

/* A character is uppercase when it has a lowercase mapping other than
   itself.  */
static bool
is_upper (unsigned int ch)
{
  return (to_lower (ch) != ch);
}
/* A character is lowercase when it has an uppercase mapping other than
   itself.  */
static bool
is_lower (unsigned int ch)
{
  return (to_upper (ch) != ch)
         /* <U00DF> is lowercase, but without simple to_upper mapping.  */
         || (ch == 0x00DF);
}
380 is_alpha (unsigned int ch
)
382 return (unicode_attributes
[ch
].name
!= NULL
383 && (unicode_attributes
[ch
].category
[0] == 'L'
384 /* Avoid warning for <U0345>. */
386 /* Avoid warnings for <U2160>..<U217F>. */
387 || (unicode_attributes
[ch
].category
[0] == 'N'
388 && unicode_attributes
[ch
].category
[1] == 'l')
389 /* Avoid warnings for <U24B6>..<U24E9>. */
390 || (unicode_attributes
[ch
].category
[0] == 'S'
391 && unicode_attributes
[ch
].category
[1] == 'o'
392 && strstr (unicode_attributes
[ch
].name
, " LETTER ")
394 /* Consider all the non-ASCII digits as alphabetic.
395 ISO C 99 forbids us to have them in category "digit",
396 but we want iswalnum to return true on them. */
397 || (unicode_attributes
[ch
].category
[0] == 'N'
398 && unicode_attributes
[ch
].category
[1] == 'd'
399 && !(ch
>= 0x0030 && ch
<= 0x0039))));
/* Decimal digits.  Only the ten ASCII digits qualify; the Unicode based
   definition is kept, disabled, for reference.  */
static bool
is_digit (unsigned int ch)
{
#if 0
  return (unicode_attributes[ch].name != NULL
          && unicode_attributes[ch].category[0] == 'N'
          && unicode_attributes[ch].category[1] == 'd');
  /* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
     a zero.  Must add <0> in front of them by hand.  */
#else
  /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99
     takes it away:
        The iswdigit function tests for any wide character that corresponds
        to a decimal-digit character (as defined in 5.2.1).
     and 5.2.1 lists
        the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
   */
  return (ch >= 0x0030 && ch <= 0x0039);
#endif
}
/* Digits used for output: the ten ASCII digits.  */
static bool
is_outdigit (unsigned int ch)
{
  return (ch >= 0x0030 && ch <= 0x0039);
}
431 is_blank (unsigned int ch
)
433 return (ch
== 0x0009 /* '\t' */
434 /* Category Zs without mention of "<noBreak>" */
435 || (unicode_attributes
[ch
].name
!= NULL
436 && unicode_attributes
[ch
].category
[0] == 'Z'
437 && unicode_attributes
[ch
].category
[1] == 's'
438 && !strstr (unicode_attributes
[ch
].decomposition
, "<noBreak>")));
442 is_space (unsigned int ch
)
444 /* Don't make U+00A0 a space. Non-breaking space means that all programs
445 should treat it like a punctuation character, not like a space. */
446 return (ch
== 0x0020 /* ' ' */
447 || ch
== 0x000C /* '\f' */
448 || ch
== 0x000A /* '\n' */
449 || ch
== 0x000D /* '\r' */
450 || ch
== 0x0009 /* '\t' */
451 || ch
== 0x000B /* '\v' */
452 /* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
453 || (unicode_attributes
[ch
].name
!= NULL
454 && unicode_attributes
[ch
].category
[0] == 'Z'
455 && (unicode_attributes
[ch
].category
[1] == 'l'
456 || unicode_attributes
[ch
].category
[1] == 'p'
457 || (unicode_attributes
[ch
].category
[1] == 's'
458 && !strstr (unicode_attributes
[ch
].decomposition
,
463 is_cntrl (unsigned int ch
)
465 return (unicode_attributes
[ch
].name
!= NULL
466 && (!strcmp (unicode_attributes
[ch
].name
, "<control>")
467 /* Categories Zl and Zp */
468 || (unicode_attributes
[ch
].category
[0] == 'Z'
469 && (unicode_attributes
[ch
].category
[1] == 'l'
470 || unicode_attributes
[ch
].category
[1] == 'p'))));
/* Hexadecimal digits.  Only the ASCII ones qualify; the definition built on
   is_digit is kept, disabled, for reference.  */
static bool
is_xdigit (unsigned int ch)
{
#if 0
  return is_digit (ch)
         || (ch >= 0x0041 && ch <= 0x0046)
         || (ch >= 0x0061 && ch <= 0x0066);
#else
  /* SUSV2 gives us some freedom for the "xdigit" category, but ISO C 99
     takes it away:
        The iswxdigit function tests for any wide character that corresponds
        to a hexadecimal-digit character (as defined in 6.4.4.1).
     and 6.4.4.1 lists
        hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
   */
  return (ch >= 0x0030 && ch <= 0x0039)
         || (ch >= 0x0041 && ch <= 0x0046)
         || (ch >= 0x0061 && ch <= 0x0066);
#endif
}
496 is_graph (unsigned int ch
)
498 return (unicode_attributes
[ch
].name
!= NULL
499 && strcmp (unicode_attributes
[ch
].name
, "<control>")
504 is_print (unsigned int ch
)
506 return (unicode_attributes
[ch
].name
!= NULL
507 && strcmp (unicode_attributes
[ch
].name
, "<control>")
508 /* Categories Zl and Zp */
509 && !(unicode_attributes
[ch
].name
!= NULL
510 && unicode_attributes
[ch
].category
[0] == 'Z'
511 && (unicode_attributes
[ch
].category
[1] == 'l'
512 || unicode_attributes
[ch
].category
[1] == 'p')));
/* Punctuation.  The definition based on Unicode category P* is kept,
   disabled, for reference.  */
static bool
is_punct (unsigned int ch)
{
#if 0
  return (unicode_attributes[ch].name != NULL
          && unicode_attributes[ch].category[0] == 'P');
#else
  /* The traditional POSIX definition of punctuation is every graphic,
     non-alphanumeric character.  */
  return (is_graph (ch) && !is_alpha (ch) && !is_digit (ch));
#endif
}
529 is_combining (unsigned int ch
)
531 return (unicode_attributes
[ch
].name
!= NULL
532 && unicode_combining
[ch
] != 0);
536 is_combining_level3 (unsigned int ch
)
538 return is_combining (ch
)
539 && !(unicode_attributes
[ch
].combining
[0] != '\0'
540 && unicode_attributes
[ch
].combining
[0] != '0'
541 && strtoul (unicode_attributes
[ch
].combining
, NULL
, 10) >= 200);
/* Output a character class (= property) table.
   Writes CLASSNAME followed by the ';'-separated list of code points below
   0x10000 for which FUNC returns true.  Runs of consecutive code points are
   written as "<Ulow>..<Uhigh>".  Long lines are continued with "/".  */
static void
output_charclass (FILE *stream, const char *classname,
                  bool (*func) (unsigned int))
{
  char table[0x10000];
  unsigned int i;
  bool need_semicolon;
  const size_t max_column = 75;
  size_t column;

  /* Evaluate FUNC once per code point.  */
  for (i = 0; i < 0x10000; i++)
    table[i] = (int) func (i);

  fprintf (stream, "%s ", classname);
  need_semicolon = false;
  /* NOTE(review): the initial column is missing from the excerpt; 1000
     (which forces the first entry onto a continuation line) is believed to
     be the upstream value -- confirm.  */
  column = 1000;

  for (i = 0; i < 0x10000; )
    {
      if (!table[i])
        i++;
      else
        {
          unsigned int low, high;
          char buf[32];

          /* Find the end of the run that starts at I.  */
          low = i;
          do
            i++;
          while (i < 0x10000 && table[i]);
          high = i - 1;

          if (low == high)
            snprintf (buf, sizeof (buf), "<U%04X>", low);
          else
            snprintf (buf, sizeof (buf), "<U%04X>..<U%04X>", low, high);

          if (need_semicolon)
            {
              fprintf (stream, ";");
              column++;
            }

          if (column + strlen (buf) > max_column)
            {
              fprintf (stream, "/\n ");
              column = 1;       /* Width of the continuation indent.  */
            }

          fprintf (stream, "%s", buf);
          column += strlen (buf);
          need_semicolon = true;
        }
    }
  fprintf (stream, "\n");
}
/* Output a character mapping table.
   Writes MAPNAME followed by the ';'-separated list of "(<Ufrom>,<Uto>)"
   pairs for every code point below 0x10000 that FUNC does not map to
   itself.  Long lines are continued with "/".  */
static void
output_charmap (FILE *stream, const char *mapname,
                unsigned int (*func) (unsigned int))
{
  char table[0x10000];
  unsigned int i;
  bool need_semicolon;
  const size_t max_column = 75;
  size_t column;

  /* Mark the code points that have a non-trivial mapping.  */
  for (i = 0; i < 0x10000; i++)
    table[i] = (func (i) != i);

  fprintf (stream, "%s ", mapname);
  need_semicolon = false;
  /* NOTE(review): the initial column is missing from the excerpt; 1000
     (which forces the first entry onto a continuation line) is believed to
     be the upstream value -- confirm.  */
  column = 1000;

  for (i = 0; i < 0x10000; i++)
    if (table[i])
      {
        char buf[32];

        snprintf (buf, sizeof (buf), "(<U%04X>,<U%04X>)", i, func (i));

        if (need_semicolon)
          {
            fprintf (stream, ";");
            column++;
          }

        if (column + strlen (buf) > max_column)
          {
            fprintf (stream, "/\n ");
            column = 1;         /* Width of the continuation indent.  */
          }

        fprintf (stream, "%s", buf);
        column += strlen (buf);
        need_semicolon = true;
      }
  fprintf (stream, "\n");
}
/* Output the width table.
   NOTE(review): no body lines survive in this excerpt; the function is
   reconstructed as a placeholder that writes nothing -- confirm.  */
static void
output_widthmap (FILE *stream)
{
  (void) stream;
}
653 /* Output the tables to the given file. */
656 output_tables (const char *filename
, const char *version
)
661 stream
= fopen (filename
, "w");
664 fprintf (stderr
, "cannot open '%s' for writing\n", filename
);
668 fprintf (stream
, "escape_char /\n");
669 fprintf (stream
, "comment_char %%\n");
670 fprintf (stream
, "\n");
671 fprintf (stream
, "%% Generated automatically by gen-unicode for Unicode %s.\n",
673 fprintf (stream
, "\n");
675 fprintf (stream
, "LC_IDENTIFICATION\n");
676 fprintf (stream
, "title \"Unicode %s FDCC-set\"\n", version
);
677 fprintf (stream
, "source \"UnicodeData.txt, PropList.txt\"\n");
678 fprintf (stream
, "address \"\"\n");
679 fprintf (stream
, "contact \"\"\n");
680 fprintf (stream
, "email \"bug-glibc@gnu.org\"\n");
681 fprintf (stream
, "tel \"\"\n");
682 fprintf (stream
, "fax \"\"\n");
683 fprintf (stream
, "language \"\"\n");
684 fprintf (stream
, "territory \"Earth\"\n");
685 fprintf (stream
, "revision \"%s\"\n", version
);
690 strftime (date
, sizeof (date
), "%Y-%m-%d", gmtime (&now
));
691 fprintf (stream
, "date \"%s\"\n", date
);
693 fprintf (stream
, "category \"unicode:2000\";LC_CTYPE\n");
694 fprintf (stream
, "END LC_IDENTIFICATION\n");
695 fprintf (stream
, "\n");
698 for (ch
= 0; ch
< 0x10000; ch
++)
700 /* toupper restriction: "Only characters specified for the keywords
701 lower and upper shall be specified. */
702 if (to_upper (ch
) != ch
&& !(is_lower (ch
) || is_upper (ch
)))
704 "<U%04X> is not upper|lower but toupper(0x%04X) = 0x%04X\n",
705 ch
, ch
, to_upper (ch
));
707 /* tolower restriction: "Only characters specified for the keywords
708 lower and upper shall be specified. */
709 if (to_lower (ch
) != ch
&& !(is_lower (ch
) || is_upper (ch
)))
711 "<U%04X> is not upper|lower but tolower(0x%04X) = 0x%04X\n",
712 ch
, ch
, to_lower (ch
));
714 /* alpha restriction: "Characters classified as either upper or lower
715 shall automatically belong to this class. */
716 if ((is_lower (ch
) || is_upper (ch
)) && !is_alpha (ch
))
717 fprintf (stderr
, "<U%04X> is upper|lower but not alpha\n", ch
);
719 /* alpha restriction: "No character specified for the keywords cntrl,
720 digit, punct or space shall be specified." */
721 if (is_alpha (ch
) && is_cntrl (ch
))
722 fprintf (stderr
, "<U%04X> is alpha and cntrl\n", ch
);
723 if (is_alpha (ch
) && is_digit (ch
))
724 fprintf (stderr
, "<U%04X> is alpha and digit\n", ch
);
725 if (is_alpha (ch
) && is_punct (ch
))
726 fprintf (stderr
, "<U%04X> is alpha and punct\n", ch
);
727 if (is_alpha (ch
) && is_space (ch
))
728 fprintf (stderr
, "<U%04X> is alpha and space\n", ch
);
730 /* space restriction: "No character specified for the keywords upper,
731 lower, alpha, digit, graph or xdigit shall be specified."
732 upper, lower, alpha already checked above. */
733 if (is_space (ch
) && is_digit (ch
))
734 fprintf (stderr
, "<U%04X> is space and digit\n", ch
);
735 if (is_space (ch
) && is_graph (ch
))
736 fprintf (stderr
, "<U%04X> is space and graph\n", ch
);
737 if (is_space (ch
) && is_xdigit (ch
))
738 fprintf (stderr
, "<U%04X> is space and xdigit\n", ch
);
740 /* cntrl restriction: "No character specified for the keywords upper,
741 lower, alpha, digit, punct, graph, print or xdigit shall be
742 specified." upper, lower, alpha already checked above. */
743 if (is_cntrl (ch
) && is_digit (ch
))
744 fprintf (stderr
, "<U%04X> is cntrl and digit\n", ch
);
745 if (is_cntrl (ch
) && is_punct (ch
))
746 fprintf (stderr
, "<U%04X> is cntrl and punct\n", ch
);
747 if (is_cntrl (ch
) && is_graph (ch
))
748 fprintf (stderr
, "<U%04X> is cntrl and graph\n", ch
);
749 if (is_cntrl (ch
) && is_print (ch
))
750 fprintf (stderr
, "<U%04X> is cntrl and print\n", ch
);
751 if (is_cntrl (ch
) && is_xdigit (ch
))
752 fprintf (stderr
, "<U%04X> is cntrl and xdigit\n", ch
);
754 /* punct restriction: "No character specified for the keywords upper,
755 lower, alpha, digit, cntrl, xdigit or as the <space> character shall
756 be specified." upper, lower, alpha, cntrl already checked above. */
757 if (is_punct (ch
) && is_digit (ch
))
758 fprintf (stderr
, "<U%04X> is punct and digit\n", ch
);
759 if (is_punct (ch
) && is_xdigit (ch
))
760 fprintf (stderr
, "<U%04X> is punct and xdigit\n", ch
);
761 if (is_punct (ch
) && (ch
== 0x0020))
762 fprintf (stderr
, "<U%04X> is punct\n", ch
);
764 /* graph restriction: "No character specified for the keyword cntrl
765 shall be specified." Already checked above. */
767 /* print restriction: "No character specified for the keyword cntrl
768 shall be specified." Already checked above. */
770 /* graph - print relation: differ only in the <space> character.
771 How is this possible if there are more than one space character?!
772 I think susv2/xbd/locale.html should speak of "space characters",
773 not "space character". */
774 if (is_print (ch
) && !(is_graph (ch
) || /* ch == 0x0020 */ is_space (ch
)))
775 fprintf (stderr
, "<U%04X> is print but not graph|<space>\n", ch
);
776 if (!is_print (ch
) && (is_graph (ch
) || ch
== 0x0020))
777 fprintf (stderr
, "<U%04X> is graph|<space> but not print\n", ch
);
780 fprintf (stream
, "LC_CTYPE\n");
781 output_charclass (stream
, "upper", is_upper
);
782 output_charclass (stream
, "lower", is_lower
);
783 output_charclass (stream
, "alpha", is_alpha
);
784 output_charclass (stream
, "digit", is_digit
);
785 output_charclass (stream
, "outdigit", is_outdigit
);
786 output_charclass (stream
, "blank", is_blank
);
787 output_charclass (stream
, "space", is_space
);
788 output_charclass (stream
, "cntrl", is_cntrl
);
789 output_charclass (stream
, "punct", is_punct
);
790 output_charclass (stream
, "xdigit", is_xdigit
);
791 output_charclass (stream
, "graph", is_graph
);
792 output_charclass (stream
, "print", is_print
);
793 output_charclass (stream
, "class \"combining\";", is_combining
);
794 output_charclass (stream
, "class \"combining_level3\";", is_combining_level3
);
795 output_charmap (stream
, "toupper", to_upper
);
796 output_charmap (stream
, "tolower", to_lower
);
797 output_charmap (stream
, "map \"totitle\";", to_title
);
798 output_widthmap (stream
);
799 fprintf (stream
, "END LC_CTYPE\n");
801 if (ferror (stream
) || fclose (stream
))
803 fprintf (stderr
, "error writing to '%s'\n", filename
);
/* Entry point.  Expects three arguments: the UnicodeData.txt file, the
   PropList.txt file and the Unicode version string.  Writes the result to
   the file "unicode" in the current directory.  */
int
main (int argc, char * argv[])
{
  if (argc != 4)
    {
      fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt version\n",
               argv[0]);
      exit (1);
    }

  fill_attributes (argv[1]);
  fill_combining (argv[2]);

  output_tables ("unicode", argv[3]);

  return 0;
}