/* Generate a Unicode conforming LC_CTYPE category from a UnicodeData file.
   Copyright (C) 2000-2001 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* Usage example:
     $ gen-unicode /usr/local/share/Unidata/UnicodeData.txt 3.1
 */
/* This structure represents one line in the UnicodeData.txt file.
   String fields that are empty in the file are stored as "", empty
   case mapping fields as NONE.  */
struct unicode_attribute
{
  const char *name;           /* Character name */
  const char *category;       /* General category */
  const char *combining;      /* Canonical combining classes */
  const char *bidi;           /* Bidirectional category */
  const char *decomposition;  /* Character decomposition mapping */
  const char *decdigit;       /* Decimal digit value */
  const char *digit;          /* Digit value */
  const char *numeric;        /* Numeric value */
  int mirrored;               /* mirrored: nonzero if the field starts
				 with 'Y' */
  const char *oldname;        /* Old Unicode 1.0 name */
  const char *comment;        /* Comment */
  unsigned int upper;         /* Uppercase mapping */
  unsigned int lower;         /* Lowercase mapping */
  unsigned int title;         /* Titlecase mapping */
};
/* Missing fields are represented with "" for strings, and NONE for
   integers.  */
#define NONE (~(unsigned int)0)
54 /* The entire contents of the UnicodeData.txt file. */
55 struct unicode_attribute unicode_attributes
[0x110000];
57 /* Stores in unicode_attributes[i] the values from the given fields. */
59 fill_attribute (unsigned int i
,
60 const char *field1
, const char *field2
,
61 const char *field3
, const char *field4
,
62 const char *field5
, const char *field6
,
63 const char *field7
, const char *field8
,
64 const char *field9
, const char *field10
,
65 const char *field11
, const char *field12
,
66 const char *field13
, const char *field14
)
68 struct unicode_attribute
* uni
;
72 fprintf (stderr
, "index too large\n");
75 if (strcmp (field2
, "Cs") == 0)
76 /* Surrogates are UTF-16 artefacts, not real characters. Ignore them. */
78 uni
= &unicode_attributes
[i
];
79 /* Copy the strings. */
80 uni
->name
= strdup (field1
);
81 uni
->category
= (field2
[0] == '\0' ? "" : strdup (field2
));
82 uni
->combining
= (field3
[0] == '\0' ? "" : strdup (field3
));
83 uni
->bidi
= (field4
[0] == '\0' ? "" : strdup (field4
));
84 uni
->decomposition
= (field5
[0] == '\0' ? "" : strdup (field5
));
85 uni
->decdigit
= (field6
[0] == '\0' ? "" : strdup (field6
));
86 uni
->digit
= (field7
[0] == '\0' ? "" : strdup (field7
));
87 uni
->numeric
= (field8
[0] == '\0' ? "" : strdup (field8
));
88 uni
->mirrored
= (field9
[0] == 'Y');
89 uni
->oldname
= (field10
[0] == '\0' ? "" : strdup (field10
));
90 uni
->comment
= (field11
[0] == '\0' ? "" : strdup (field11
));
91 uni
->upper
= (field12
[0] =='\0' ? NONE
: strtoul (field12
, NULL
, 16));
92 uni
->lower
= (field13
[0] =='\0' ? NONE
: strtoul (field13
, NULL
, 16));
93 uni
->title
= (field14
[0] =='\0' ? NONE
: strtoul (field14
, NULL
, 16));
96 /* Maximum length of a field in the UnicodeData.txt file. */
99 /* Reads the next field from STREAM. The buffer BUFFER has size FIELDLEN.
100 Reads up to (but excluding) DELIM.
101 Returns 1 when a field was successfully read, otherwise 0. */
103 getfield (FILE *stream
, char *buffer
, int delim
)
108 for (; (c
= getc (stream
)), (c
!= EOF
&& c
!= delim
); )
110 /* The original unicode.org UnicodeData.txt file happens to have
111 CR/LF line terminators. Silently convert to LF. */
115 /* Put c into the buffer. */
116 if (++count
>= FIELDLEN
- 1)
118 fprintf (stderr
, "field too long\n");
131 /* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
134 fill_attributes (const char *unicodedata_filename
)
138 char field0
[FIELDLEN
];
139 char field1
[FIELDLEN
];
140 char field2
[FIELDLEN
];
141 char field3
[FIELDLEN
];
142 char field4
[FIELDLEN
];
143 char field5
[FIELDLEN
];
144 char field6
[FIELDLEN
];
145 char field7
[FIELDLEN
];
146 char field8
[FIELDLEN
];
147 char field9
[FIELDLEN
];
148 char field10
[FIELDLEN
];
149 char field11
[FIELDLEN
];
150 char field12
[FIELDLEN
];
151 char field13
[FIELDLEN
];
152 char field14
[FIELDLEN
];
155 for (i
= 0; i
< 0x110000; i
++)
156 unicode_attributes
[i
].name
= NULL
;
158 stream
= fopen (unicodedata_filename
, "r");
161 fprintf (stderr
, "error during fopen of '%s'\n", unicodedata_filename
);
170 n
= getfield (stream
, field0
, ';');
171 n
+= getfield (stream
, field1
, ';');
172 n
+= getfield (stream
, field2
, ';');
173 n
+= getfield (stream
, field3
, ';');
174 n
+= getfield (stream
, field4
, ';');
175 n
+= getfield (stream
, field5
, ';');
176 n
+= getfield (stream
, field6
, ';');
177 n
+= getfield (stream
, field7
, ';');
178 n
+= getfield (stream
, field8
, ';');
179 n
+= getfield (stream
, field9
, ';');
180 n
+= getfield (stream
, field10
, ';');
181 n
+= getfield (stream
, field11
, ';');
182 n
+= getfield (stream
, field12
, ';');
183 n
+= getfield (stream
, field13
, ';');
184 n
+= getfield (stream
, field14
, '\n');
189 fprintf (stderr
, "short line in'%s':%d\n",
190 unicodedata_filename
, lineno
);
193 i
= strtoul (field0
, NULL
, 16);
195 && strlen (field1
) >= 9
196 && !strcmp (field1
+ strlen(field1
) - 8, ", First>"))
198 /* Deal with a range. */
200 n
= getfield (stream
, field0
, ';');
201 n
+= getfield (stream
, field1
, ';');
202 n
+= getfield (stream
, field2
, ';');
203 n
+= getfield (stream
, field3
, ';');
204 n
+= getfield (stream
, field4
, ';');
205 n
+= getfield (stream
, field5
, ';');
206 n
+= getfield (stream
, field6
, ';');
207 n
+= getfield (stream
, field7
, ';');
208 n
+= getfield (stream
, field8
, ';');
209 n
+= getfield (stream
, field9
, ';');
210 n
+= getfield (stream
, field10
, ';');
211 n
+= getfield (stream
, field11
, ';');
212 n
+= getfield (stream
, field12
, ';');
213 n
+= getfield (stream
, field13
, ';');
214 n
+= getfield (stream
, field14
, '\n');
217 fprintf (stderr
, "missing end range in '%s':%d\n",
218 unicodedata_filename
, lineno
);
221 if (!(field1
[0] == '<'
222 && strlen (field1
) >= 8
223 && !strcmp (field1
+ strlen (field1
) - 7, ", Last>")))
225 fprintf (stderr
, "missing end range in '%s':%d\n",
226 unicodedata_filename
, lineno
);
229 field1
[strlen (field1
) - 7] = '\0';
230 j
= strtoul (field0
, NULL
, 16);
232 fill_attribute (i
, field1
+1, field2
, field3
, field4
, field5
,
233 field6
, field7
, field8
, field9
, field10
,
234 field11
, field12
, field13
, field14
);
238 /* Single character line */
239 fill_attribute (i
, field1
, field2
, field3
, field4
, field5
,
240 field6
, field7
, field8
, field9
, field10
,
241 field11
, field12
, field13
, field14
);
244 if (ferror (stream
) || fclose (stream
))
246 fprintf (stderr
, "error reading from '%s'\n", unicodedata_filename
);
251 /* Character mappings. */
254 to_upper (unsigned int ch
)
256 if (unicode_attributes
[ch
].name
!= NULL
257 && unicode_attributes
[ch
].upper
!= NONE
)
258 return unicode_attributes
[ch
].upper
;
264 to_lower (unsigned int ch
)
266 if (unicode_attributes
[ch
].name
!= NULL
267 && unicode_attributes
[ch
].lower
!= NONE
)
268 return unicode_attributes
[ch
].lower
;
274 to_title (unsigned int ch
)
276 if (unicode_attributes
[ch
].name
!= NULL
277 && unicode_attributes
[ch
].title
!= NONE
)
278 return unicode_attributes
[ch
].title
;
283 /* Character class properties. */
/* A character is "upper" exactly when converting it to lowercase changes
   it.  */
static bool
is_upper (unsigned int ch)
{
  return (to_lower (ch) != ch);
}
/* A character is "lower" when converting it to uppercase changes it.  */
static bool
is_lower (unsigned int ch)
{
  return (to_upper (ch) != ch)
	 /* <U00DF> is lowercase, but without simple to_upper mapping.  */
	 || (ch == 0x00DF);
}
300 is_alpha (unsigned int ch
)
302 return (unicode_attributes
[ch
].name
!= NULL
303 && ((unicode_attributes
[ch
].category
[0] == 'L'
304 /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
305 <U0E2F>, <U0E46> should belong to is_punct. */
306 && (ch
!= 0x0E2F) && (ch
!= 0x0E46))
307 /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
308 <U0E31>, <U0E34>..<U0E3A>, <U0E47>..<U0E4E> are is_alpha. */
310 || (ch
>= 0x0E34 && ch
<= 0x0E3A)
311 || (ch
>= 0x0E47 && ch
<= 0x0E4E)
312 /* Avoid warning for <U0345>. */
314 /* Avoid warnings for <U2160>..<U217F>. */
315 || (unicode_attributes
[ch
].category
[0] == 'N'
316 && unicode_attributes
[ch
].category
[1] == 'l')
317 /* Avoid warnings for <U24B6>..<U24E9>. */
318 || (unicode_attributes
[ch
].category
[0] == 'S'
319 && unicode_attributes
[ch
].category
[1] == 'o'
320 && strstr (unicode_attributes
[ch
].name
, " LETTER ")
322 /* Consider all the non-ASCII digits as alphabetic.
323 ISO C 99 forbids us to have them in category "digit",
324 but we want iswalnum to return true on them. */
325 || (unicode_attributes
[ch
].category
[0] == 'N'
326 && unicode_attributes
[ch
].category
[1] == 'd'
327 && !(ch
>= 0x0030 && ch
<= 0x0039))));
/* Returns true if CH belongs to the "digit" class, which is restricted to
   the ten ASCII digits.  The Unicode-wide definition is kept for reference
   but disabled; without the guard it would take precedence and put the
   non-ASCII digits into both "alpha" and "digit".  */
static bool
is_digit (unsigned int ch)
{
#if 0
  return (unicode_attributes[ch].name != NULL
	  && unicode_attributes[ch].category[0] == 'N'
	  && unicode_attributes[ch].category[1] == 'd');
  /* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
     a zero.  Must add <0> in front of them by hand.  */
#else
  /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99
     takes it away:
	The iswdigit function tests for any wide character that corresponds
	to a decimal-digit character (as defined in 5.2.1).
     and 5.2.1 defines these as
	the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
   */
  return (ch >= 0x0030 && ch <= 0x0039);
#endif
}
/* Returns true if CH belongs to the "outdigit" class: the ten ASCII
   digits.  */
static bool
is_outdigit (unsigned int ch)
{
  return (ch >= 0x0030 && ch <= 0x0039);
}
359 is_blank (unsigned int ch
)
361 return (ch
== 0x0009 /* '\t' */
362 /* Category Zs without mention of "<noBreak>" */
363 || (unicode_attributes
[ch
].name
!= NULL
364 && unicode_attributes
[ch
].category
[0] == 'Z'
365 && unicode_attributes
[ch
].category
[1] == 's'
366 && !strstr (unicode_attributes
[ch
].decomposition
, "<noBreak>")));
370 is_space (unsigned int ch
)
372 /* Don't make U+00A0 a space. Non-breaking space means that all programs
373 should treat it like a punctuation character, not like a space. */
374 return (ch
== 0x0020 /* ' ' */
375 || ch
== 0x000C /* '\f' */
376 || ch
== 0x000A /* '\n' */
377 || ch
== 0x000D /* '\r' */
378 || ch
== 0x0009 /* '\t' */
379 || ch
== 0x000B /* '\v' */
380 /* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
381 || (unicode_attributes
[ch
].name
!= NULL
382 && unicode_attributes
[ch
].category
[0] == 'Z'
383 && (unicode_attributes
[ch
].category
[1] == 'l'
384 || unicode_attributes
[ch
].category
[1] == 'p'
385 || (unicode_attributes
[ch
].category
[1] == 's'
386 && !strstr (unicode_attributes
[ch
].decomposition
,
391 is_cntrl (unsigned int ch
)
393 return (unicode_attributes
[ch
].name
!= NULL
394 && (!strcmp (unicode_attributes
[ch
].name
, "<control>")
395 /* Categories Zl and Zp */
396 || (unicode_attributes
[ch
].category
[0] == 'Z'
397 && (unicode_attributes
[ch
].category
[1] == 'l'
398 || unicode_attributes
[ch
].category
[1] == 'p'))));
/* Returns true if CH belongs to the "xdigit" class, which is restricted
   to the ASCII hexadecimal digits.  */
static bool
is_xdigit (unsigned int ch)
{
  /* SUSV2 gives us some freedom for the "xdigit" category, but ISO C 99
     takes it away:
	The iswxdigit function tests for any wide character that corresponds
	to a hexadecimal-digit character (as defined in 6.4.4.1).
     and 6.4.4.1 defines
	hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
   */
  return (ch >= 0x0030 && ch <= 0x0039)
	 || (ch >= 0x0041 && ch <= 0x0046)
	 || (ch >= 0x0061 && ch <= 0x0066);
}
424 is_graph (unsigned int ch
)
426 return (unicode_attributes
[ch
].name
!= NULL
427 && strcmp (unicode_attributes
[ch
].name
, "<control>")
432 is_print (unsigned int ch
)
434 return (unicode_attributes
[ch
].name
!= NULL
435 && strcmp (unicode_attributes
[ch
].name
, "<control>")
436 /* Categories Zl and Zp */
437 && !(unicode_attributes
[ch
].name
!= NULL
438 && unicode_attributes
[ch
].category
[0] == 'Z'
439 && (unicode_attributes
[ch
].category
[1] == 'l'
440 || unicode_attributes
[ch
].category
[1] == 'p')));
/* Returns true if CH belongs to the "punct" class.  The definition based
   on the Unicode general category is kept for reference but disabled.  */
static bool
is_punct (unsigned int ch)
{
#if 0
  return (unicode_attributes[ch].name != NULL
	  && unicode_attributes[ch].category[0] == 'P');
#else
  /* The traditional POSIX definition of punctuation is every graphic,
     non-alphanumeric character.  */
  return (is_graph (ch) && !is_alpha (ch) && !is_digit (ch));
#endif
}
457 is_combining (unsigned int ch
)
459 /* Up to Unicode 3.0.1 we took the Combining property from the PropList.txt
460 file. In 3.0.1 it was identical to the union of the general categories
461 "Mn", "Mc", "Me". In Unicode 3.1 this property has been dropped from the
462 PropList.txt file, so we take the latter definition. */
463 return (unicode_attributes
[ch
].name
!= NULL
464 && unicode_attributes
[ch
].category
[0] == 'M'
465 && (unicode_attributes
[ch
].category
[1] == 'n'
466 || unicode_attributes
[ch
].category
[1] == 'c'
467 || unicode_attributes
[ch
].category
[1] == 'e'));
471 is_combining_level3 (unsigned int ch
)
473 return is_combining (ch
)
474 && !(unicode_attributes
[ch
].combining
[0] != '\0'
475 && unicode_attributes
[ch
].combining
[0] != '0'
476 && strtoul (unicode_attributes
[ch
].combining
, NULL
, 10) >= 200);
/* Return the UCS symbol string for a Unicode character: "<Uxxxx>" with
   four hexadecimal digits below 0x10000, "<Uxxxxxxxx>" with eight digits
   otherwise.  The result lives in a static buffer that is overwritten by
   the next call.  */
static const char *
ucs_symbol (unsigned int i)
{
  /* Room for the longest form, "<U%08X>": 11 characters plus the NUL.  */
  static char buf[11+1];

  snprintf (buf, sizeof (buf), (i < 0x10000 ? "<U%04X>" : "<U%08X>"), i);
  return buf;
}
/* Return the UCS symbol range string for a Unicode characters interval:
   the symbols of LOW and HIGH joined by "..".  The result lives in a
   static buffer that is overwritten by the next call.  */
static const char *
ucs_symbol_range (unsigned int low, unsigned int high)
{
  /* Two symbols of at most 11 characters, the separator, and the NUL.  */
  static char buf[24+1];

  /* ucs_symbol reuses one static buffer, so each result has to be copied
     before the next call.  */
  strcpy (buf, ucs_symbol (low));
  strcat (buf, "..");
  strcat (buf, ucs_symbol (high));
  return buf;
}
/* Output a character class (= property) table.
   Writes CLASSNAME to STREAM, followed by the code points for which FUNC
   returns true.  Runs of consecutive code points are written as ranges,
   entries are separated by ';', and lines are continued with the escape
   character '/' so that they stay within max_column columns.  */
static void
output_charclass (FILE *stream, const char *classname,
		  bool (*func) (unsigned int))
{
  /* One byte per code point is more than 1 MiB; keep it off the stack.  */
  static char table[0x110000];
  unsigned int i;
  bool need_semicolon;
  const int max_column = 75;
  int column;

  for (i = 0; i < 0x110000; i++)
    table[i] = (int) func (i);

  fprintf (stream, "%s ", classname);
  need_semicolon = false;
  /* Start beyond the limit, so that the first entry already goes onto a
     continuation line.  */
  column = max_column + 1;

  for (i = 0; i < 0x110000; )
    {
      unsigned int low, high;
      char buf[24+1];

      if (!table[i])
	{
	  i++;
	  continue;
	}

      /* Collect the whole run of members that starts at I.  */
      low = i;
      do
	i++;
      while (i < 0x110000 && table[i]);
      high = i - 1;

      if (low == high)
	strcpy (buf, ucs_symbol (low));
      else
	strcpy (buf, ucs_symbol_range (low, high));

      if (need_semicolon)
	{
	  fprintf (stream, ";");
	  column++;
	}

      if (column + (int) strlen (buf) > max_column)
	{
	  fprintf (stream, "/\n ");
	  /* Width of the indentation just written.  */
	  column = 1;
	}

      fprintf (stream, "%s", buf);
      column += strlen (buf);
      need_semicolon = true;
    }
  fprintf (stream, "\n");
}
/* Output a character mapping table.
   Writes MAPNAME to STREAM, followed by one "(from,to)" pair for every
   code point that FUNC does not map to itself.  Pairs are separated by
   ';', and lines are continued with the escape character '/' so that they
   stay within max_column columns.  */
static void
output_charmap (FILE *stream, const char *mapname,
		unsigned int (*func) (unsigned int))
{
  /* One byte per code point is more than 1 MiB; keep it off the stack.  */
  static char table[0x110000];
  unsigned int i;
  bool need_semicolon;
  const int max_column = 75;
  int column;

  for (i = 0; i < 0x110000; i++)
    table[i] = (func (i) != i);

  fprintf (stream, "%s ", mapname);
  need_semicolon = false;
  /* Start beyond the limit, so that the first entry already goes onto a
     continuation line.  */
  column = max_column + 1;

  for (i = 0; i < 0x110000; i++)
    {
      /* Two symbols of at most 11 characters, "(", ",", ")" and the NUL.  */
      char buf[25+1];

      if (!table[i])
	continue;

      /* ucs_symbol reuses one static buffer, so the pair is assembled
	 piece by piece instead of with a single format call.  */
      strcpy (buf, "(");
      strcat (buf, ucs_symbol (i));
      strcat (buf, ",");
      strcat (buf, ucs_symbol (func (i)));
      strcat (buf, ")");

      if (need_semicolon)
	{
	  fprintf (stream, ";");
	  column++;
	}

      if (column + (int) strlen (buf) > max_column)
	{
	  fprintf (stream, "/\n ");
	  /* Width of the indentation just written.  */
	  column = 1;
	}

      fprintf (stream, "%s", buf);
      column += strlen (buf);
      need_semicolon = true;
    }
  fprintf (stream, "\n");
}
/* Output the width table.
   Currently nothing is written to STREAM; the function only reserves the
   place of the width table in the output sequence.  */
static void
output_widthmap (FILE *stream)
{
  (void) stream;
}
614 /* Output the tables to the given file. */
617 output_tables (const char *filename
, const char *version
)
622 stream
= fopen (filename
, "w");
625 fprintf (stderr
, "cannot open '%s' for writing\n", filename
);
629 fprintf (stream
, "escape_char /\n");
630 fprintf (stream
, "comment_char %%\n");
631 fprintf (stream
, "\n");
632 fprintf (stream
, "%% Generated automatically by gen-unicode-ctype for Unicode %s.\n",
634 fprintf (stream
, "\n");
636 fprintf (stream
, "LC_IDENTIFICATION\n");
637 fprintf (stream
, "title \"Unicode %s FDCC-set\"\n", version
);
638 fprintf (stream
, "source \"UnicodeData.txt, PropList.txt\"\n");
639 fprintf (stream
, "address \"\"\n");
640 fprintf (stream
, "contact \"\"\n");
641 fprintf (stream
, "email \"bug-glibc@gnu.org\"\n");
642 fprintf (stream
, "tel \"\"\n");
643 fprintf (stream
, "fax \"\"\n");
644 fprintf (stream
, "language \"\"\n");
645 fprintf (stream
, "territory \"Earth\"\n");
646 fprintf (stream
, "revision \"%s\"\n", version
);
651 strftime (date
, sizeof (date
), "%Y-%m-%d", gmtime (&now
));
652 fprintf (stream
, "date \"%s\"\n", date
);
654 fprintf (stream
, "category \"unicode:2001\";LC_CTYPE\n");
655 fprintf (stream
, "END LC_IDENTIFICATION\n");
656 fprintf (stream
, "\n");
659 for (ch
= 0; ch
< 0x110000; ch
++)
661 /* toupper restriction: "Only characters specified for the keywords
662 lower and upper shall be specified. */
663 if (to_upper (ch
) != ch
&& !(is_lower (ch
) || is_upper (ch
)))
665 "%s is not upper|lower but toupper(0x%04X) = 0x%04X\n",
666 ucs_symbol (ch
), ch
, to_upper (ch
));
668 /* tolower restriction: "Only characters specified for the keywords
669 lower and upper shall be specified. */
670 if (to_lower (ch
) != ch
&& !(is_lower (ch
) || is_upper (ch
)))
672 "%s is not upper|lower but tolower(0x%04X) = 0x%04X\n",
673 ucs_symbol (ch
), ch
, to_lower (ch
));
675 /* alpha restriction: "Characters classified as either upper or lower
676 shall automatically belong to this class. */
677 if ((is_lower (ch
) || is_upper (ch
)) && !is_alpha (ch
))
678 fprintf (stderr
, "%s is upper|lower but not alpha\n", ucs_symbol (ch
));
680 /* alpha restriction: "No character specified for the keywords cntrl,
681 digit, punct or space shall be specified." */
682 if (is_alpha (ch
) && is_cntrl (ch
))
683 fprintf (stderr
, "%s is alpha and cntrl\n", ucs_symbol (ch
));
684 if (is_alpha (ch
) && is_digit (ch
))
685 fprintf (stderr
, "%s is alpha and digit\n", ucs_symbol (ch
));
686 if (is_alpha (ch
) && is_punct (ch
))
687 fprintf (stderr
, "%s is alpha and punct\n", ucs_symbol (ch
));
688 if (is_alpha (ch
) && is_space (ch
))
689 fprintf (stderr
, "%s is alpha and space\n", ucs_symbol (ch
));
691 /* space restriction: "No character specified for the keywords upper,
692 lower, alpha, digit, graph or xdigit shall be specified."
693 upper, lower, alpha already checked above. */
694 if (is_space (ch
) && is_digit (ch
))
695 fprintf (stderr
, "%s is space and digit\n", ucs_symbol (ch
));
696 if (is_space (ch
) && is_graph (ch
))
697 fprintf (stderr
, "%s is space and graph\n", ucs_symbol (ch
));
698 if (is_space (ch
) && is_xdigit (ch
))
699 fprintf (stderr
, "%s is space and xdigit\n", ucs_symbol (ch
));
701 /* cntrl restriction: "No character specified for the keywords upper,
702 lower, alpha, digit, punct, graph, print or xdigit shall be
703 specified." upper, lower, alpha already checked above. */
704 if (is_cntrl (ch
) && is_digit (ch
))
705 fprintf (stderr
, "%s is cntrl and digit\n", ucs_symbol (ch
));
706 if (is_cntrl (ch
) && is_punct (ch
))
707 fprintf (stderr
, "%s is cntrl and punct\n", ucs_symbol (ch
));
708 if (is_cntrl (ch
) && is_graph (ch
))
709 fprintf (stderr
, "%s is cntrl and graph\n", ucs_symbol (ch
));
710 if (is_cntrl (ch
) && is_print (ch
))
711 fprintf (stderr
, "%s is cntrl and print\n", ucs_symbol (ch
));
712 if (is_cntrl (ch
) && is_xdigit (ch
))
713 fprintf (stderr
, "%s is cntrl and xdigit\n", ucs_symbol (ch
));
715 /* punct restriction: "No character specified for the keywords upper,
716 lower, alpha, digit, cntrl, xdigit or as the <space> character shall
717 be specified." upper, lower, alpha, cntrl already checked above. */
718 if (is_punct (ch
) && is_digit (ch
))
719 fprintf (stderr
, "%s is punct and digit\n", ucs_symbol (ch
));
720 if (is_punct (ch
) && is_xdigit (ch
))
721 fprintf (stderr
, "%s is punct and xdigit\n", ucs_symbol (ch
));
722 if (is_punct (ch
) && (ch
== 0x0020))
723 fprintf (stderr
, "%s is punct\n", ucs_symbol (ch
));
725 /* graph restriction: "No character specified for the keyword cntrl
726 shall be specified." Already checked above. */
728 /* print restriction: "No character specified for the keyword cntrl
729 shall be specified." Already checked above. */
731 /* graph - print relation: differ only in the <space> character.
732 How is this possible if there are more than one space character?!
733 I think susv2/xbd/locale.html should speak of "space characters",
734 not "space character". */
735 if (is_print (ch
) && !(is_graph (ch
) || /* ch == 0x0020 */ is_space (ch
)))
737 "%s is print but not graph|<space>\n", ucs_symbol (ch
));
738 if (!is_print (ch
) && (is_graph (ch
) || ch
== 0x0020))
740 "%s is graph|<space> but not print\n", ucs_symbol (ch
));
743 fprintf (stream
, "LC_CTYPE\n");
744 output_charclass (stream
, "upper", is_upper
);
745 output_charclass (stream
, "lower", is_lower
);
746 output_charclass (stream
, "alpha", is_alpha
);
747 output_charclass (stream
, "digit", is_digit
);
748 output_charclass (stream
, "outdigit", is_outdigit
);
749 output_charclass (stream
, "blank", is_blank
);
750 output_charclass (stream
, "space", is_space
);
751 output_charclass (stream
, "cntrl", is_cntrl
);
752 output_charclass (stream
, "punct", is_punct
);
753 output_charclass (stream
, "xdigit", is_xdigit
);
754 output_charclass (stream
, "graph", is_graph
);
755 output_charclass (stream
, "print", is_print
);
756 output_charclass (stream
, "class \"combining\";", is_combining
);
757 output_charclass (stream
, "class \"combining_level3\";", is_combining_level3
);
758 output_charmap (stream
, "toupper", to_upper
);
759 output_charmap (stream
, "tolower", to_lower
);
760 output_charmap (stream
, "map \"totitle\";", to_title
);
761 output_widthmap (stream
);
762 fprintf (stream
, "END LC_CTYPE\n");
764 if (ferror (stream
) || fclose (stream
))
766 fprintf (stderr
, "error writing to '%s'\n", filename
);
/* Entry point.  Expects exactly two arguments: the path of the
   UnicodeData.txt file and the Unicode version string.  Reads the data
   file and writes the resulting locale definition to the file "unicode"
   in the current directory.  */
int
main (int argc, char * argv[])
{
  if (argc != 3)
    {
      fprintf (stderr, "Usage: %s UnicodeData.txt version\n", argv[0]);
      exit (1);
    }

  fill_attributes (argv[1]);

  output_tables ("unicode", argv[2]);

  return 0;
}