2 * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
4 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
21 #include "include/bits/uClibc_ctype.h"
23 /* TODO: maybe support -v like gen_wctype.c */
/*
 * verbose_msg(fmt, ...): print a printf-style diagnostic to stderr, but only
 * when `verbose` is nonzero.  `verbose` must be in scope wherever the macro
 * is expanded (its declaration is not part of this excerpt).
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves as a
 * single statement: a bare `if (verbose) ...` expansion would capture a
 * following `else` that the caller meant for an enclosing `if`.
 */
#define verbose_msg(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while (0)
/*
 * Table-shape parameters for the three per-codeset lookup tables (ctype,
 * uplow, c2wc).  For each table:
 *   xxx_IDX_SHIFT  log2 of the row length; main() uses (i >> xxx_IDX_SHIFT)
 *                  to pick the index slot for 8-bit character i.
 *   xxx_IDX_LEN    128 >> shift: size of the per-codeset index array that is
 *                  emitted (idx8ctype / idx8uplow / idx8c2wc).
 *   xxx_ROW_LEN    1 << shift: number of entries in one deduplicated row.
 * The values are also written to the generated output as the
 * __LOCALE_DATA_Cxxx_* defines.
 */
/* Packed ctype rows are disabled here; the define is left commented out. */
26 /* #define CTYPE_PACKED */
27 #define UPLOW_IDX_SHIFT 3
28 /* best if 2 unpacked or 3 packed */
29 #define CTYPE_IDX_SHIFT 3
30 /* 3 or 4 are very similar */
31 #define C2WC_IDX_SHIFT 3
/* Index lengths: one slot per row of the 128 characters 0x80..0xff. */
33 #define CTYPE_IDX_LEN (128 >> (CTYPE_IDX_SHIFT))
34 #define UPLOW_IDX_LEN (128 >> (UPLOW_IDX_SHIFT))
35 #define C2WC_IDX_LEN (128 >> (C2WC_IDX_SHIFT))
/* Commented-out alternative row length that was tied to CTYPE_PACKED. */
37 /* #ifdef CTYPE_PACKED */
38 /* #define CTYPE_ROW_LEN (1 << ((CTYPE_IDX_SHIFT)-1)) */
/* Row lengths, in table entries. */
40 #define CTYPE_ROW_LEN (1 << (CTYPE_IDX_SHIFT))
42 #define UPLOW_ROW_LEN (1 << (UPLOW_IDX_SHIFT))
43 #define C2WC_ROW_LEN (1 << (C2WC_IDX_SHIFT))
/*
 * Largest wide-character value accepted from the input tables; main()
 * rejects any entry whose wide char exceeds it.  Also sizes tt/ti/xi.
 */
47 #define MAX_WCHAR (0x2600-1)
/*
 * Shared row pools.  Each pool holds at most 256 rows; main() reports an
 * error once a row counter would exceed 256.
 * NOTE(review): the statements that point the scan pointer at ctype_tbl and
 * uplow_tbl are not visible in this excerpt -- presumably these two pools
 * back the emitted __LOCALE_DATA_Cctype_data / __LOCALE_DATA_Cuplow_data
 * arrays; confirm against the full file.
 */
49 static unsigned char ctype_tbl
[256 * CTYPE_ROW_LEN
];
50 static unsigned char uplow_tbl
[256 * UPLOW_ROW_LEN
];
/*
 * Pool of unique char -> wide-char rows (C2WC_ROW_LEN unsigned shorts each).
 * main() compares and copies rows as 2*C2WC_ROW_LEN bytes and prints the
 * pool as __LOCALE_DATA_Cc2wc_data.
 */
52 static unsigned short c2wc_tbl
[256 * C2WC_ROW_LEN
];
/*
 * Wide-char -> char tables:
 *   tt  pool of unique (1 << TT_SHIFT)-byte blocks taken from each codeset's
 *       w2c array; printed as "tt_table".
 *   ti  pool of unique (1 << TI_SHIFT)-byte blocks; printed as "ti_table".
 *   xi  scratch array zeroed for every codeset; xi[i >> TT_SHIFT] records the
 *       tt block number chosen for the w2c block starting at i.
 */
54 static unsigned char tt
[MAX_WCHAR
+1];
55 static unsigned char ti
[MAX_WCHAR
+1];
56 static unsigned char xi
[MAX_WCHAR
+1];
/* Number of rows currently used in the corresponding pool. */
58 static int n_ctype_rows
;
59 static int n_uplow_rows
;
61 static int n_c2wc_rows
;
/* Emitted to the generated output as __LOCALE_DATA_Cwc2c_DOMAIN_MAX. */
66 #define RANGE MAX_WCHAR
/*
 * Length of each codeset's top-level wc2c index (the `ii` array, emitted as
 * idx8wc2c).  TT_SHIFT and TI_SHIFT are defined outside this excerpt.
 */
71 #define II_LEN ((MAX_WCHAR+1) >> (TT_SHIFT+TI_SHIFT))
74 unsigned long c2w
[256];
75 unsigned char w2c
[MAX_WCHAR
];
76 unsigned char ii
[II_LEN
];
77 unsigned char ctype_idx
[CTYPE_IDX_LEN
];
78 unsigned char uplow_idx
[UPLOW_IDX_LEN
];
79 unsigned char c2wc_idx
[C2WC_IDX_LEN
];
82 int main(int argc
, char **argv
)
86 unsigned long max_wchar
;
92 unsigned char row
[256];
94 unsigned short wrow
[256];
96 char codeset_list
[500];
97 char codeset_index
[30];
98 int codeset_list_end
= 0;
101 if (!setlocale(LC_CTYPE
, "en_US.UTF-8")) {
102 /* Silly foreigners disabling en_US locales */
103 FILE *fp
= popen("locale -a", "r");
111 if (fgets(buf
, sizeof(buf
) - 10, fp
) == NULL
)
115 if (len
> 0 && buf
[len
- 1] == '\n')
117 if (len
< 5 || strcasecmp(&buf
[len
-5], ".UTF8") != 0)
118 strcat(buf
, ".UTF8");
119 if (setlocale(LC_CTYPE
, buf
))
124 printf("could not find a UTF8 locale ... please enable en_US.UTF-8\n");
131 printf("#undef __CTYPE_HAS_8_BIT_LOCALES\n\n");
133 printf("#define __LOCALE_DATA_NUM_CODESETS\t\t0\n");
134 printf("#define __LOCALE_DATA_CODESET_LIST\t\t\"\"\n");
136 printf("#define __CTYPE_HAS_8_BIT_LOCALES\t\t1\n\n");
139 printf("#define __LOCALE_DATA_Cctype_IDX_SHIFT\t%d\n", CTYPE_IDX_SHIFT
);
140 printf("#define __LOCALE_DATA_Cctype_IDX_LEN\t\t%d\n", CTYPE_IDX_LEN
);
142 printf("#define __LOCALE_DATA_Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN
>> 1);
143 printf("#define __LOCALE_DATA_Cctype_PACKED\t\t1\n");
145 printf("#define __LOCALE_DATA_Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN
);
146 printf("#undef __LOCALE_DATA_Cctype_PACKED\n");
149 printf("\n#define __LOCALE_DATA_Cuplow_IDX_SHIFT\t%d\n", UPLOW_IDX_SHIFT
);
150 printf("#define __LOCALE_DATA_Cuplow_IDX_LEN\t\t%d\n", UPLOW_IDX_LEN
);
151 printf("#define __LOCALE_DATA_Cuplow_ROW_LEN\t\t%d\n", UPLOW_ROW_LEN
);
154 printf("\n#define __LOCALE_DATA_Cc2wc_IDX_LEN\t\t%d\n", C2WC_IDX_LEN
);
155 printf("#define __LOCALE_DATA_Cc2wc_IDX_SHIFT\t\t%d\n", C2WC_IDX_SHIFT
);
156 printf("#define __LOCALE_DATA_Cc2wc_ROW_LEN\t\t%d\n", C2WC_ROW_LEN
);
159 printf("\ntypedef struct {\n");
160 printf("\tunsigned char idx8ctype[%d];\n", CTYPE_IDX_LEN
);
161 printf("\tunsigned char idx8uplow[%d];\n", UPLOW_IDX_LEN
);
163 printf("\tunsigned char idx8c2wc[%d];\n", C2WC_IDX_LEN
);
164 printf("\tunsigned char idx8wc2c[%d];\n", II_LEN
);
167 printf("} __codeset_8_bit_t;\n\n");
169 printf("} __attribute__((__packed__)) __codeset_8_bit_t;\n\n");
170 #endif /* __metag__ */
172 printf("#ifdef WANT_DATA\n\n");
173 printf("static const __codeset_8_bit_t codeset_8_bit[%d] = {\n", argc
-1);
177 codeset_index
[0] = 0;
179 if (!(fp
= fopen(*++argv
,"r"))) {
180 fprintf(stderr
, "cannot open file \"%s\"\n", *argv
);
183 fprintf(stderr
, "processing %s... ", *argv
);
190 s0
= strrchr(*argv
, '/');
196 s1
= strrchr(s0
, '.');
203 /* if ((numsets == 0) && strncmp("ASCII", s0, n)) { */
204 /* printf("error - first codeset isn't ASCII!\n"); */
205 /* return EXIT_FAILURE; */
208 if (numsets
>= sizeof(codeset_index
)) {
209 fprintf(stderr
, "error - too many codesets!\n");
213 if (codeset_list_end
+ n
+ 1 + numsets
+ 1 + 1 >= 256) {
214 fprintf(stderr
, "error - codeset list to big!\n");
218 codeset_index
[numsets
+1] = codeset_index
[numsets
] + n
+1;
219 strncpy(codeset_list
+ codeset_list_end
, s0
, n
);
220 codeset_list_end
+= (n
+1);
221 codeset_list
[codeset_list_end
- 1] = 0;
223 printf("\t{ /* %.*s */", n
, s0
);
226 memset(&csd
[numsets
], 0, sizeof(charset_data
));
227 memset(xi
, 0, sizeof(xi
));
232 while (fgets(buf
,sizeof(buf
),fp
)) {
233 if ((2 != sscanf(buf
, "{ %lx , %lx", &c
, &wc
))
234 || (c
>= 256) || (wc
> MAX_WCHAR
)) {
235 fprintf(stderr
, "error: scanf failure! \"%s\"\n", buf
);
239 /* don't put in w2c... dynamically build tt instead. */
241 if (c
<= 0x7f) { /* check the 7bit entries but don't store */
243 fprintf(stderr
, "error: c != wc in %s\n", buf
);
246 csd
[numsets
].c2w
[c
] = wc
;
247 csd
[numsets
].w2c
[wc
] = 0; /* ignore */
248 if (wc
> max_wchar
) {
252 csd
[numsets
].c2w
[c
] = wc
;
253 csd
[numsets
].w2c
[wc
] = c
;
254 if (wc
> max_wchar
) {
260 fprintf(stderr
, "%d lines ", lines
);
262 for (i
= 0 ; i
<= MAX_WCHAR
; i
+= (1 << TT_SHIFT
)) {
263 p
= &csd
[numsets
].w2c
[i
];
264 for (j
= 0 ; j
< tt_num
; j
++) {
265 if (!memcmp(p
, &tt
[j
<< TT_SHIFT
], (1 << TT_SHIFT
))) {
269 if (j
== tt_num
) { /* new entry */
270 memcpy(&tt
[j
<< TT_SHIFT
], p
, (1 << TT_SHIFT
));
273 xi
[i
>> TT_SHIFT
] = j
;
276 for (i
= 0 ; i
<= (MAX_WCHAR
>> TT_SHIFT
) ; i
+= (1 << TI_SHIFT
)) {
278 for (j
= 0 ; j
< ti_num
; j
++) {
279 if (!memcmp(p
, &ti
[j
<< TI_SHIFT
], (1 << TI_SHIFT
))) {
283 if (j
== ti_num
) { /* new entry */
284 memcpy(&ti
[j
<< TI_SHIFT
], p
, (1 << TI_SHIFT
));
287 csd
[numsets
].ii
[i
>> TI_SHIFT
] = j
;
288 /* fprintf(stderr, "%d ", i >> TI_SHIFT); */
292 printf("\n\t\t/* idx8ctype data */\n\t\t{");
293 for (i
= 128 ; i
< 256 ; i
++) {
297 /* if (!(i & 0x7)) { */
301 c
= csd
[numsets
].c2w
[i
];
303 if (c
== 0) { /* non-existant char in codeset */
304 d
= __CTYPE_unclassified
;
305 } else if (iswdigit(c
)) {
307 } else if (iswalpha(c
)) {
308 d
= __CTYPE_alpha_nonupper_nonlower
;
310 d
= __CTYPE_alpha_lower
;
312 d
= __CTYPE_alpha_upper_lower
;
314 } else if (iswupper(c
)) {
315 d
= __CTYPE_alpha_upper
;
317 } else if (iswpunct(c
)) {
319 } else if (iswgraph(c
)) {
321 } else if (iswprint(c
)) {
322 d
= __CTYPE_print_space_nonblank
;
324 d
= __CTYPE_print_space_blank
;
326 } else if (iswspace(c
) && !iswcntrl(c
)) {
327 d
= __CTYPE_space_nonblank_noncntrl
;
329 d
= __CTYPE_space_blank_noncntrl
;
331 } else if (iswcntrl(c
)) {
332 d
= __CTYPE_cntrl_nonspace
;
334 d
= __CTYPE_cntrl_space_nonblank
;
336 d
= __CTYPE_cntrl_space_blank
;
340 d
= __CTYPE_unclassified
;
344 row
[i
& (CTYPE_ROW_LEN
-1)] = d
;
345 if ((i
& (CTYPE_ROW_LEN
-1)) == (CTYPE_ROW_LEN
-1)) {
347 for (j
=0 ; j
< n_ctype_rows
; j
++) {
348 if (!memcmp(p
, row
, CTYPE_ROW_LEN
)) {
353 if (j
== n_ctype_rows
) { /* new entry */
354 if (++n_ctype_rows
> 256) {
355 fprintf(stderr
, "error -- to many ctype rows!\n");
358 memcpy(p
, row
, CTYPE_ROW_LEN
);
360 csd
[numsets
].ctype_idx
[i
>> CTYPE_IDX_SHIFT
] = j
;
361 if (!((i
>> CTYPE_IDX_SHIFT
) & 0x7)
362 && (i
!= (127 + CTYPE_ROW_LEN
))
376 printf(",\n\t\t/* idx8uplow data */\n\t\t{");
377 for (i
= 128 ; i
< 256 ; i
++) {
379 /* if (!(i & 0x7)) { */
382 c
= csd
[numsets
].c2w
[i
];
387 if (u
>= 0x80) u
= csd
[numsets
].w2c
[u
];
388 if (l
>= 0x80) l
= csd
[numsets
].w2c
[l
];
390 if (u
== 0) u
= i
; /* upper is missing, so ignore */
391 if (l
== 0) l
= i
; /* lower is missing, so ignore */
394 /* store as unsigned char and let overflow handle it. */
395 /* if ((((u-i) < CHAR_MIN) || ((u-i) > CHAR_MAX)) */
396 /* || (((i-l) < CHAR_MIN) || ((i-l) > CHAR_MAX)) */
398 /* fprintf(stderr, "error - uplow diff out of range! %d %ld %ld\n", */
400 /* return EXIT_FAILURE; */
403 row
[i
& (UPLOW_ROW_LEN
-1)] = ((l
==i
) ? (u
-i
) : (i
-l
));
404 if ((i
& (UPLOW_ROW_LEN
-1)) == (UPLOW_ROW_LEN
-1)) {
406 for (j
=0 ; j
< n_uplow_rows
; j
++) {
407 if (!memcmp(p
, row
, UPLOW_ROW_LEN
)) {
412 if (j
== n_uplow_rows
) { /* new entry */
413 if (++n_uplow_rows
> 256) {
414 fprintf(stderr
, "error -- to many uplow rows!\n");
417 memcpy(p
, row
, UPLOW_ROW_LEN
);
419 csd
[numsets
].uplow_idx
[i
>> UPLOW_IDX_SHIFT
] = j
;
420 if (!((i
>> UPLOW_IDX_SHIFT
) & 0x7)
421 && (i
!= (127 + UPLOW_ROW_LEN
))
429 if (!(i
& 0x7) && i
) {
432 printf(" %4ld,", (l
==i
) ? (u
-i
) : (i
-l
));
433 /* printf(" %4ld,", (l==i) ? u : l); */
435 if ((u
!= i
) || (l
!= i
)) {
437 printf(" %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, \n",
441 (unsigned long) towlower(c
),
443 (unsigned long) towupper(c
));
446 printf(" %#08lx, %8ld, %d, %8ld, %d, %#08lx\n",
463 #else /* DO_WIDE_CHAR */
466 printf(",\n\t\t/* idx8c2wc data */\n\t\t{");
467 for (i
= 128 ; i
< 256 ; i
++) {
469 wrow
[i
& (C2WC_ROW_LEN
-1)] = csd
[numsets
].c2w
[i
];
470 if ((i
& (C2WC_ROW_LEN
-1)) == (C2WC_ROW_LEN
-1)) {
471 p
= (unsigned char *) c2wc_tbl
;
472 for (j
=0 ; j
< n_c2wc_rows
; j
++) {
473 if (!memcmp(p
, (char *) wrow
, 2*C2WC_ROW_LEN
)) {
478 if (j
== n_c2wc_rows
) { /* new entry */
479 if (++n_c2wc_rows
> 256) {
480 fprintf(stderr
, "error -- to many c2wc rows!\n");
483 memcpy(p
, (char *) wrow
, 2*C2WC_ROW_LEN
);
485 csd
[numsets
].c2wc_idx
[i
>> C2WC_IDX_SHIFT
] = j
;
486 if (!((i
>> C2WC_IDX_SHIFT
) & 0x7)
487 && (i
!= (127 + C2WC_ROW_LEN
))
494 if (!(i
& 0x7) && i
) {
497 printf(" %#6lx,", csd
[numsets
].c2w
[i
]);
504 /* fprintf(stderr, "\nII_LEN = %d\n", II_LEN); */
505 printf("\t\t/* idx8wc2c data */\n\t\t{");
506 for (i
= 0 ; i
< II_LEN
; i
++) {
507 if (!(i
& 0x7) && i
) {
510 printf(" %#4x,", csd
[numsets
].ii
[i
]);
515 #endif /* DO_WIDE_CHAR */
520 fprintf(stderr
, "done\n");
523 printf("\n#endif /* WANT_DATA */\n");
527 printf("#define __LOCALE_DATA_Cwc2c_DOMAIN_MAX\t%#x\n", RANGE
);
528 printf("#define __LOCALE_DATA_Cwc2c_TI_SHIFT\t\t%d\n", TI_SHIFT
);
529 printf("#define __LOCALE_DATA_Cwc2c_TT_SHIFT\t\t%d\n", TT_SHIFT
);
530 printf("#define __LOCALE_DATA_Cwc2c_II_LEN\t\t%d\n", II_LEN
);
531 printf("#define __LOCALE_DATA_Cwc2c_TI_LEN\t\t%d\n", ti_num
<< TI_SHIFT
);
532 printf("#define __LOCALE_DATA_Cwc2c_TT_LEN\t\t%d\n", tt_num
<< TT_SHIFT
);
535 printf("\n#define __LOCALE_DATA_Cwc2c_TBL_LEN\t\t%d\n",
536 (ti_num
<< TI_SHIFT
) + (tt_num
<< TT_SHIFT
));
538 printf("#ifdef WANT_DATA\n\n");
539 printf("static const unsigned char __LOCALE_DATA_Cwc2c_data[%d] = {\n",
540 (ti_num
<< TI_SHIFT
) + (tt_num
<< TT_SHIFT
));
541 printf("\t/* ti_table */\n\t");
542 for (i
=0 ; i
< ti_num
<< TI_SHIFT
; i
++) {
546 printf(" %#4x,", ti
[i
]);
549 printf("\t/* tt_table */\n\t");
550 for (i
=0 ; i
< tt_num
<< TT_SHIFT
; i
++) {
554 printf(" %#4x,", tt
[i
]);
558 printf("\n#endif /* WANT_DATA */\n");
559 #endif /* DO_WIDE_CHAR */
561 printf("\n#define __LOCALE_DATA_Cuplow_TBL_LEN\t\t%d\n",
562 n_uplow_rows
* UPLOW_ROW_LEN
);
563 printf("\n#ifdef WANT_DATA\n\n");
565 printf("\nstatic const unsigned char __LOCALE_DATA_Cuplow_data[%d] = {\n",
566 n_uplow_rows
* UPLOW_ROW_LEN
);
568 for (j
=0 ; j
< n_uplow_rows
; j
++) {
570 for (i
=0 ; i
< UPLOW_ROW_LEN
; i
++) {
571 printf(" %#4x,", (unsigned int)((unsigned char) p
[i
]));
578 printf("\n#endif /* WANT_DATA */\n");
579 printf("\n#define __LOCALE_DATA_Cctype_TBL_LEN\t\t%d\n",
581 n_ctype_rows
* CTYPE_ROW_LEN
/ 2
583 n_ctype_rows
* CTYPE_ROW_LEN
586 printf("\n#ifdef WANT_DATA\n\n");
589 printf("\nstatic const unsigned char __LOCALE_DATA_Cctype_data[%d] = {\n",
591 n_ctype_rows
* CTYPE_ROW_LEN
/ 2
593 n_ctype_rows
* CTYPE_ROW_LEN
597 for (j
=0 ; j
< n_ctype_rows
; j
++) {
599 for (i
=0 ; i
< CTYPE_ROW_LEN
; i
++) {
601 printf(" %#4x,", (unsigned int)(p
[i
] + (p
[i
+1] << 4)));
604 printf(" %#4x,", (unsigned int)p
[i
]);
612 printf("\n#endif /* WANT_DATA */\n");
616 printf("\n#define __LOCALE_DATA_Cc2wc_TBL_LEN\t\t%d\n",
617 n_c2wc_rows
* C2WC_ROW_LEN
);
618 printf("\n#ifdef WANT_DATA\n\n");
620 printf("\nstatic const unsigned short __LOCALE_DATA_Cc2wc_data[%d] = {\n",
621 n_c2wc_rows
* C2WC_ROW_LEN
);
622 p
= (unsigned char *) c2wc_tbl
;
623 for (j
=0 ; j
< n_c2wc_rows
; j
++) {
625 for (i
=0 ; i
< C2WC_ROW_LEN
; i
++) {
626 printf(" %#6x,", (unsigned int)(((unsigned short *)p
)[i
]));
632 printf("\n#endif /* WANT_DATA */\n");
633 #endif /* DO_WIDE_CHAR */
636 printf("#define __LOCALE_DATA_NUM_CODESETS\t\t%d\n", numsets
);
637 printf("#define __LOCALE_DATA_CODESET_LIST \\\n\t\"");
638 for (i
=0 ; i
< numsets
; i
++) {
639 printf("\\x%02x", numsets
+ 1 + (unsigned char) codeset_index
[i
]);
640 if (((i
& 7) == 7) && (i
+ 1 < numsets
)) {
641 printf("\" \\\n\t\"");
644 printf("\" \\\n\t\"\\0\"");
645 for (i
=0 ; i
< numsets
; i
++) {
646 printf(" \\\n\t\"%s\\0\"",
647 codeset_list
+ ((unsigned char)codeset_index
[i
]));
651 for (i
=0 ; i
< numsets
; i
++) {
654 strcpy(buf
, codeset_list
+ ((unsigned char)codeset_index
[i
]));
655 for (z
=buf
; *z
; z
++) {
660 printf("#define __CTYPE_HAS_CODESET_%s\n", buf
);
663 printf("#define __CTYPE_HAS_CODESET_UTF_8\n");
664 #endif /* DO_WIDE_CHAR */
667 printf("\n#endif /* __CTYPE_HAS_8_BIT_LOCALES */\n\n");
672 fprintf(stderr
, "tt_num = %d ti_num = %d\n", tt_num
, ti_num
);
673 fprintf(stderr
, "max_wchar = %#lx\n", max_wchar
);
675 fprintf(stderr
, "size is %d * %d + %d * %d + %d * %d = %d\n",
676 tt_num
, 1 << TT_SHIFT
, ti_num
, 1 << TI_SHIFT
,
677 ((MAX_WCHAR
>> (TT_SHIFT
+ TI_SHIFT
)) + 1), numsets
,
678 j
= tt_num
* (1 << TT_SHIFT
) + ti_num
* (1 << TI_SHIFT
)
679 + ((MAX_WCHAR
>> (TT_SHIFT
+ TI_SHIFT
)) + 1) * numsets
);
681 #endif /* DO_WIDE_CHAR */
689 fprintf(stderr
, "ctype - CTYPE_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n",
690 CTYPE_IDX_SHIFT
, numsets
, CTYPE_IDX_LEN
, n_ctype_rows
, CTYPE_ROW_LEN
/ i
,
691 j
= numsets
* CTYPE_IDX_LEN
+ n_ctype_rows
* CTYPE_ROW_LEN
/ i
);
694 fprintf(stderr
, "uplow - UPLOW_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n",
695 UPLOW_IDX_SHIFT
, numsets
, UPLOW_IDX_LEN
, n_uplow_rows
, UPLOW_ROW_LEN
,
696 j
= numsets
* UPLOW_IDX_LEN
+ n_uplow_rows
* UPLOW_ROW_LEN
);
701 fprintf(stderr
, "c2wc - C2WC_IDX_SHIFT = %d -- %d * %d + 2 * %d * %d = %d\n",
702 C2WC_IDX_SHIFT
, numsets
, C2WC_IDX_LEN
, n_c2wc_rows
, C2WC_ROW_LEN
,
703 j
= numsets
* C2WC_IDX_LEN
+ 2 * n_c2wc_rows
* C2WC_ROW_LEN
);
706 #endif /* DO_WIDE_CHAR */
708 fprintf(stderr
, "total size = %d\n", total_size
);
710 /* for (i=0 ; i < numsets ; i++) { */
711 /* printf("codeset_index[i] = %d codeset_list[ci[i]] = \"%s\"\n", */
712 /* (unsigned char) codeset_index[i], */
713 /* codeset_list + ((unsigned char)codeset_index[i])); */