locales: add patch from OpenWrt
[uclibc-ng.git] / extra / locale / gen_wctype.c
blobd5b5c5c28b311fee35fd28bcd4ab8d24e64fceae
/*
 * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */
6 #define _GNU_SOURCE
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <locale.h>
11 #include <wctype.h>
12 #include <limits.h>
13 #include <stdint.h>
14 #include <wchar.h>
15 #include <ctype.h>
17 #include "include/bits/uClibc_charclass.h"
/*       0x9 : space  blank */
/*       0xa : space */
/*       0xb : space */
/*       0xc : space */
/*       0xd : space */
/*      0x20 : space  blank */
/*    0x1680 : space  blank */
/*    0x2000 : space  blank */
/*    0x2001 : space  blank */
/*    0x2002 : space  blank */
/*    0x2003 : space  blank */
/*    0x2004 : space  blank */
/*    0x2005 : space  blank */
/*    0x2006 : space  blank */
/*    0x2008 : space  blank */
/*    0x2009 : space  blank */
/*    0x200a : space  blank */
/*    0x200b : space  blank */
/*    0x2028 : space */
/*    0x2029 : space */
/*    0x3000 : space  blank */

/*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
/*  typecount[ 1] =      742  C_alpha_lower */
/*  typecount[ 2] =        4  C_alpha_upper_lower */
/*  typecount[ 3] =      731  C_alpha_upper */
/*  typecount[ 4] =       10  C_digit */
/*  typecount[ 5] =    10270  C_punct */
/*  typecount[ 6] =        0  C_graph */
/*  typecount[ 7] =        0  C_print_space_nonblank */
/*  typecount[ 8] =       14  C_print_space_blank */
/*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
/*  typecount[10] =        0  C_space_blank_noncntrl */
/*  typecount[11] =        6  C_cntrl_space_nonblank */
/*  typecount[12] =        1  C_cntrl_space_blank */
/*  typecount[13] =       60  C_cntrl_nonspace */
/*  typecount[14] =    96100  C_unclassified */
/*  typecount[15] =        0  empty_slot */
/*
 * RANGE is the highest wide-character value covered by the generated
 * tables; every array in this file is sized from it.  Exactly one of the
 * three definitions below is active.
 */
/* Set to #if 0 to restrict wchars to 16 bits. */
#if 1
#define RANGE 0x2ffffUL
#elif 0
#define RANGE 0x1ffffUL
#else
#define RANGE 0xffffUL			/* Restrict for 16-bit wchar_t... */
#endif
/* Some macros that test for various (w)ctype classes when passed one of the
 * designator values enumerated above.
 *
 * D is a class designator (one of the __CTYPE_* values) and C is the
 * character itself; only mywxdigit() looks at C.  Most tests are unsigned
 * range checks, so they depend on the numeric order of the __CTYPE_*
 * designators.  D is parenthesized in every expansion so that an expression
 * argument (for example "x | 1") is evaluated as a unit. */
#define mywalnum(D,C)		((unsigned)((D) - 1) <= (__CTYPE_digit - 1))
#define mywalpha(D,C)		((unsigned)((D) - 1) <= (__CTYPE_alpha_upper - 1))
#define mywblank(D,C)		((unsigned)((D) - __CTYPE_print_space_nonblank) <= 5 && ((D) & 1))
#define mywcntrl(D,C)		((unsigned)((D) - __CTYPE_cntrl_space_nonblank) <= 2)
#define mywdigit(D,C)		((D) == __CTYPE_digit)
#define mywgraph(D,C)		((unsigned)((D) - 1) <= (__CTYPE_graph - 1))
#define mywlower(D,C)		((unsigned)((D) - __CTYPE_alpha_lower) <= 1)
#define mywprint(D,C)		((unsigned)((D) - 1) <= (__CTYPE_print_space_blank - 1))
#define mywpunct(D,C)		((D) == __CTYPE_punct)
#define mywspace(D,C)		((unsigned)((D) - __CTYPE_print_space_nonblank) <= 5)
#define mywupper(D,C)		((unsigned)((D) - __CTYPE_alpha_upper_lower) <= 1)
/* #define mywxdigit(D,C) -- isxdigit is untestable this way.
 * But that's ok as isxdigit() (and isdigit() too) are locale-invariant. */
#define mywxdigit(D,C)		(mywdigit(D,C) || (unsigned)(((C) | 0x20) - 'a') <= 5)
/*
 * One distinct case-mapping displacement pair.  main() stores
 * towlower(c) - c in l and towupper(c) - c in u, after checking that both
 * differences fit in a short.
 */
typedef struct {
	short l;	/* offset to add to reach the lowercase form */
	short u;	/* offset to add to reach the uppercase form */
} uldiff_entry;
/*
 * Description of one three-level lookup table produced by newopt().
 * ii, ti and ut point into a single allocation, laid out in that order
 * (newopt() sets ti = ii + ii_len and ut = ti + ti_len).
 */
typedef struct {
	uint16_t ii_len;	/* number of bytes in ii */
	uint16_t ti_len;	/* number of bytes in ti */
	uint16_t ut_len;	/* number of bytes in ut */

	unsigned char ii_shift;	/* log2 of the block size used to index ti */
	unsigned char ti_shift;	/* log2 of the block size used to index ut */

	unsigned char *ii;	/* first-level index */
	unsigned char *ti;	/* second-level index */
	unsigned char *ut;	/* unique data blocks */
} table_data;
/* Diagnostic verbosity; main() increments it for every "-v" argument. */
static unsigned verbose;

/*
 * printf-style diagnostic written to stderr only when verbose is non-zero.
 * The do { } while (0) wrapper makes the macro a single statement, so it
 * can be used as the body of an unbraced if/else without capturing the
 * caller's else branch.
 */
#define verbose_msg(...) \
	do { if (verbose) fprintf(stderr, __VA_ARGS__); } while (0)
106 void output_table(const char *name, table_data *tbl)
108 size_t i;
110 printf("#define __LOCALE_DATA_WC%s_II_LEN %7u\n", name, tbl->ii_len);
111 printf("#define __LOCALE_DATA_WC%s_TI_LEN %7u\n", name, tbl->ti_len);
112 printf("#define __LOCALE_DATA_WC%s_UT_LEN %7u\n", name, tbl->ut_len);
114 printf("#define __LOCALE_DATA_WC%s_II_SHIFT %7u\n", name, tbl->ii_shift);
115 printf("#define __LOCALE_DATA_WC%s_TI_SHIFT %7u\n", name, tbl->ti_shift);
117 printf("\n#ifdef WANT_WC%s_data\n", name);
119 i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
120 printf("\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i);
121 for (i = 0; i < tbl->ii_len; i++) {
122 if (i % 12 == 0) {
123 printf("\n");
125 printf(" %#04x,", tbl->ii[i]);
127 for (i = 0; i < tbl->ti_len; i++) {
128 if (i % 12 == 0) {
129 printf("\n");
131 printf(" %#04x,", tbl->ti[i]);
133 for (i = 0; i < tbl->ut_len; i++) {
134 if (i % 12 == 0) {
135 printf("\n");
137 printf(" %#04x,", tbl->ut[i]);
139 printf("\n};\n\n");
141 printf("#endif /* WANT_WC%s_data */\n\n", name);
144 static void dump_table_data(table_data *tbl)
146 verbose_msg("ii_shift = %d ti_shift = %d\n"
147 "ii_len = %d ti_len = %d ut_len = %d\n"
148 "total = %d\n",
149 tbl->ii_shift, tbl->ti_shift,
150 tbl->ii_len, tbl->ti_len, tbl->ut_len,
151 (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
/* For sorting the blocks of unsigned chars. */
/* Number of bytes nu_memcmp() compares; newopt() sets it before qsort(). */
static size_t nu_val;

/*
 * qsort() comparator for an array of "unsigned char *" block pointers.
 * a and b each point at one array element; the blocks they refer to are
 * ordered by memcmp() over the first nu_val bytes.
 */
int nu_memcmp(const void *a, const void *b)
{
	const unsigned char *const *lhs = a;
	const unsigned char *const *rhs = b;

	return memcmp(*lhs, *rhs, nu_val);
}
162 static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
164 #define MAXTO 255 /* Restrict to minimal unsigned char max. */
166 int main(int argc, char **argv)
168 long int u, l, tt;
169 size_t smallest, t;
170 unsigned int c;
171 unsigned int d;
172 int i, n;
173 int ul_count = 0;
174 uldiff_entry uldiff[MAXTO];
175 table_data cttable;
176 table_data ultable;
177 #if 0
178 table_data combtable;
179 table_data widthtable;
180 long int last_comb = 0;
181 #endif
182 unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
183 unsigned char ult[RANGE+1]; /* upper/lower table */
184 unsigned char combt[(RANGE/4)+1]; /* combining */
185 unsigned char widtht[(RANGE/4)+1]; /* width */
186 wctrans_t totitle;
187 wctype_t is_comb, is_comb3;
189 long int typecount[16];
190 const char *typename[16];
191 static const char empty_slot[] = "empty_slot";
192 int built = 0;
194 #define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
196 for (i = 0; i < 16; i++) {
197 typename[i] = empty_slot;
200 INIT_TYPENAME(unclassified);
201 INIT_TYPENAME(alpha_nonupper_nonlower);
202 INIT_TYPENAME(alpha_lower);
203 INIT_TYPENAME(alpha_upper_lower);
204 INIT_TYPENAME(alpha_upper);
205 INIT_TYPENAME(digit);
206 INIT_TYPENAME(punct);
207 INIT_TYPENAME(graph);
208 INIT_TYPENAME(print_space_nonblank);
209 INIT_TYPENAME(print_space_blank);
210 INIT_TYPENAME(space_nonblank_noncntrl);
211 INIT_TYPENAME(space_blank_noncntrl);
212 INIT_TYPENAME(cntrl_space_nonblank);
213 INIT_TYPENAME(cntrl_space_blank);
214 INIT_TYPENAME(cntrl_nonspace);
216 memset(&cttable, 0, sizeof(table_data));
217 memset(&ultable, 0, sizeof(table_data));
218 #if 0
219 memset(combtable, 0, sizeof(table_data));
220 memset(widthtable, 0, sizeof(table_data));
221 #endif
222 setvbuf(stdout, NULL, _IONBF, 0);
224 while (--argc) {
225 ++argv;
226 if (!strcmp(*argv, "-v")) {
227 ++verbose;
228 continue;
230 /* setlocale might be just a stub */
231 /* if (!setlocale(LC_CTYPE, *argv)) {
232 verbose_msg("setlocale(LC_CTYPE,%s) failed! Skipping this locale...\n", *argv);
233 continue;
236 if (!(totitle = wctrans("totitle"))) {
237 verbose_msg("no totitle transformation.\n");
239 if (!(is_comb = wctype("combining"))) {
240 verbose_msg("no combining wctype.\n");
242 if (!(is_comb3 = wctype("combining_level3"))) {
243 verbose_msg("no combining_level3 wctype.\n");
246 if (!built) {
247 built = 1;
248 ul_count = 1;
249 uldiff[0].u = uldiff[0].l = 0;
251 memset(wct, 0, sizeof(wct));
252 memset(combt, 0, sizeof(combt));
253 memset(widtht, 0, sizeof(widtht));
255 for (i = 0; i < 16; i++) {
256 typecount[i] = 0;
259 for (c = 0; c <= RANGE; c++) {
260 if (iswdigit(c)) {
261 d = __CTYPE_digit;
262 } else if (iswalpha(c)) {
263 d = __CTYPE_alpha_nonupper_nonlower;
264 if (iswlower(c)) {
265 d = __CTYPE_alpha_lower;
266 if (iswupper(c)) {
267 d = __CTYPE_alpha_upper_lower;
269 } else if (iswupper(c)) {
270 d = __CTYPE_alpha_upper;
272 } else if (iswpunct(c)) {
273 d = __CTYPE_punct;
274 } else if (iswgraph(c)) {
275 d = __CTYPE_graph;
276 } else if (iswprint(c)) {
277 d = __CTYPE_print_space_nonblank;
278 if (iswblank(c)) {
279 d = __CTYPE_print_space_blank;
281 } else if (iswspace(c) && !iswcntrl(c)) {
282 d = __CTYPE_space_nonblank_noncntrl;
283 if (iswblank(c)) {
284 d = __CTYPE_space_blank_noncntrl;
286 } else if (iswcntrl(c)) {
287 d = __CTYPE_cntrl_nonspace;
288 if (iswspace(c)) {
289 d = __CTYPE_cntrl_space_nonblank;
290 if (iswblank(c)) {
291 d = __CTYPE_cntrl_space_blank;
294 } else {
295 d = __CTYPE_unclassified;
298 ++typecount[d];
299 #if 0
300 if (iswspace(c)) {
301 if (iswblank(c)) {
302 verbose_msg("%#8x : space blank\n", c);
303 } else {
304 verbose_msg("%#8x : space\n", c);
307 #endif
308 #if 0
309 if (c < 256) {
310 unsigned int curr_stdclib;
312 curr_stdclib = 0;
313 if (isalnum(c)) ++curr_stdclib; curr_stdclib <<= 1;
314 if (isalpha(c)) ++curr_stdclib; curr_stdclib <<= 1;
315 if (isblank(c)) ++curr_stdclib; curr_stdclib <<= 1;
316 if (iscntrl(c)) ++curr_stdclib; curr_stdclib <<= 1;
317 if (isdigit(c)) ++curr_stdclib; curr_stdclib <<= 1;
318 if (isgraph(c)) ++curr_stdclib; curr_stdclib <<= 1;
319 if (islower(c)) ++curr_stdclib; curr_stdclib <<= 1;
320 if (isprint(c)) ++curr_stdclib; curr_stdclib <<= 1;
321 if (ispunct(c)) ++curr_stdclib; curr_stdclib <<= 1;
322 if (isspace(c)) ++curr_stdclib; curr_stdclib <<= 1;
323 if (isupper(c)) ++curr_stdclib; curr_stdclib <<= 1;
324 if (isxdigit(c)) ++curr_stdclib;
325 verbose_msg("%#8x : ctype %#4x\n", c, curr_stdclib);
327 #endif
328 #if 1
329 /* Paranoid checking... */
331 unsigned int curr_stdclib;
332 unsigned int mine;
334 curr_stdclib = 0;
335 if (iswalnum(c)) ++curr_stdclib; curr_stdclib <<= 1;
336 if (iswalpha(c)) ++curr_stdclib; curr_stdclib <<= 1;
337 if (iswblank(c)) ++curr_stdclib; curr_stdclib <<= 1;
338 if (iswcntrl(c)) ++curr_stdclib; curr_stdclib <<= 1;
339 if (iswdigit(c)) ++curr_stdclib; curr_stdclib <<= 1;
340 if (iswgraph(c)) ++curr_stdclib; curr_stdclib <<= 1;
341 if (iswlower(c)) ++curr_stdclib; curr_stdclib <<= 1;
342 if (iswprint(c)) ++curr_stdclib; curr_stdclib <<= 1;
343 if (iswpunct(c)) ++curr_stdclib; curr_stdclib <<= 1;
344 if (iswspace(c)) ++curr_stdclib; curr_stdclib <<= 1;
345 if (iswupper(c)) ++curr_stdclib; curr_stdclib <<= 1;
346 if (iswxdigit(c)) ++curr_stdclib;
348 mine = 0;
349 if (mywalnum(d,c)) ++mine; mine <<= 1;
350 if (mywalpha(d,c)) ++mine; mine <<= 1;
351 if (mywblank(d,c)) ++mine; mine <<= 1;
352 if (mywcntrl(d,c)) ++mine; mine <<= 1;
353 if (mywdigit(d,c)) ++mine; mine <<= 1;
354 if (mywgraph(d,c)) ++mine; mine <<= 1;
355 if (mywlower(d,c)) ++mine; mine <<= 1;
356 if (mywprint(d,c)) ++mine; mine <<= 1;
357 if (mywpunct(d,c)) ++mine; mine <<= 1;
358 if (mywspace(d,c)) ++mine; mine <<= 1;
359 if (mywupper(d,c)) ++mine; mine <<= 1;
360 if (mywxdigit(d,c)) ++mine;
362 if (curr_stdclib != mine) {
363 verbose_msg("%#8x : curr_stdclib %#4x != %#4x mine %u\n", c, curr_stdclib, mine, d);
364 return EXIT_FAILURE;
366 #if 0
367 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
368 /* if (!iswpunct(c)) { */
369 verbose_msg("%#8x : %d %d %#4x\n",
370 c, iswctype(c,is_comb),iswctype(c,is_comb3), curr_stdclib);
371 /* } */
373 #endif
374 #if 0
375 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
376 if (!last_comb) {
377 verbose_msg("%#8x - ", c);
378 last_comb = c;
379 } else if (last_comb + 1 < c) {
380 verbose_msg("%#8x\n%#8x - ", last_comb, c);
381 last_comb = c;
382 } else {
383 last_comb = c;
386 #endif
388 #endif
390 combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
391 << ((c & 3) << 1));
392 /* comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
394 /* widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
396 if (c & 1) { /* Use the high nibble for odd numbered wchars. */
397 d <<= 4;
399 wct[c/2] |= d;
401 l = (long)(int) towlower(c) - c;
402 u = (long)(int) towupper(c) - c;
403 ult[c] = 0;
404 if (l || u) {
405 if ((l != (short)l) || (u != (short)u)) {
406 verbose_msg("range assumption error! %x %ld %ld\n", c, l, u);
407 return EXIT_FAILURE;
409 for (i = 0; i < ul_count; i++) {
410 if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
411 goto found;
414 uldiff[ul_count].l = l;
415 uldiff[ul_count].u = u;
416 ++ul_count;
417 if (ul_count > MAXTO) {
418 verbose_msg("too many touppers/tolowers!\n");
419 return EXIT_FAILURE;
421 found:
422 ult[c] = i;
426 for (i = 0; i < 16; i++) {
427 verbose_msg("typecount[%2d] = %8ld %s\n", i, typecount[i], typename[i]);
430 verbose_msg("optimizing is* table..\n");
431 n = -1;
432 smallest = SIZE_MAX;
433 cttable.ii = NULL;
434 for (i = 0; i < 14; i++) {
435 t = newopt(wct, (RANGE/2)+1, i, &cttable);
436 if (smallest >= t) {
437 n = i;
438 smallest = t;
439 /* } else { */
440 /* break; */
443 verbose_msg("smallest = %zu\n", smallest);
444 if (!(cttable.ii = malloc(smallest))) {
445 verbose_msg("couldn't allocate space!\n");
446 return EXIT_FAILURE;
448 smallest = SIZE_MAX;
449 newopt(wct, (RANGE/2)+1, n, &cttable);
450 ++cttable.ti_shift; /* correct for nibble mode */
452 verbose_msg("optimizing u/l-to table..\n");
453 smallest = SIZE_MAX;
454 ultable.ii = NULL;
455 for (i = 0; i < 14; i++) {
456 t = newopt(ult, RANGE+1, i, &ultable);
457 if (smallest >= t) {
458 n = i;
459 smallest = t;
460 /* } else { */
461 /* break; */
464 verbose_msg("%lu (smallest) + %lu (u/l diffs) = %lu\n",
465 (unsigned long) smallest,
466 (unsigned long) (4 * ul_count),
467 (unsigned long) (smallest + 4 * ul_count)
469 verbose_msg("smallest = %zu\n", smallest);
470 if (!(ultable.ii = malloc(smallest))) {
471 verbose_msg("couldn't allocate space!\n");
472 return EXIT_FAILURE;
474 smallest = SIZE_MAX;
475 newopt(ult, RANGE+1, n, &ultable);
476 #if 0
477 verbose_msg("optimizing comb table..\n");
478 smallest = SIZE_MAX;
479 combtable.ii = NULL;
480 for (i = 0; i < 14; i++) {
481 t = newopt(combt, sizeof(combt), i, &combtable);
482 if (smallest >= t) {
483 n = i;
484 smallest = t;
485 /* } else { */
486 /* break; */
489 verbose_msg("smallest = %zu\n", smallest);
490 if (!(combtable.ii = malloc(smallest))) {
491 verbose_msg("couldn't allocate space!\n");
492 return EXIT_FAILURE;
494 smallest = SIZE_MAX;
495 newopt(combt, sizeof(combt), n, &combtable);
496 combtable.ti_shift += 4; /* correct for 4 entries per */
497 #endif
498 #if 0
499 verbose_msg("optimizing width table..\n");
500 smallest = SIZE_MAX;
501 widthtable.ii = NULL;
502 for (i = 0; i < 14; i++) {
503 t = newopt(widtht, sizeof(widtht), i, &widthtable);
504 if (smallest >= t) {
505 n = i;
506 smallest = t;
507 /* } else { */
508 /* break; */
511 verbose_msg("smallest = %zu\n", smallest);
512 if (!(widthtable.ii = malloc(smallest))) {
513 verbose_msg("couldn't allocate space!\n");
514 return EXIT_FAILURE;
516 smallest = SIZE_MAX;
517 newopt(widtht, sizeof(widtht), n, &widthtable);
518 widthtable.ti_shift += 4; /* correct for 4 entries per */
519 #endif
520 #if 0
521 verbose_msg("optimizing comb3 table..\n");
522 smallest = SIZE_MAX;
523 comb3table.ii = NULL;
524 for (i = 0; i < 14; i++) {
525 t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
526 if (smallest >= t) {
527 n = i;
528 smallest = t;
529 /* } else { */
530 /* break; */
533 verbose_msg("smallest = %zu\n", smallest);
534 if (!(comb3table.ii = malloc(smallest))) {
535 verbose_msg("couldn't allocate space!\n");
536 return EXIT_FAILURE;
538 smallest = SIZE_MAX;
539 newopt(comb3t, sizeof(comb3t), n, &comb3table);
540 comb3table.ti_shift += 8; /* correct for 4 entries per */
541 #endif
543 dump_table_data(&cttable);
544 dump_table_data(&ultable);
545 #if 0
546 dump_table_data(&combtable);
547 #endif
550 verbose_msg("verifying for %s...\n", *argv);
551 #if RANGE == 0xffffU
552 for (c = 0; c <= 0xffffUL; c++)
553 #else
554 for (c = 0; c <= 0x10ffffUL; c++)
555 #endif
557 unsigned int curr_stdclib;
558 unsigned int mine;
559 unsigned int upper, lower;
561 #if 0
562 #if RANGE < 0x10000UL
563 if (c == 0x10000UL) {
564 c = 0x30000UL; /* skip 1st and 2nd sup planes */
566 #elif RANGE < 0x20000UL
567 if (c == 0x20000UL) {
568 c = 0x30000UL; /* skip 2nd sup planes */
570 #endif
571 #endif
572 curr_stdclib = 0;
573 if (iswalnum(c)) ++curr_stdclib; curr_stdclib <<= 1;
574 if (iswalpha(c)) ++curr_stdclib; curr_stdclib <<= 1;
575 if (iswblank(c)) ++curr_stdclib; curr_stdclib <<= 1;
576 if (iswcntrl(c)) ++curr_stdclib; curr_stdclib <<= 1;
577 if (iswdigit(c)) ++curr_stdclib; curr_stdclib <<= 1;
578 if (iswgraph(c)) ++curr_stdclib; curr_stdclib <<= 1;
579 if (iswlower(c)) ++curr_stdclib; curr_stdclib <<= 1;
580 if (iswprint(c)) ++curr_stdclib; curr_stdclib <<= 1;
581 if (iswpunct(c)) ++curr_stdclib; curr_stdclib <<= 1;
582 if (iswspace(c)) ++curr_stdclib; curr_stdclib <<= 1;
583 if (iswupper(c)) ++curr_stdclib; curr_stdclib <<= 1;
584 if (iswxdigit(c)) ++curr_stdclib;
587 unsigned int u;
588 int n = 0, sc = 0; /* = 0 for verbose_msg only */
589 int i0 = 0, i1 = 0;
591 u = c;
592 if (u <= RANGE) {
593 sc = u & ((1 << cttable.ti_shift) - 1);
594 u >>= cttable.ti_shift;
595 n = u & ((1 << cttable.ii_shift) - 1);
596 u >>= cttable.ii_shift;
598 i0 = cttable.ii[u];
599 i0 <<= cttable.ii_shift;
600 i1 = cttable.ti[i0 + n];
601 i1 <<= (cttable.ti_shift - 1);
602 d = cttable.ut[i1 + (sc >> 1)];
604 if (sc & 1) {
605 d >>= 4;
607 d &= 0x0f;
608 } else if (((unsigned)(c - 0xe0020UL) <= 0x5f) || (c == 0xe0001UL)) {
609 d = __CTYPE_punct;
610 } else if ((unsigned)(c - 0xf0000UL) < 0x20000UL) {
611 if ((c & 0xffffU) <= 0xfffdU) {
612 d = __CTYPE_punct;
613 } else {
614 d = __CTYPE_unclassified;
616 } else {
617 d = __CTYPE_unclassified;
620 mine = 0;
621 if (mywalnum(d,c)) ++mine; mine <<= 1;
622 if (mywalpha(d,c)) ++mine; mine <<= 1;
623 if (mywblank(d,c)) ++mine; mine <<= 1;
624 if (mywcntrl(d,c)) ++mine; mine <<= 1;
625 if (mywdigit(d,c)) ++mine; mine <<= 1;
626 if (mywgraph(d,c)) ++mine; mine <<= 1;
627 if (mywlower(d,c)) ++mine; mine <<= 1;
628 if (mywprint(d,c)) ++mine; mine <<= 1;
629 if (mywpunct(d,c)) ++mine; mine <<= 1;
630 if (mywspace(d,c)) ++mine; mine <<= 1;
631 if (mywupper(d,c)) ++mine; mine <<= 1;
632 if (mywxdigit(d,c)) ++mine;
634 if (curr_stdclib != mine) {
635 verbose_msg("%#8x : curr_stdclib %#4x != %#4x mine %d\n", c, curr_stdclib, mine, d);
636 if (c < 0x30000UL) {
637 verbose_msg("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
641 upper = lower = u = c;
642 if (u <= RANGE) {
643 sc = u & ((1 << ultable.ti_shift) - 1);
644 u >>= ultable.ti_shift;
645 n = u & ((1 << ultable.ii_shift) - 1);
646 u >>= ultable.ii_shift;
648 i0 = ultable.ii[u];
649 i0 <<= ultable.ii_shift;
650 i1 = ultable.ti[i0 + n];
651 i1 <<= (ultable.ti_shift);
652 i1 += sc;
653 i0 = ultable.ut[i1];
654 upper = c + uldiff[i0].u;
655 lower = c + uldiff[i0].l;
658 if (towupper(c) != upper) {
659 verbose_msg("%#8x : towupper curr_stdclib %#4x != %#4x mine\n",
660 c, towupper(c), upper);
663 if (towlower(c) != lower) {
664 verbose_msg("%#8x : towlower curr_stdclib %#4x != %#4x mine i0 = %d\n",
665 c, towlower(c), lower, i0);
668 if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
669 verbose_msg("%#8x : totitle curr_stdclib %#4lx != %#4x mine i0 = %d\n",
670 c, tt, upper, i0);
674 if ((c & 0xfff) == 0xfff) verbose_msg(".");
676 verbose_msg("done\n");
679 if (built) {
680 printf("#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX %#8lx\n\n",
681 (unsigned long) RANGE);
682 output_table("ctype", &cttable);
683 output_table("uplow", &ultable);
685 #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
686 printf("#define __LOCALE_DATA_WCuplow_diffs %7u\n", ul_count);
687 printf("\n#ifdef WANT_WCuplow_diff_data\n\n");
688 printf("\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
689 2 * (size_t) ul_count);
690 for (i = 0; i < ul_count; i++) {
691 if (i % 4 == 0) {
692 printf("\n");
694 printf(" %6d, %6d,", uldiff[i].u, uldiff[i].l);
696 printf("\n};\n\n");
697 printf("#endif /* WANT_WCuplow_diff_data */\n\n");
699 /* output_table("comb", &combtable); */
700 /* output_table("width", &widthtable); */
703 return !built;
706 size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
708 static int recurse;
709 unsigned char *ti[RANGE+1]; /* table index */
710 size_t numblocks;
711 size_t blocksize;
712 size_t uniq;
713 size_t i, j;
714 size_t smallest, t;
715 unsigned char *ii_save;
716 int uniqblock[256];
717 unsigned char uit[RANGE+1];
718 int shift2;
720 memset(uniqblock, 0x00, sizeof(uniqblock));
722 ii_save = NULL;
723 blocksize = 1 << shift;
724 numblocks = usize >> shift;
726 /* init table index */
727 for (i=j = 0; i < numblocks; i++) {
728 ti[i] = ut + j;
729 j += blocksize;
732 /* sort */
733 nu_val = blocksize;
734 qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
736 uniq = 1;
737 uit[(ti[0]-ut)/blocksize] = 0;
738 for (i=1; i < numblocks; i++) {
739 if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
740 if (++uniq > 255) {
741 break;
743 uniqblock[uniq - 1] = i;
745 #if 1
746 else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
747 verbose_msg("bad sort %li!\n", (long) i);
748 abort();
750 #endif
751 uit[(ti[i]-ut)/blocksize] = uniq - 1;
754 smallest = SIZE_MAX;
755 shift2 = -1;
757 if (uniq > 255)
758 return SIZE_MAX;
760 smallest = numblocks + uniq * blocksize;
761 if (!recurse) {
762 ++recurse;
763 for (j=1; j < 14; j++) {
764 if ((numblocks >> j) < 2) break;
765 if (tbl) {
766 ii_save = tbl->ii;
767 tbl->ii = NULL;
769 if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
770 t += uniq * blocksize;
772 if (tbl) {
773 tbl->ii = ii_save;
775 if (smallest >= t) {
776 shift2 = j;
777 smallest = t;
778 if (!tbl->ii) {
779 verbose_msg("ishift %u tshift %u size %lu\n",
780 shift2, shift, (unsigned long) t);
782 /* } else { */
783 /* break; */
786 --recurse;
789 if (tbl->ii) {
790 if (recurse) {
791 tbl->ii_shift = shift;
792 tbl->ii_len = numblocks;
793 memcpy(tbl->ii, uit, numblocks);
794 tbl->ti = tbl->ii + tbl->ii_len;
795 tbl->ti_len = uniq * blocksize;
796 for (i = 0; i < uniq; i++) {
797 memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
799 } else {
800 ++recurse;
801 verbose_msg("setting ishift %u tshift %u\n",
802 shift2, shift);
803 newopt(uit, numblocks, shift2, tbl);
804 --recurse;
805 tbl->ti_shift = shift;
806 tbl->ut_len = uniq * blocksize;
807 tbl->ut = tbl->ti + tbl->ti_len;
808 for (i = 0; i < uniq; i++) {
809 memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
813 return smallest;
/* vi: set sw=4 ts=4: */