libstdc++: Fix std::runtime_format deviations from the spec [PR113320]
[official-gcc.git] / libcpp / makeuname2c.cc
blob01fe4f4b4ac03464cc347f3e8b8aee51c51d4fc0
1 /* Make uname2c.h from various sources.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
19 /* Run this program as
20 ./makeuname2c UnicodeData.txt NameAliases.txt > uname2c.h
22 This program generates 2 big arrays and 2 small ones.
23 The large ones are uname2c_dict, initialized by string literal
24 representing dictionary, and uname2c_tree, which is a space optimized
25 radix tree.
26 The format of the radix tree is:
27 byte 0 either 0x80 + (key[0] - ' ') (if key_len == 1)
28 or key_len (otherwise)
29 either of them ored with 0x40 if it has a codepoint
30 byte 1 LSB of offset into uname2c_dict for key (only if key_len > 1)
31 byte 2 MSB of offset into uname2c_dict for key (only if key_len > 1)
32 if key_len == 1, the above 2 bytes are omitted
33 byte 3 LSB of codepoint (only if it has a codepoint)
34 byte 4 middle byte of codepoint (ditto)
35 byte 5 MSB of codepoint (ditto), ored with 0x80 if node has children
36 ored with 0x40 if it doesn't have siblings
37 if it doesn't have a codepoint, the above 3 bytes are omitted
38 and we assume that the node has children
39 byte 6, 7, 8 uleb128 encoded offset to first child relative to the end
40 of the uleb128 (only if node has children)
41 byte 9 0xff (only if node doesn't have a codepoint and doesn't
42 have siblings)
   For the prefixes of names generated by the Unicode NR1 or NR2 rules,
   the node representing the end of the prefix stores as its codepoint
   0xd800 + an index into the uname2c_generated array.  Each element of
   uname2c_generated is in turn an index into the uname2c_pairs array,
   which lists the (low, high) code point pairs of the ranges sharing
   that prefix, terminated by a single 0.
   0xd800 itself is the NR1 rule (Hangul syllables), the rest are NR2
   rules.  */
51 #include <assert.h>
52 #include <stdio.h>
53 #include <string.h>
54 #include <stdint.h>
55 #include <ctype.h>
56 #include <limits.h>
57 #include <stdarg.h>
58 #include <stdbool.h>
59 #include <stdlib.h>
/* Number of elements in the array A (A must be an array, not a pointer).  */
61 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
/* One more than the largest code point value.  */
63 #define NUM_CODE_POINTS 0x110000
/* Largest code point value accepted when parsing the input tables.  */
64 #define MAX_CODE_POINT 0x10ffff
/* Codepoint stored in a radix tree node that does not terminate a name.  */
65 #define NO_VALUE 0xdc00
/* Base added to a generated_ranges idx to form the pseudo codepoint stored
   for the prefix of an algorithmically generated name.  */
66 #define GENERATED 0xd800
/* One name -> code point mapping collected from the input files.  */
68 struct entry { const char *name; unsigned long codepoint; };
/* Growable array holding every mapping read so far.  */
69 static struct entry *entries;
/* Capacity of ENTRIES and the number of elements currently in use.  */
70 static unsigned long num_allocated, num_entries;
/* Unicode 15.1 Table 4-8.  One element describes one code point range
   whose character names are derived algorithmically from PREFIX.  */
struct generated {
  /* Name prefix shared by every code point of the range.  */
  const char *prefix;
  /* max_high is a workaround for UnicodeData.txt inconsistencies
     on a few CJK UNIFIED IDEOGRAPH- ranges where the "*, Last>"
     entry is a few code points above the end of the range.
     A max_high of 0 in the table below means "same as high"; main
     fills that in before the input files are read.  */
  unsigned long low, high, max_high;
  /* IDX is shared by all ranges with the same prefix; GENERATED + idx is
     the pseudo codepoint stored in the radix tree for the prefix.
     OK is verification state maintained by read_table: 0 when nothing
     has been seen yet, INT_MAX - 1 after a "<..., First>" line,
     INT_MAX after the matching "<..., Last>" line, otherwise the number
     of individually listed code points verified so far.  */
  int idx, ok;
};

static struct generated generated_ranges[] =
{ { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */
  { "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */
  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9fff, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6df, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b739, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2ebf0, 0x2ee5d, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 },
  { "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
  { "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 },
  { "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 },
  { "NUSHU CHARACTER-", 0x1b170, 0x1b2fb, 0, 4, 0 },
  { "CJK COMPATIBILITY IDEOGRAPH-", 0xf900, 0xfa6d, 0, 5, 0 },
  { "CJK COMPATIBILITY IDEOGRAPH-", 0xfa70, 0xfad9, 0, 5, 0 },
  { "CJK COMPATIBILITY IDEOGRAPH-", 0x2f800, 0x2fa1d, 0, 5, 0 }
};
/* One node of the radix tree built from the sorted names.  */
struct node {
  /* Next node on the same level and first node one level down.  */
  struct node *sibling, *child;
  /* Key text of this node; KEY_LEN characters, not NUL terminated on its
     own (it points into the middle of an entry name).  */
  const char *key;
  /* KEY_LEN: length of KEY.
     KEY_IDX: offset of the key text inside the dictionary.
     NODE_SIZE: number of bytes this node occupies in the encoded tree.
     SIZE_SUM: encoded size of this node, its following siblings and all
     of their descendants.
     CHILD_OFF: offset of the first child relative to the end of the
     uleb128 that stores it.  */
  size_t key_len, key_idx, node_size, size_sum, child_off;
  /* Code point of the name ending at this node, or NO_VALUE.  */
  unsigned long codepoint;
  /* True if the key text itself was placed into the dictionary, false if
     it reuses text placed there for other nodes.  */
  bool in_dict;
};

/* Root of the radix tree and a flat array of pointers to all its nodes.  */
static struct node *root, **nodes;
/* Number of nodes allocated (also used as fill index for NODES).  */
static unsigned long num_nodes;
/* Sizes of the generated dictionary and tree, and the length of the
   longest explicitly listed name.  */
static size_t dict_size, tree_size, max_entry_len;
/* Dictionary text, DICT_SIZE characters plus a terminating NUL.  */
static char *dict;
/* Encoded radix tree, TREE_SIZE bytes.  */
static unsigned char *tree;
/* Die!  Print the printf-style message S (formatted with the remaining
   arguments) followed by a newline to stderr, then terminate the program
   with exit status 1.  Never returns.  */

static void
fail (const char *s, ...)
{
  va_list ap;

  va_start (ap, s);
  vfprintf (stderr, s, ap);
  va_end (ap);
  fputc ('\n', stderr);
  exit (1);
}
/* Allocate SIZE bytes and return the new block.  Terminates the program
   through fail if the allocation cannot be satisfied, so the result is
   never NULL.  */

static void *
xmalloc (size_t size)
{
  void *ret;

  /* malloc (0) is allowed to return NULL; that is not an out of memory
     condition, so always request at least one byte.  */
  if (size == 0)
    size = 1;
  ret = malloc (size);
  if (ret == NULL)
    fail ("failed to allocate %ld bytes", (long) size);
  return ret;
}
/* Resize the block P to SIZE bytes, or allocate a fresh block if P is
   NULL, and return the resulting block.  Terminates the program through
   fail if the allocation cannot be satisfied.  */

static void *
xrealloc (void *p, size_t size)
{
  void *ret;

  /* A zero sized request may legitimately yield NULL (and realloc may
     even free P); ask for one byte instead so NULL always means
     failure.  */
  if (size == 0)
    size = 1;
  ret = p ? realloc (p, size) : malloc (size);
  if (ret == NULL)
    fail ("failed to allocate %ld bytes", (long) size);
  return ret;
}
149 static int
150 entrycmp (const void *p1, const void *p2)
152 const struct entry *e1 = (const struct entry *) p1;
153 const struct entry *e2 = (const struct entry *) p2;
154 int ret = strcmp (e1->name, e2->name);
156 if (ret != 0)
157 return ret;
158 if (e1->codepoint < e2->codepoint)
159 return -1;
160 if (e1->codepoint > e2->codepoint)
161 return 1;
162 return 0;
165 static int
166 nodecmp (const void *p1, const void *p2)
168 const struct node *n1 = *(const struct node *const *) p1;
169 const struct node *n2 = *(const struct node *const *) p2;
170 if (n1->key_len > n2->key_len)
171 return -1;
172 if (n1->key_len < n2->key_len)
173 return 1;
174 return memcmp (n1->key, n2->key, n1->key_len);
177 /* Read UnicodeData.txt and fill in the 'decomp' table to be the
178 decompositions of characters for which both the character
179 decomposed and all the code points in the decomposition are valid
180 for some supported language version, and the 'all_decomp' table to
181 be the decompositions of all characters without those
182 constraints. */
184 static void
185 read_table (char *fname, bool aliases_p)
187 FILE *f = fopen (fname, "r");
188 const char *sname = aliases_p ? "NameAliases.txt" : "UnicodeData.txt";
190 if (!f)
191 fail ("opening %s", sname);
192 for (;;)
194 char line[256];
195 unsigned long codepoint;
196 const char *name, *aname;
197 char *l;
198 size_t i;
200 if (!fgets (line, sizeof (line), f))
201 break;
202 codepoint = strtoul (line, &l, 16);
203 if (l == line && aliases_p)
205 /* NameAliased.txt can contain comments and empty lines. */
206 if (*line == '#' || *line == '\n')
207 continue;
209 if (l == line || *l != ';')
210 fail ("parsing %s, reading code point", sname);
211 if (codepoint > MAX_CODE_POINT)
212 fail ("parsing %s, code point too large", sname);
214 name = l + 1;
215 do {
216 ++l;
217 } while (*l != ';');
219 aname = NULL;
220 if (aliases_p)
222 /* Ignore figment and abbreviation aliases. */
223 if (strcmp (l + 1, "correction\n") != 0
224 && strcmp (l + 1, "control\n") != 0
225 && strcmp (l + 1, "alternate\n") != 0)
226 continue;
227 i = ARRAY_SIZE (generated_ranges);
229 else
231 for (i = 0; i < ARRAY_SIZE (generated_ranges); ++i)
232 if (codepoint >= generated_ranges[i].low
233 && codepoint <= generated_ranges[i].max_high)
234 break;
235 if (i != ARRAY_SIZE (generated_ranges))
237 if (*name == '<' && l[-1] == '>')
239 if (codepoint == generated_ranges[i].low
240 && l - name >= 9
241 && memcmp (l - 8, ", First>", 8) == 0
242 && generated_ranges[i].ok == 0)
244 generated_ranges[i].ok = INT_MAX - 1;
245 aname = generated_ranges[i].prefix;
246 codepoint = GENERATED + generated_ranges[i].idx;
248 /* Unfortunately, UnicodeData.txt isn't consistent
249 with the Table 4-8 range endpoints in 3 cases,
250 the ranges are longer there by a few codepoints.
251 So use the max_high hack to avoid verification
252 failures. */
253 else if (codepoint == generated_ranges[i].max_high
254 && l - name >= 8
255 && memcmp (l - 7, ", Last>", 7) == 0
256 && generated_ranges[i].ok == INT_MAX - 1)
258 generated_ranges[i].ok = INT_MAX;
259 continue;
261 else
262 fail ("unexpected generated entry %lx %.*s",
263 codepoint, (int) (l - name), name);
265 else if (codepoint
266 == generated_ranges[i].low + generated_ranges[i].ok
267 && l - name == (strlen (generated_ranges[i].prefix)
268 + (name - 1 - line))
269 && memcmp (name, generated_ranges[i].prefix,
270 strlen (generated_ranges[i].prefix)) == 0
271 && memcmp (name + strlen (generated_ranges[i].prefix),
272 line, name - 1 - line) == 0)
274 ++generated_ranges[i].ok;
275 if (codepoint != generated_ranges[i].low)
276 continue;
277 aname = generated_ranges[i].prefix;
278 codepoint = GENERATED + generated_ranges[i].idx;
280 else
281 fail ("unexpected generated entry %lx %.*s",
282 codepoint, (int) (l - name), name);
283 if (aname == generated_ranges[i].prefix)
285 size_t j;
287 /* Don't add an entry for a generated range where the
288 same prefix has been added already. */
289 for (j = 0; j < i; ++j)
290 if (generated_ranges[j].idx == generated_ranges[i].idx
291 && generated_ranges[j].ok != 0)
292 break;
293 if (j < i)
294 continue;
297 else if (*name == '<' && l[-1] == '>')
298 continue;
301 if (num_entries == num_allocated)
303 num_allocated = num_allocated ? 2 * num_allocated : 65536;
304 entries = (struct entry *) xrealloc (entries, num_allocated
305 * sizeof (entries[0]));
308 if (aname == NULL)
310 char *a = (char *) xmalloc (l + 1 - name);
311 if (l - name > max_entry_len)
312 max_entry_len = l - name;
313 memcpy (a, name, l - name);
314 a[l - name] = '\0';
315 aname = a;
317 entries[num_entries].name = aname;
318 entries[num_entries++].codepoint = codepoint;
320 if (ferror (f))
321 fail ("reading %s", sname);
322 fclose (f);
325 /* Assumes nodes are added from sorted array, so we never
326 add any node before existing one, only after it. */
328 static void
329 node_add (struct node **p, const char *key, size_t key_len,
330 unsigned long codepoint)
332 struct node *n;
333 size_t i;
337 if (*p == NULL)
339 *p = n = (struct node *) xmalloc (sizeof (struct node));
340 ++num_nodes;
341 assert (key_len);
342 n->sibling = NULL;
343 n->child = NULL;
344 n->key = key;
345 n->key_len = key_len;
346 n->codepoint = codepoint;
347 return;
349 n = *p;
350 for (i = 0; i < n->key_len && i < key_len; ++i)
351 if (n->key[i] != key[i])
352 break;
353 if (i == 0)
355 p = &n->sibling;
356 continue;
358 if (i == n->key_len)
360 assert (key_len > n->key_len);
361 p = &n->child;
362 key += n->key_len;
363 key_len -= n->key_len;
364 continue;
366 /* Need to split the node. */
367 assert (i < key_len);
368 n = (struct node *) xmalloc (sizeof (struct node));
369 ++num_nodes;
370 n->sibling = NULL;
371 n->child = (*p)->child;
372 n->key = (*p)->key + i;
373 n->key_len = (*p)->key_len - i;
374 n->codepoint = (*p)->codepoint;
375 (*p)->child = n;
376 (*p)->key_len = i;
377 (*p)->codepoint = NO_VALUE;
378 key += i;
379 key_len -= i;
380 p = &n->sibling;
382 while (1);
385 static void
386 append_nodes (struct node *n)
388 for (; n; n = n->sibling)
390 nodes[num_nodes++] = n;
391 append_nodes (n->child);
/* Return the number of bytes needed to store VAL in uleb128 encoding,
   i.e. one byte per started group of 7 bits (at least 1, for VAL 0).  */

static size_t
sizeof_uleb128 (size_t val)
{
  size_t sz = 0;

  do
    {
      val >>= 7;
      sz += 1;
    }
  while (val != 0);
  return sz;
}
408 static void
409 size_nodes (struct node *n)
411 if (n->child)
412 size_nodes (n->child);
413 if (n->sibling)
414 size_nodes (n->sibling);
415 n->node_size = 1 + (n->key_len > 1) * 2;
416 if (n->codepoint != NO_VALUE)
417 n->node_size += 3;
418 else if (n->sibling == NULL)
419 ++n->node_size;
420 n->size_sum = 0;
421 n->child_off = 0;
422 if (n->sibling)
423 n->size_sum += n->sibling->size_sum;
424 if (n->child)
426 n->child_off = n->size_sum + (n->codepoint == NO_VALUE
427 && n->sibling == NULL);
428 n->node_size += sizeof_uleb128 (n->child_off);
430 n->size_sum += n->node_size;
431 if (n->child)
432 n->size_sum += n->child->size_sum;
433 tree_size += n->node_size;
/* Store VAL in uleb128 encoding at P: 7 bits per byte, least significant
   group first, with bit 0x80 set on every byte except the last one.
   Writes sizeof_uleb128 (VAL) bytes.  */

static void
write_uleb128 (unsigned char *p, size_t val)
{
  unsigned char c;

  do
    {
      c = val & 0x7f;
      val >>= 7;
      if (val)
        c |= 0x80;
      *p++ = c;
    }
  while (val);
}
451 static void
452 write_nodes (struct node *n, size_t off)
454 for (; n; n = n->sibling)
456 assert (off < tree_size && tree[off] == 0);
457 if (n->key_len > 1)
459 assert (n->key_len < 64);
460 tree[off] = n->key_len;
462 else
463 tree[off] = (n->key[0] - ' ') | 0x80;
464 assert ((tree[off] & 0x40) == 0);
465 if (n->codepoint != NO_VALUE)
466 tree[off] |= 0x40;
467 off++;
468 if (n->key_len > 1)
470 tree[off++] = n->key_idx & 0xff;
471 tree[off++] = (n->key_idx >> 8) & 0xff;
473 if (n->codepoint != NO_VALUE)
475 assert (n->codepoint < (1L << 21));
476 tree[off++] = n->codepoint & 0xff;
477 tree[off++] = (n->codepoint >> 8) & 0xff;
478 tree[off] = (n->codepoint >> 16) & 0xff;
479 if (n->child)
480 tree[off] |= 0x80;
481 if (!n->sibling)
482 tree[off] |= 0x40;
483 off++;
485 if (n->child)
487 write_uleb128 (&tree[off], n->child_off);
488 off += sizeof_uleb128 (n->child_off);
489 write_nodes (n->child, off + n->child_off);
491 if (n->codepoint == NO_VALUE
492 && n->sibling == NULL)
493 tree[off++] = 0xff;
495 assert (off <= tree_size);
498 static void
499 build_radix_tree (void)
501 size_t i, j, k, key_idx;
503 for (i = 0; i < ARRAY_SIZE (generated_ranges); ++i)
504 if (generated_ranges[i].ok == INT_MAX)
506 if (generated_ranges[i].max_high - generated_ranges[i].high > 15UL)
507 break;
509 else if (generated_ranges[i].ok == (generated_ranges[i].high
510 - generated_ranges[i].low + 1))
512 if (generated_ranges[i].max_high != generated_ranges[i].high)
513 break;
515 else
516 break;
517 if (i < ARRAY_SIZE (generated_ranges))
518 fail ("uncovered generated range %s %lx %lx",
519 generated_ranges[i].prefix, generated_ranges[i].low,
520 generated_ranges[i].high);
521 /* Sort entries alphabetically, node_add relies on that. */
522 qsort (entries, num_entries, sizeof (struct entry), entrycmp);
523 for (i = 1; i < num_entries; ++i)
524 if (i && strcmp (entries[i].name, entries[i - 1].name) == 0)
525 fail ("multiple entries for name %s", entries[i].name);
527 for (i = 0; i < num_entries; ++i)
528 node_add (&root, entries[i].name, strlen (entries[i].name),
529 entries[i].codepoint);
531 nodes = (struct node **) xmalloc (num_nodes * sizeof (struct node *));
532 i = num_nodes;
533 num_nodes = 0;
534 append_nodes (root);
535 assert (num_nodes == i);
536 /* Sort node pointers by decreasing string length to handle substrings
537 right. */
538 qsort (nodes, num_nodes, sizeof (struct node *), nodecmp);
539 if (nodes[0]->key_len >= 64)
540 /* We could actually encode even 64 and 65, as key_len 0 and 1 will
541 never appear in the multiple letter key encodings, so could subtract
542 2. */
543 fail ("can't encode key length %d >= 64, so need to split some radix "
544 "tree nodes to ensure length fits", nodes[0]->key_len);
546 /* Verify a property charset.cc UAX44-LM2 matching relies on:
547 if - is at the end of key of some node, then all its siblings
548 start with alphanumeric characters.
549 Only 2 character names and 1 alias have - followed by space:
550 U+0F0A TIBETAN MARK BKA- SHOG YIG MGO
551 U+0FD0 TIBETAN MARK BKA- SHOG GI MGO RGYAN
552 U+0FD0 TIBETAN MARK BSKA- SHOG GI MGO RGYAN
553 so the KA- in there will always be followed at least by SHOG
554 in the same node.
555 If this changes, charset.cc needs to change. */
556 for (i = 0; i < num_nodes; ++i)
557 if (nodes[i]->key[nodes[i]->key_len - 1] == '-'
558 && nodes[i]->child)
560 struct node *n;
562 for (n = nodes[i]->child; n; n = n->sibling)
563 if (n->key[0] == ' ')
564 fail ("node with key %.*s followed by node with key %.*s",
565 (int) nodes[i]->key_len, nodes[i]->key,
566 (int) n->key_len, n->key);
569 /* This is expensive, O(num_nodes * num_nodes * nodes[0]->key_len), but
570 fortunately num_nodes is < 64K and key_len < 64. */
571 key_idx = 0;
572 for (i = 0; i < num_nodes; ++i)
574 nodes[i]->key_idx = SIZE_MAX;
575 nodes[i]->in_dict = false;
576 if (nodes[i]->key_len > 1)
578 for (j = 0; j < i; ++j)
579 /* Can't rely on memmem unfortunately. */
580 if (nodes[j]->in_dict)
582 for (k = 0; k <= nodes[j]->key_len - nodes[i]->key_len; ++k)
583 if (nodes[j]->key[k] == nodes[i]->key[0]
584 && memcmp (nodes[j]->key + k + 1, nodes[i]->key + 1,
585 nodes[i]->key_len - 1) == 0)
587 nodes[i]->key_idx = nodes[j]->key_idx + k;
588 j = i;
589 break;
591 if (j == i)
592 break;
593 for (; k < nodes[j]->key_len; ++k)
594 if (nodes[j]->key[k] == nodes[i]->key[0]
595 && memcmp (nodes[j]->key + k + 1, nodes[i]->key + 1,
596 nodes[j]->key_len - 1 - k) == 0)
598 size_t l;
600 for (l = j + 1; l < i; ++l)
601 if (nodes[l]->in_dict)
602 break;
603 if (l < i
604 && memcmp (nodes[l]->key,
605 nodes[i]->key + (nodes[j]->key_len - k),
606 nodes[i]->key_len
607 - (nodes[j]->key_len - k)) == 0)
609 nodes[i]->key_idx = nodes[j]->key_idx + k;
610 j = i;
612 else
613 j = l - 1;
614 break;
617 if (nodes[i]->key_idx == SIZE_MAX)
619 nodes[i]->key_idx = key_idx;
620 nodes[i]->in_dict = true;
621 key_idx += nodes[i]->key_len;
625 if (key_idx >= 65536)
626 /* We only use 2 bytes for offsets into the dictionary.
627 If it grows more, there is e.g. a possibility to replace
628 most often seen words or substrings in the dictionary
629 with characters other than [A-Z0-9 -] (say LETTER occurs
630 in the dictionary almost 197 times and so by using a
631 instead of LETTER we could save (6 - 1) * 197 bytes,
632 with some on the side table mapping 'a' to "LETTER". */
633 fail ("too large dictionary %ld", (long) key_idx);
634 dict_size = key_idx;
636 size_nodes (root);
638 dict = (char *) xmalloc (dict_size + 1);
639 for (i = 0; i < num_nodes; ++i)
640 if (nodes[i]->in_dict)
641 memcpy (dict + nodes[i]->key_idx, nodes[i]->key, nodes[i]->key_len);
642 dict[dict_size] = '\0';
644 tree = (unsigned char *) xmalloc (tree_size);
645 memset (tree, 0, tree_size);
646 write_nodes (root, 0);
/* Print out the huge copyright notice: the comment placed at the top of
   the generated header, followed by an empty line.  */

static void
write_copyright (void)
{
  /* NOTE(review): the indentation of the continuation lines and the
     empty lines between paragraphs were reconstructed; compare the
     spacing with the repository copy before regenerating uname2c.h.  */
  static const char copyright[] =
    "/* Unicode name to codepoint.\n"
    "   Copyright (C) 2005-2024 Free Software Foundation, Inc.\n"
    "\n"
    "   This program is free software; you can redistribute it and/or modify it\n"
    "   under the terms of the GNU General Public License as published by the\n"
    "   Free Software Foundation; either version 3, or (at your option) any\n"
    "   later version.\n"
    "\n"
    "   This program is distributed in the hope that it will be useful,\n"
    "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
    "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
    "   GNU General Public License for more details.\n"
    "\n"
    "   You should have received a copy of the GNU General Public License\n"
    "   along with this program; see the file COPYING3. If not see\n"
    "   <http://www.gnu.org/licenses/>.\n"
    "\n"
    "\n"
    "   Copyright (C) 1991-2023 Unicode, Inc. All rights reserved.\n"
    "   Distributed under the Terms of Use in\n"
    "   http://www.unicode.org/copyright.html.\n"
    "\n"
    "   Permission is hereby granted, free of charge, to any person\n"
    "   obtaining a copy of the Unicode data files and any associated\n"
    "   documentation (the \"Data Files\") or Unicode software and any\n"
    "   associated documentation (the \"Software\") to deal in the Data Files\n"
    "   or Software without restriction, including without limitation the\n"
    "   rights to use, copy, modify, merge, publish, distribute, and/or\n"
    "   sell copies of the Data Files or Software, and to permit persons to\n"
    "   whom the Data Files or Software are furnished to do so, provided\n"
    "   that (a) the above copyright notice(s) and this permission notice\n"
    "   appear with all copies of the Data Files or Software, (b) both the\n"
    "   above copyright notice(s) and this permission notice appear in\n"
    "   associated documentation, and (c) there is clear notice in each\n"
    "   modified Data File or in the Software as well as in the\n"
    "   documentation associated with the Data File(s) or Software that the\n"
    "   data or software has been modified.\n"
    "\n"
    "   THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY\n"
    "   OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE\n"
    "   WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "   NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE\n"
    "   COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR\n"
    "   ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY\n"
    "   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\n"
    "   WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\n"
    "   ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\n"
    "   OF THE DATA FILES OR SOFTWARE.\n"
    "\n"
    "   Except as contained in this notice, the name of a copyright holder\n"
    "   shall not be used in advertising or otherwise to promote the sale,\n"
    "   use or other dealings in these Data Files or Software without prior\n"
    "   written authorization of the copyright holder. */\n";

  puts (copyright);
}
712 static void
713 write_dict (void)
715 size_t i;
717 printf ("static const char uname2c_dict[%ld] =\n", (long) (dict_size + 1));
718 for (i = 0; i < dict_size; i += 77)
719 printf ("\"%.77s\"%s\n", dict + i, i + 76 > dict_size ? ";" : "");
720 puts ("");
723 static void
724 write_tree (void)
726 size_t i, j;
728 printf ("static const unsigned char uname2c_tree[%ld] = {\n",
729 (long) tree_size);
730 for (i = 0, j = 0; i < tree_size; ++i)
732 printf ("%s0x%02x%s", j == 0 ? " " : "", tree[i],
733 i == tree_size - 1 ? " };\n\n" : j == 11 ? ",\n" : ", ");
734 if (j == 11)
735 j = 0;
736 else
737 ++j;
741 static void
742 write_generated (void)
744 size_t i, j;
746 puts ("static const cppchar_t uname2c_pairs[] = {");
747 for (i = 0; i < ARRAY_SIZE (generated_ranges); ++i)
749 if (i == 0)
751 else if (generated_ranges[i - 1].idx != generated_ranges[i].idx)
752 puts (", 0,");
753 else
754 puts (",");
755 printf (" 0x%lx, 0x%lx /* %s */",
756 generated_ranges[i].low,
757 generated_ranges[i].high,
758 generated_ranges[i].prefix);
760 puts (", 0 };\n");
762 puts ("static const unsigned char uname2c_generated[] = {");
763 for (i = 0, j = -1; i < ARRAY_SIZE (generated_ranges); ++i)
765 if (i == 0 || generated_ranges[i - 1].idx != generated_ranges[i].idx)
766 printf ("%s %d /* %s */", i ? ",\n" : "",
767 ++j, generated_ranges[i].prefix);
768 j += 2;
770 puts (" };\n");
773 /* Main program. */
776 main (int argc, char **argv)
778 size_t i;
780 if (argc != 3)
781 fail ("too few arguments to makeradixtree");
782 for (i = 0; i < ARRAY_SIZE (generated_ranges); ++i)
783 if (!generated_ranges[i].max_high)
784 generated_ranges[i].max_high = generated_ranges[i].high;
785 read_table (argv[1], false);
786 read_table (argv[2], true);
787 build_radix_tree ();
789 write_copyright ();
790 write_dict ();
791 write_tree ();
792 write_generated ();
793 printf ("static const unsigned int uname2c_max_name_len = %ld;\n\n", max_entry_len);
794 return 0;