Add 'libgomp.oacc-fortran/declare-allocatable-1.f90'
[official-gcc.git] / libcpp / makeuname2c.cc
blob2b3c70853149381654ad12b8dc77c9ceff473684
/* Make uname2c.h from various sources.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING3. If not see
   <http://www.gnu.org/licenses/>. */
/* Run this program as
   ./makeuname2c UnicodeData.txt NameAliases.txt > uname2c.h

   This program generates 2 big arrays and 2 small ones.
   The large ones are uname2c_dict, initialized by string literal
   representing dictionary, and uname2c_tree, which is a space optimized
   radix tree.
   The format of the radix tree is:
   byte 0        either 0x80 + (key[0] - ' ')  (if key_len == 1)
                 or key_len                    (otherwise)
                 either of them ored with 0x40 if it has a codepoint
   byte 1        LSB of offset into uname2c_dict for key (only if key_len > 1)
   byte 2        MSB of offset into uname2c_dict for key (only if key_len > 1)
                 if key_len == 1, the above 2 bytes are omitted
   byte 3        LSB of codepoint (only if it has a codepoint)
   byte 4        middle byte of codepoint (ditto)
   byte 5        MSB of codepoint (ditto), ored with 0x80 if node has children
                 ored with 0x40 if it doesn't have siblings
                 if it doesn't have a codepoint, the above 3 bytes are omitted
                 and we assume that the node has children
   byte 6, 7, 8  uleb128 encoded offset to first child relative to the end
                 of the uleb128 (only if node has children)
   byte 9        0xff (only if node doesn't have a codepoint and doesn't
                 have siblings)

   For prefixes of Unicode NR1 or NR2 rule generated names, on a node
   representing end of the prefix codepoint is 0xd800 + index into
   uname2c_generated array with indexes into uname2c_pairs array of
   code points (low, high) of the ranges terminated by single 0.
   0xd800 is NR1 rule (Hangul syllables), rest are NR2 rules.
*/
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
/* Number of elements of a true array A (not of a pointer).  */
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))

#define NUM_CODE_POINTS 0x110000
#define MAX_CODE_POINT 0x10ffff
/* Codepoint stored in a radix tree node that has no codepoint.  */
#define NO_VALUE 0xdc00
/* Base of the pseudo codepoints used for generated name prefixes;
   the idx of the generated range is added to it.  */
#define GENERATED 0xd800

/* One name to codepoint mapping read from the input files.  */
struct entry { const char *name; unsigned long codepoint; };
/* Growable array of entries, its capacity and its used length.  */
static struct entry *entries;
static unsigned long num_allocated, num_entries;
/* Unicode 14 Table 4-8.  */
struct generated {
  /* Name prefix shared by all code points of the range.  */
  const char *prefix;
  /* max_high is a workaround for UnicodeData.txt inconsistencies
     on a few CJK UNIFIED IDEOGRAPH- ranges where the "*, Last>"
     entry is a few code points above the end of the range.
     A max_high of 0 in the initializer means "same as high".  */
  unsigned long low, high, max_high;
  /* idx identifies the prefix (ranges with the same prefix share it);
     ok is the verification state filled in while reading
     UnicodeData.txt.  */
  int idx, ok;
};
static struct generated generated_ranges[] =
{ { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */
  { "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */
  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9ffc, 0x9fff, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6dd, 0x2a6df, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b734, 0x2b738, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
  { "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
  { "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
  { "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 },
  { "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 },
  { "NUSHU CHARACTER-", 0x1b170, 0x1b2fb, 0, 4, 0 },
  { "CJK COMPATIBILITY IDEOGRAPH-", 0xf900, 0xfa6d, 0, 5, 0 },
  { "CJK COMPATIBILITY IDEOGRAPH-", 0xfa70, 0xfad9, 0, 5, 0 },
  { "CJK COMPATIBILITY IDEOGRAPH-", 0x2f800, 0x2fa1d, 0, 5, 0 }
};
/* Node of the radix tree built from the sorted entries.  */
struct node {
  struct node *sibling, *child;
  /* Edge label; KEY_LEN characters starting at KEY (not necessarily
     NUL terminated at that length, split nodes share storage).  */
  const char *key;
  /* key_idx is the offset of the key in the dictionary, node_size the
     encoded size of this node, size_sum the encoded size of this node,
     its following siblings and all their descendants, child_off the
     offset written in front of the children.  */
  size_t key_len, key_idx, node_size, size_sum, child_off;
  /* Codepoint of the name ending here, or NO_VALUE.  */
  unsigned long codepoint;
  /* True if the key is stored in the dictionary itself rather than
     found as a substring of previously stored keys.  */
  bool in_dict;
};
static struct node *root, **nodes;
static unsigned long num_nodes;
static size_t dict_size, tree_size, max_entry_len;
static char *dict;
static unsigned char *tree;
/* Die!  Print the printf-style message S followed by a newline to
   stderr and exit with status 1.  Never returns.  */

static void
fail (const char *s, ...)
{
  va_list ap;

  va_start (ap, s);
  vfprintf (stderr, s, ap);
  va_end (ap);
  fputc ('\n', stderr);
  exit (1);
}
/* Allocate SIZE bytes with malloc; on failure report it through fail,
   which does not return.  */

static void *
xmalloc (size_t size)
{
  void *ret = malloc (size);

  if (ret == NULL)
    fail ("failed to allocate %ld bytes", (long) size);
  return ret;
}
/* Resize P to SIZE bytes (plain malloc when P is NULL); on failure
   report it through fail, which does not return.  */

static void *
xrealloc (void *p, size_t size)
{
  void *ret = p ? realloc (p, size) : malloc (size);

  if (ret == NULL)
    fail ("failed to allocate %ld bytes", (long) size);
  return ret;
}
147 static int
148 entrycmp (const void *p1, const void *p2)
150 const struct entry *e1 = (const struct entry *) p1;
151 const struct entry *e2 = (const struct entry *) p2;
152 int ret = strcmp (e1->name, e2->name);
154 if (ret != 0)
155 return ret;
156 if (e1->codepoint < e2->codepoint)
157 return -1;
158 if (e1->codepoint > e2->codepoint)
159 return 1;
160 return 0;
163 static int
164 nodecmp (const void *p1, const void *p2)
166 const struct node *n1 = *(const struct node *const *) p1;
167 const struct node *n2 = *(const struct node *const *) p2;
168 if (n1->key_len > n2->key_len)
169 return -1;
170 if (n1->key_len < n2->key_len)
171 return 1;
172 return memcmp (n1->key, n2->key, n1->key_len);
175 /* Read UnicodeData.txt and fill in the 'decomp' table to be the
176 decompositions of characters for which both the character
177 decomposed and all the code points in the decomposition are valid
178 for some supported language version, and the 'all_decomp' table to
179 be the decompositions of all characters without those
180 constraints. */
182 static void
183 read_table (char *fname, bool aliases_p)
185 FILE *f = fopen (fname, "r");
186 const char *sname = aliases_p ? "NameAliases.txt" : "UnicodeData.txt";
188 if (!f)
189 fail ("opening %s", sname);
190 for (;;)
192 char line[256];
193 unsigned long codepoint;
194 const char *name, *aname;
195 char *l;
196 size_t i;
198 if (!fgets (line, sizeof (line), f))
199 break;
200 codepoint = strtoul (line, &l, 16);
201 if (l == line && aliases_p)
203 /* NameAliased.txt can contain comments and empty lines. */
204 if (*line == '#' || *line == '\n')
205 continue;
207 if (l == line || *l != ';')
208 fail ("parsing %s, reading code point", sname);
209 if (codepoint > MAX_CODE_POINT)
210 fail ("parsing %s, code point too large", sname);
212 name = l + 1;
213 do {
214 ++l;
215 } while (*l != ';');
217 aname = NULL;
218 if (aliases_p)
220 /* Ignore figment and abbreviation aliases. */
221 if (strcmp (l + 1, "correction\n") != 0
222 && strcmp (l + 1, "control\n") != 0
223 && strcmp (l + 1, "alternate\n") != 0)
224 continue;
225 i = ARRAY_SIZE (generated_ranges);
227 else
229 for (i = 0; i < ARRAY_SIZE (generated_ranges); ++i)
230 if (codepoint >= generated_ranges[i].low
231 && codepoint <= generated_ranges[i].max_high)
232 break;
233 if (i != ARRAY_SIZE (generated_ranges))
235 if (*name == '<' && l[-1] == '>')
237 if (codepoint == generated_ranges[i].low
238 && l - name >= 9
239 && memcmp (l - 8, ", First>", 8) == 0
240 && generated_ranges[i].ok == 0)
242 generated_ranges[i].ok = INT_MAX - 1;
243 aname = generated_ranges[i].prefix;
244 codepoint = GENERATED + generated_ranges[i].idx;
246 /* Unfortunately, UnicodeData.txt isn't consistent
247 with the Table 4-8 range endpoints in 3 cases,
248 the ranges are longer there by a few codepoints.
249 So use the max_high hack to avoid verification
250 failures. */
251 else if (codepoint == generated_ranges[i].max_high
252 && l - name >= 8
253 && memcmp (l - 7, ", Last>", 7) == 0
254 && generated_ranges[i].ok == INT_MAX - 1)
256 generated_ranges[i].ok = INT_MAX;
257 continue;
259 else
260 fail ("unexpected generated entry %lx %.*s",
261 codepoint, (int) (l - name), name);
263 else if (codepoint
264 == generated_ranges[i].low + generated_ranges[i].ok
265 && l - name == (strlen (generated_ranges[i].prefix)
266 + (name - 1 - line))
267 && memcmp (name, generated_ranges[i].prefix,
268 strlen (generated_ranges[i].prefix)) == 0
269 && memcmp (name + strlen (generated_ranges[i].prefix),
270 line, name - 1 - line) == 0)
272 ++generated_ranges[i].ok;
273 if (codepoint != generated_ranges[i].low)
274 continue;
275 aname = generated_ranges[i].prefix;
276 codepoint = GENERATED + generated_ranges[i].idx;
278 else
279 fail ("unexpected generated entry %lx %.*s",
280 codepoint, (int) (l - name), name);
281 if (aname == generated_ranges[i].prefix)
283 size_t j;
285 /* Don't add an entry for a generated range where the
286 same prefix has been added already. */
287 for (j = 0; j < i; ++j)
288 if (generated_ranges[j].idx == generated_ranges[i].idx
289 && generated_ranges[j].ok != 0)
290 break;
291 if (j < i)
292 continue;
295 else if (*name == '<' && l[-1] == '>')
296 continue;
299 if (num_entries == num_allocated)
301 num_allocated = num_allocated ? 2 * num_allocated : 65536;
302 entries = (struct entry *) xrealloc (entries, num_allocated
303 * sizeof (entries[0]));
306 if (aname == NULL)
308 char *a = (char *) xmalloc (l + 1 - name);
309 if (l - name > max_entry_len)
310 max_entry_len = l - name;
311 memcpy (a, name, l - name);
312 a[l - name] = '\0';
313 aname = a;
315 entries[num_entries].name = aname;
316 entries[num_entries++].codepoint = codepoint;
318 if (ferror (f))
319 fail ("reading %s", sname);
320 fclose (f);
323 /* Assumes nodes are added from sorted array, so we never
324 add any node before existing one, only after it. */
326 static void
327 node_add (struct node **p, const char *key, size_t key_len,
328 unsigned long codepoint)
330 struct node *n;
331 size_t i;
335 if (*p == NULL)
337 *p = n = (struct node *) xmalloc (sizeof (struct node));
338 ++num_nodes;
339 assert (key_len);
340 n->sibling = NULL;
341 n->child = NULL;
342 n->key = key;
343 n->key_len = key_len;
344 n->codepoint = codepoint;
345 return;
347 n = *p;
348 for (i = 0; i < n->key_len && i < key_len; ++i)
349 if (n->key[i] != key[i])
350 break;
351 if (i == 0)
353 p = &n->sibling;
354 continue;
356 if (i == n->key_len)
358 assert (key_len > n->key_len);
359 p = &n->child;
360 key += n->key_len;
361 key_len -= n->key_len;
362 continue;
364 /* Need to split the node. */
365 assert (i < key_len);
366 n = (struct node *) xmalloc (sizeof (struct node));
367 ++num_nodes;
368 n->sibling = NULL;
369 n->child = (*p)->child;
370 n->key = (*p)->key + i;
371 n->key_len = (*p)->key_len - i;
372 n->codepoint = (*p)->codepoint;
373 (*p)->child = n;
374 (*p)->key_len = i;
375 (*p)->codepoint = NO_VALUE;
376 key += i;
377 key_len -= i;
378 p = &n->sibling;
380 while (1);
383 static void
384 append_nodes (struct node *n)
386 for (; n; n = n->sibling)
388 nodes[num_nodes++] = n;
389 append_nodes (n->child);
/* Return the number of bytes the uleb128 encoding of VAL occupies
   (7 value bits per byte, at least one byte).  */

static size_t
sizeof_uleb128 (size_t val)
{
  size_t sz = 0;
  do
    {
      val >>= 7;
      sz += 1;
    }
  while (val != 0);
  return sz;
}
406 static void
407 size_nodes (struct node *n)
409 if (n->child)
410 size_nodes (n->child);
411 if (n->sibling)
412 size_nodes (n->sibling);
413 n->node_size = 1 + (n->key_len > 1) * 2;
414 if (n->codepoint != NO_VALUE)
415 n->node_size += 3;
416 else if (n->sibling == NULL)
417 ++n->node_size;
418 n->size_sum = 0;
419 n->child_off = 0;
420 if (n->sibling)
421 n->size_sum += n->sibling->size_sum;
422 if (n->child)
424 n->child_off = n->size_sum + (n->codepoint == NO_VALUE
425 && n->sibling == NULL);
426 n->node_size += sizeof_uleb128 (n->child_off);
428 n->size_sum += n->node_size;
429 if (n->child)
430 n->size_sum += n->child->size_sum;
431 tree_size += n->node_size;
/* Store the uleb128 encoding of VAL at P: 7 bits per byte, least
   significant group first, 0x80 set on every byte except the last.
   The caller must provide sizeof_uleb128 (VAL) bytes.  */

static void
write_uleb128 (unsigned char *p, size_t val)
{
  unsigned char c;
  do
    {
      c = val & 0x7f;
      val >>= 7;
      if (val)
        c |= 0x80;
      *p++ = c;
    }
  while (val);
}
449 static void
450 write_nodes (struct node *n, size_t off)
452 for (; n; n = n->sibling)
454 assert (off < tree_size && tree[off] == 0);
455 if (n->key_len > 1)
457 assert (n->key_len < 64);
458 tree[off] = n->key_len;
460 else
461 tree[off] = (n->key[0] - ' ') | 0x80;
462 assert ((tree[off] & 0x40) == 0);
463 if (n->codepoint != NO_VALUE)
464 tree[off] |= 0x40;
465 off++;
466 if (n->key_len > 1)
468 tree[off++] = n->key_idx & 0xff;
469 tree[off++] = (n->key_idx >> 8) & 0xff;
471 if (n->codepoint != NO_VALUE)
473 assert (n->codepoint < (1L << 21));
474 tree[off++] = n->codepoint & 0xff;
475 tree[off++] = (n->codepoint >> 8) & 0xff;
476 tree[off] = (n->codepoint >> 16) & 0xff;
477 if (n->child)
478 tree[off] |= 0x80;
479 if (!n->sibling)
480 tree[off] |= 0x40;
481 off++;
483 if (n->child)
485 write_uleb128 (&tree[off], n->child_off);
486 off += sizeof_uleb128 (n->child_off);
487 write_nodes (n->child, off + n->child_off);
489 if (n->codepoint == NO_VALUE
490 && n->sibling == NULL)
491 tree[off++] = 0xff;
493 assert (off <= tree_size);
496 static void
497 build_radix_tree (void)
499 size_t i, j, k, key_idx;
501 for (i = 0; i < ARRAY_SIZE (generated_ranges); ++i)
502 if (generated_ranges[i].ok == INT_MAX)
504 if (generated_ranges[i].max_high - generated_ranges[i].high > 15UL)
505 break;
507 else if (generated_ranges[i].ok == (generated_ranges[i].high
508 - generated_ranges[i].low + 1))
510 if (generated_ranges[i].max_high != generated_ranges[i].high)
511 break;
513 else
514 break;
515 if (i < ARRAY_SIZE (generated_ranges))
516 fail ("uncovered generated range %s %lx %lx",
517 generated_ranges[i].prefix, generated_ranges[i].low,
518 generated_ranges[i].high);
519 /* Sort entries alphabetically, node_add relies on that. */
520 qsort (entries, num_entries, sizeof (struct entry), entrycmp);
521 for (i = 1; i < num_entries; ++i)
522 if (i && strcmp (entries[i].name, entries[i - 1].name) == 0)
523 fail ("multiple entries for name %s", entries[i].name);
525 for (i = 0; i < num_entries; ++i)
526 node_add (&root, entries[i].name, strlen (entries[i].name),
527 entries[i].codepoint);
529 nodes = (struct node **) xmalloc (num_nodes * sizeof (struct node *));
530 i = num_nodes;
531 num_nodes = 0;
532 append_nodes (root);
533 assert (num_nodes == i);
534 /* Sort node pointers by decreasing string length to handle substrings
535 right. */
536 qsort (nodes, num_nodes, sizeof (struct node *), nodecmp);
537 if (nodes[0]->key_len >= 64)
538 /* We could actually encode even 64 and 65, as key_len 0 and 1 will
539 never appear in the multiple letter key encodings, so could subtract
540 2. */
541 fail ("can't encode key length %d >= 64, so need to split some radix "
542 "tree nodes to ensure length fits", nodes[0]->key_len);
544 /* Verify a property charset.cc UAX44-LM2 matching relies on:
545 if - is at the end of key of some node, then all its siblings
546 start with alphanumeric characters.
547 Only 2 character names and 1 alias have - followed by space:
548 U+0F0A TIBETAN MARK BKA- SHOG YIG MGO
549 U+0FD0 TIBETAN MARK BKA- SHOG GI MGO RGYAN
550 U+0FD0 TIBETAN MARK BSKA- SHOG GI MGO RGYAN
551 so the KA- in there will always be followed at least by SHOG
552 in the same node.
553 If this changes, charset.cc needs to change. */
554 for (i = 0; i < num_nodes; ++i)
555 if (nodes[i]->key[nodes[i]->key_len - 1] == '-'
556 && nodes[i]->child)
558 struct node *n;
560 for (n = nodes[i]->child; n; n = n->sibling)
561 if (n->key[0] == ' ')
562 fail ("node with key %.*s followed by node with key %.*s",
563 (int) nodes[i]->key_len, nodes[i]->key,
564 (int) n->key_len, n->key);
567 /* This is expensive, O(num_nodes * num_nodes * nodes[0]->key_len), but
568 fortunately num_nodes is < 64K and key_len < 64. */
569 key_idx = 0;
570 for (i = 0; i < num_nodes; ++i)
572 nodes[i]->key_idx = SIZE_MAX;
573 nodes[i]->in_dict = false;
574 if (nodes[i]->key_len > 1)
576 for (j = 0; j < i; ++j)
577 /* Can't rely on memmem unfortunately. */
578 if (nodes[j]->in_dict)
580 for (k = 0; k <= nodes[j]->key_len - nodes[i]->key_len; ++k)
581 if (nodes[j]->key[k] == nodes[i]->key[0]
582 && memcmp (nodes[j]->key + k + 1, nodes[i]->key + 1,
583 nodes[i]->key_len - 1) == 0)
585 nodes[i]->key_idx = nodes[j]->key_idx + k;
586 j = i;
587 break;
589 if (j == i)
590 break;
591 for (; k < nodes[j]->key_len; ++k)
592 if (nodes[j]->key[k] == nodes[i]->key[0]
593 && memcmp (nodes[j]->key + k + 1, nodes[i]->key + 1,
594 nodes[j]->key_len - 1 - k) == 0)
596 size_t l;
598 for (l = j + 1; l < i; ++l)
599 if (nodes[l]->in_dict)
600 break;
601 if (l < i
602 && memcmp (nodes[l]->key,
603 nodes[i]->key + (nodes[j]->key_len - k),
604 nodes[i]->key_len
605 - (nodes[j]->key_len - k)) == 0)
607 nodes[i]->key_idx = nodes[j]->key_idx + k;
608 j = i;
610 else
611 j = l - 1;
612 break;
615 if (nodes[i]->key_idx == SIZE_MAX)
617 nodes[i]->key_idx = key_idx;
618 nodes[i]->in_dict = true;
619 key_idx += nodes[i]->key_len;
623 if (key_idx >= 65536)
624 /* We only use 2 bytes for offsets into the dictionary.
625 If it grows more, there is e.g. a possibility to replace
626 most often seen words or substrings in the dictionary
627 with characters other than [A-Z0-9 -] (say LETTER occurs
628 in the dictionary almost 197 times and so by using a
629 instead of LETTER we could save (6 - 1) * 197 bytes,
630 with some on the side table mapping 'a' to "LETTER". */
631 fail ("too large dictionary %ld", (long) key_idx);
632 dict_size = key_idx;
634 size_nodes (root);
636 dict = (char *) xmalloc (dict_size + 1);
637 for (i = 0; i < num_nodes; ++i)
638 if (nodes[i]->in_dict)
639 memcpy (dict + nodes[i]->key_idx, nodes[i]->key, nodes[i]->key_len);
640 dict[dict_size] = '\0';
642 tree = (unsigned char *) xmalloc (tree_size);
643 memset (tree, 0, tree_size);
644 write_nodes (root, 0);
/* Print out the huge copyright notice, as a C comment, to stdout.  */

static void
write_copyright (void)
{
  static const char copyright[] = "\
/* Unicode name to codepoint.\n\
   Copyright (C) 2005-2022 Free Software Foundation, Inc.\n\
\n\
   This program is free software; you can redistribute it and/or modify it\n\
   under the terms of the GNU General Public License as published by the\n\
   Free Software Foundation; either version 3, or (at your option) any\n\
   later version.\n\
\n\
   This program is distributed in the hope that it will be useful,\n\
   but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
   GNU General Public License for more details.\n\
\n\
   You should have received a copy of the GNU General Public License\n\
   along with this program; see the file COPYING3. If not see\n\
   <http://www.gnu.org/licenses/>.\n\
\n\
\n\
   Copyright (C) 1991-2021 Unicode, Inc. All rights reserved.\n\
   Distributed under the Terms of Use in\n\
   http://www.unicode.org/copyright.html.\n\
\n\
   Permission is hereby granted, free of charge, to any person\n\
   obtaining a copy of the Unicode data files and any associated\n\
   documentation (the \"Data Files\") or Unicode software and any\n\
   associated documentation (the \"Software\") to deal in the Data Files\n\
   or Software without restriction, including without limitation the\n\
   rights to use, copy, modify, merge, publish, distribute, and/or\n\
   sell copies of the Data Files or Software, and to permit persons to\n\
   whom the Data Files or Software are furnished to do so, provided\n\
   that (a) the above copyright notice(s) and this permission notice\n\
   appear with all copies of the Data Files or Software, (b) both the\n\
   above copyright notice(s) and this permission notice appear in\n\
   associated documentation, and (c) there is clear notice in each\n\
   modified Data File or in the Software as well as in the\n\
   documentation associated with the Data File(s) or Software that the\n\
   data or software has been modified.\n\
\n\
   THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY\n\
   OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE\n\
   WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n\
   NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE\n\
   COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR\n\
   ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY\n\
   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\n\
   WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\n\
   ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\n\
   OF THE DATA FILES OR SOFTWARE.\n\
\n\
   Except as contained in this notice, the name of a copyright holder\n\
   shall not be used in advertising or otherwise to promote the sale,\n\
   use or other dealings in these Data Files or Software without prior\n\
   written authorization of the copyright holder. */\n";

  puts (copyright);
}
710 static void
711 write_dict (void)
713 size_t i;
715 printf ("static const char uname2c_dict[%ld] =\n", (long) (dict_size + 1));
716 for (i = 0; i < dict_size; i += 77)
717 printf ("\"%.77s\"%s\n", dict + i, i + 76 > dict_size ? ";" : "");
718 puts ("");
721 static void
722 write_tree (void)
724 size_t i, j;
726 printf ("static const unsigned char uname2c_tree[%ld] = {\n",
727 (long) tree_size);
728 for (i = 0, j = 0; i < tree_size; ++i)
730 printf ("%s0x%02x%s", j == 0 ? " " : "", tree[i],
731 i == tree_size - 1 ? " };\n\n" : j == 11 ? ",\n" : ", ");
732 if (j == 11)
733 j = 0;
734 else
735 ++j;
739 static void
740 write_generated (void)
742 size_t i, j;
744 puts ("static const cppchar_t uname2c_pairs[] = {");
745 for (i = 0; i < ARRAY_SIZE (generated_ranges); ++i)
747 if (i == 0)
749 else if (generated_ranges[i - 1].idx != generated_ranges[i].idx)
750 puts (", 0,");
751 else
752 puts (",");
753 printf (" 0x%lx, 0x%lx /* %s */",
754 generated_ranges[i].low,
755 generated_ranges[i].high,
756 generated_ranges[i].prefix);
758 puts (", 0 };\n");
760 puts ("static const unsigned char uname2c_generated[] = {");
761 for (i = 0, j = -1; i < ARRAY_SIZE (generated_ranges); ++i)
763 if (i == 0 || generated_ranges[i - 1].idx != generated_ranges[i].idx)
764 printf ("%s %d /* %s */", i ? ",\n" : "",
765 ++j, generated_ranges[i].prefix);
766 j += 2;
768 puts (" };\n");
771 /* Main program. */
774 main (int argc, char **argv)
776 size_t i;
778 if (argc != 3)
779 fail ("too few arguments to makeradixtree");
780 for (i = 0; i < ARRAY_SIZE (generated_ranges); ++i)
781 if (!generated_ranges[i].max_high)
782 generated_ranges[i].max_high = generated_ranges[i].high;
783 read_table (argv[1], false);
784 read_table (argv[2], true);
785 build_radix_tree ();
787 write_copyright ();
788 write_dict ();
789 write_tree ();
790 write_generated ();
791 printf ("static const unsigned int uname2c_max_name_len = %ld;\n\n", max_entry_len);
792 return 0;