Update.
[glibc.git] / locale / programs / charmap.c
blobdec09a611816234dc57142659d84765b7f8fd337
1 /* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <ctype.h>
25 #include <dirent.h>
26 #include <errno.h>
27 #include <libintl.h>
28 #include <limits.h>
29 #include <obstack.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
34 #include "error.h"
35 #include "linereader.h"
36 #include "charmap.h"
37 #include "locfile.h"
38 #include "repertoire.h"
40 #include <assert.h>
43 /* Define the lookup function. */
44 #include "charmap-kw.h"
47 extern void *xmalloc (size_t __n);
49 /* Prototypes for local functions. */
50 static struct charmap_t *parse_charmap (struct linereader *cmfile);
51 static void new_width (struct linereader *cmfile, struct charmap_t *result,
52 const char *from, const char *to,
53 unsigned long int width);
54 static void charmap_new_char (struct linereader *lr, struct charmap_t *cm,
55 int nbytes, char *bytes, const char *from,
56 const char *to, int decimal_ellipsis, int step);
59 struct charmap_t *
60 charmap_read (const char *filename)
62 struct charmap_t *result = NULL;
64 if (filename != NULL)
66 struct linereader *cmfile;
68 /* First try the name as found in the parameter. */
69 cmfile = lr_open (filename, charmap_hash);
70 if (cmfile == NULL)
72 /* No successful. So start looking through the directories
73 in the I18NPATH if this is a simple name. */
74 if (strchr (filename, '/') == NULL)
76 char *i18npath = getenv ("I18NPATH");
77 if (i18npath != NULL && *i18npath != '\0')
79 char path[strlen (filename) + 1 + strlen (i18npath)
80 + sizeof ("/charmaps/") - 1];
81 char *next;
82 i18npath = strdupa (i18npath);
85 while (cmfile == NULL
86 && (next = strsep (&i18npath, ":")) != NULL)
88 stpcpy (stpcpy (stpcpy (path, next), "/charmaps/"),
89 filename);
91 cmfile = lr_open (path, charmap_hash);
93 if (cmfile == NULL)
95 /* Try without the "/charmaps" part. */
96 stpcpy (stpcpy (path, next), filename);
98 cmfile = lr_open (path, charmap_hash);
103 if (cmfile == NULL)
105 /* Try the default directory. */
106 char path[sizeof (CHARMAP_PATH) + strlen (filename) + 1];
108 stpcpy (stpcpy (stpcpy (path, CHARMAP_PATH), "/"), filename);
109 cmfile = lr_open (path, charmap_hash);
114 if (cmfile != NULL)
116 result = parse_charmap (cmfile);
118 if (result == NULL && !be_quiet)
119 error (0, errno, _("character map file `%s' not found"), filename);
123 if (result == NULL)
125 /* OK, one more try. We also accept the names given to the
126 character sets in the files. Sometimes they differ from the
127 file name. */
128 DIR *dir;
129 struct dirent *dirent;
131 dir = opendir (CHARMAP_PATH);
132 if (dir != NULL)
134 while ((dirent = readdir (dir)) != NULL)
135 if (strcmp (dirent->d_name, ".") != 0
136 && strcmp (dirent->d_name, "..") != 0)
138 char buf[sizeof (CHARMAP_PATH)
139 + strlen (dirent->d_name) + 1];
140 FILE *fp;
141 #ifdef _DIRENT_HAVE_D_TYPE
142 if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_REG)
143 continue;
144 #endif
145 stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"),
146 dirent->d_name);
148 fp = fopen (buf, "r");
149 if (fp != NULL)
151 char *name = NULL;
153 while (!feof (fp))
155 char junk[BUFSIZ];
157 if (fscanf (fp, " <code_set_name> %as", &name) == 1
158 || (fscanf (fp, " <code_set_name> \"%as\"", &name)
159 == 1)
160 || fscanf (fp, "%% alias %as", &name) == 1)
162 if (strcasecmp (name, filename) == 0)
163 break;
165 free (name);
166 name = NULL;
169 if (fgets (junk, sizeof junk, fp) != NULL)
171 if (strstr (junk, "CHARMAP") != NULL)
172 /* We cannot expect more aliases from now on. */
173 break;
175 while (strchr (junk, '\n') == NULL
176 && fgets (junk, sizeof junk, fp) != NULL)
177 continue;
181 fclose (fp);
183 if (name != NULL)
185 struct linereader *cmfile;
187 cmfile = lr_open (buf, charmap_hash);
188 result = (cmfile == NULL
189 ? NULL : parse_charmap (cmfile));
191 if (result)
192 return result;
194 break;
199 closedir (dir);
203 if (result == NULL)
205 struct linereader *cmfile;
207 cmfile = lr_open (CHARMAP_PATH "/" DEFAULT_CHARMAP, charmap_hash);
209 result = cmfile == NULL ? NULL : parse_charmap (cmfile);
211 if (result == NULL)
212 error (4, errno, _("default character map file `%s' not found"),
213 DEFAULT_CHARMAP);
216 return result;
220 static struct charmap_t *
221 parse_charmap (struct linereader *cmfile)
223 struct charmap_t *result;
224 int state;
225 enum token_t expected_tok = tok_error;
226 const char *expected_str = NULL;
227 char *from_name = NULL;
228 char *to_name = NULL;
229 enum token_t ellipsis = 0;
230 int step = 1;
232 /* We don't want symbolic names in string to be translated. */
233 cmfile->translate_strings = 0;
235 /* Allocate room for result. */
236 result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t));
237 memset (result, '\0', sizeof (struct charmap_t));
238 /* The default DEFAULT_WIDTH is 1. */
239 result->width_default = 1;
241 #define obstack_chunk_alloc malloc
242 #define obstack_chunk_free free
243 obstack_init (&result->mem_pool);
245 if (init_hash (&result->char_table, 256)
246 || init_hash (&result->byte_table, 256))
248 free (result);
249 return NULL;
252 /* We use a state machine to describe the charmap description file
253 format. */
254 state = 1;
255 while (1)
257 /* What's on? */
258 struct token *now = lr_token (cmfile, NULL, NULL);
259 enum token_t nowtok = now->tok;
260 struct token *arg;
262 if (nowtok == tok_eof)
263 break;
265 switch (state)
267 case 1:
268 /* The beginning. We expect the special declarations, EOL or
269 `CHARMAP'. */
270 if (nowtok == tok_eol)
271 /* Ignore empty lines. */
272 continue;
274 if (nowtok == tok_charmap)
276 from_name = NULL;
277 to_name = NULL;
279 /* We have to set up the real work. Fill in some
280 default values. */
281 if (result->mb_cur_max == 0)
282 result->mb_cur_max = 1;
283 if (result->mb_cur_min == 0)
284 result->mb_cur_min = result->mb_cur_max;
285 if (result->mb_cur_min > result->mb_cur_max)
287 if (!be_quiet)
288 error (0, 0, _("\
289 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
290 cmfile->fname);
292 result->mb_cur_min = result->mb_cur_max;
295 lr_ignore_rest (cmfile, 1);
297 state = 2;
298 continue;
301 if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max
302 && nowtok != tok_mb_cur_min && nowtok != tok_escape_char
303 && nowtok != tok_comment_char && nowtok != tok_g0esc
304 && nowtok != tok_g1esc && nowtok != tok_g2esc
305 && nowtok != tok_g3esc && nowtok != tok_repertoiremap
306 && nowtok != tok_include)
308 lr_error (cmfile, _("syntax error in prolog: %s"),
309 _("invalid definition"));
311 lr_ignore_rest (cmfile, 0);
312 continue;
315 /* We know that we need an argument. */
316 arg = lr_token (cmfile, NULL, NULL);
318 switch (nowtok)
320 case tok_code_set_name:
321 case tok_repertoiremap:
322 if (arg->tok != tok_ident && arg->tok != tok_string)
324 badarg:
325 lr_error (cmfile, _("syntax error in prolog: %s"),
326 _("bad argument"));
328 lr_ignore_rest (cmfile, 0);
329 continue;
332 if (nowtok == tok_code_set_name)
333 result->code_set_name = obstack_copy0 (&result->mem_pool,
334 arg->val.str.startmb,
335 arg->val.str.lenmb);
336 else
337 result->repertoiremap = obstack_copy0 (&result->mem_pool,
338 arg->val.str.startmb,
339 arg->val.str.lenmb);
341 lr_ignore_rest (cmfile, 1);
342 continue;
344 case tok_mb_cur_max:
345 case tok_mb_cur_min:
346 if (arg->tok != tok_number)
347 goto badarg;
349 if (verbose
350 && ((nowtok == tok_mb_cur_max
351 && result->mb_cur_max != 0)
352 || (nowtok == tok_mb_cur_max
353 && result->mb_cur_max != 0)))
354 lr_error (cmfile, _("duplicate definition of <%s>"),
355 nowtok == tok_mb_cur_min
356 ? "mb_cur_min" : "mb_cur_max");
358 if (arg->val.num < 1)
360 lr_error (cmfile,
361 _("value for <%s> must be 1 or greater"),
362 nowtok == tok_mb_cur_min
363 ? "mb_cur_min" : "mb_cur_max");
365 lr_ignore_rest (cmfile, 0);
366 continue;
368 if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0
369 && (int) arg->val.num < result->mb_cur_min)
370 || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0
371 && (int) arg->val.num > result->mb_cur_max))
373 lr_error (cmfile, _("\
374 value of <%s> must be greater or equal than the value of <%s>"),
375 "mb_cur_max", "mb_cur_min");
377 lr_ignore_rest (cmfile, 0);
378 continue;
381 if (nowtok == tok_mb_cur_max)
382 result->mb_cur_max = arg->val.num;
383 else
384 result->mb_cur_min = arg->val.num;
386 lr_ignore_rest (cmfile, 1);
387 continue;
389 case tok_escape_char:
390 case tok_comment_char:
391 if (arg->tok != tok_ident)
392 goto badarg;
394 if (arg->val.str.lenmb != 1)
396 lr_error (cmfile, _("\
397 argument to <%s> must be a single character"),
398 nowtok == tok_escape_char ? "escape_char"
399 : "comment_char");
401 lr_ignore_rest (cmfile, 0);
402 continue;
405 if (nowtok == tok_escape_char)
406 cmfile->escape_char = *arg->val.str.startmb;
407 else
408 cmfile->comment_char = *arg->val.str.startmb;
410 lr_ignore_rest (cmfile, 1);
411 continue;
413 case tok_g0esc:
414 case tok_g1esc:
415 case tok_g2esc:
416 case tok_g3esc:
417 case tok_escseq:
418 lr_ignore_rest (cmfile, 0); /* XXX */
419 continue;
421 case tok_include:
422 lr_error (cmfile, _("\
423 character sets with locking states are not supported"));
424 exit (4);
426 default:
427 /* Cannot happen. */
428 assert (! "Should not happen");
430 break;
432 case 2:
433 /* We have seen `CHARMAP' and now are in the body. Each line
434 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
435 if (nowtok == tok_eol)
436 /* Ignore empty lines. */
437 continue;
439 if (nowtok == tok_end)
441 expected_tok = tok_charmap;
442 expected_str = "CHARMAP";
443 state = 90;
444 continue;
447 if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
449 lr_error (cmfile, _("syntax error in %s definition: %s"),
450 "CHARMAP", _("no symbolic name given"));
452 lr_ignore_rest (cmfile, 0);
453 continue;
456 /* If the previous line was not completely correct free the
457 used memory. */
458 if (from_name != NULL)
459 obstack_free (&result->mem_pool, from_name);
461 if (nowtok == tok_bsymbol)
462 from_name = (char *) obstack_copy0 (&result->mem_pool,
463 now->val.str.startmb,
464 now->val.str.lenmb);
465 else
467 obstack_printf (&result->mem_pool, "U%08X",
468 cmfile->token.val.ucs4);
469 obstack_1grow (&result->mem_pool, '\0');
470 from_name = (char *) obstack_finish (&result->mem_pool);
472 to_name = NULL;
474 state = 3;
475 continue;
477 case 3:
478 /* We have two possibilities: We can see an ellipsis or an
479 encoding value. */
480 if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
481 || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2
482 || nowtok == tok_ellipsis2_2)
484 ellipsis = nowtok;
485 if (nowtok == tok_ellipsis4_2)
487 step = 2;
488 nowtok = tok_ellipsis4;
490 else if (nowtok == tok_ellipsis2_2)
492 step = 2;
493 nowtok = tok_ellipsis2;
495 state = 4;
496 continue;
498 /* FALLTHROUGH */
500 case 5:
501 if (nowtok != tok_charcode)
503 lr_error (cmfile, _("syntax error in %s definition: %s"),
504 "CHARMAP", _("invalid encoding given"));
506 lr_ignore_rest (cmfile, 0);
508 state = 2;
509 continue;
512 if (now->val.charcode.nbytes < result->mb_cur_min)
513 lr_error (cmfile, _("too few bytes in character encoding"));
514 else if (now->val.charcode.nbytes > result->mb_cur_max)
515 lr_error (cmfile, _("too many bytes in character encoding"));
516 else
517 charmap_new_char (cmfile, result, now->val.charcode.nbytes,
518 now->val.charcode.bytes, from_name, to_name,
519 ellipsis != tok_ellipsis2, step);
521 /* Ignore trailing comment silently. */
522 lr_ignore_rest (cmfile, 0);
524 from_name = NULL;
525 to_name = NULL;
526 ellipsis = tok_none;
527 step = 1;
529 state = 2;
530 continue;
532 case 4:
533 if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
535 lr_error (cmfile, _("syntax error in %s definition: %s"),
536 "CHARMAP",
537 _("no symbolic name given for end of range"));
539 lr_ignore_rest (cmfile, 0);
540 continue;
543 /* Copy the to-name in a safe place. */
544 if (nowtok == tok_bsymbol)
545 to_name = (char *) obstack_copy0 (&result->mem_pool,
546 cmfile->token.val.str.startmb,
547 cmfile->token.val.str.lenmb);
548 else
550 obstack_printf (&result->mem_pool, "U%08X",
551 cmfile->token.val.ucs4);
552 obstack_1grow (&result->mem_pool, '\0');
553 to_name = (char *) obstack_finish (&result->mem_pool);
556 state = 5;
557 continue;
559 case 90:
560 if (nowtok != expected_tok)
561 lr_error (cmfile, _("\
562 `%1$s' definition does not end with `END %1$s'"), expected_str);
564 lr_ignore_rest (cmfile, nowtok == expected_tok);
565 state = 91;
566 continue;
568 case 91:
569 /* Waiting for WIDTH... */
570 if (nowtok == tok_eol)
571 /* Ignore empty lines. */
572 continue;
574 if (nowtok == tok_width_default)
576 state = 92;
577 continue;
580 if (nowtok == tok_width)
582 lr_ignore_rest (cmfile, 1);
583 state = 93;
584 continue;
587 if (nowtok == tok_width_variable)
589 lr_ignore_rest (cmfile, 1);
590 state = 98;
591 continue;
594 lr_error (cmfile, _("\
595 only WIDTH definitions are allowed to follow the CHARMAP definition"));
597 lr_ignore_rest (cmfile, 0);
598 continue;
600 case 92:
601 if (nowtok != tok_number)
602 lr_error (cmfile, _("value for %s must be an integer"),
603 "WIDTH_DEFAULT");
604 else
605 result->width_default = now->val.num;
607 lr_ignore_rest (cmfile, nowtok == tok_number);
609 state = 91;
610 continue;
612 case 93:
613 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
614 "%s...%s %d\n". */
615 if (nowtok == tok_eol)
616 /* ignore empty lines. */
617 continue;
619 if (nowtok == tok_end)
621 expected_tok = tok_width;
622 expected_str = "WIDTH";
623 state = 90;
624 continue;
627 if (nowtok != tok_bsymbol)
629 lr_error (cmfile, _("syntax error in %s definition: %s"),
630 "WIDTH", _("no symbolic name given"));
632 lr_ignore_rest (cmfile, 0);
633 continue;
636 if (from_name != NULL)
637 obstack_free (&result->mem_pool, from_name);
639 from_name = (char *) obstack_copy0 (&result->mem_pool,
640 now->val.str.startmb,
641 now->val.str.lenmb);
642 to_name = NULL;
644 state = 94;
645 continue;
647 case 94:
648 if (nowtok == tok_ellipsis3)
650 state = 95;
651 continue;
654 case 96:
655 if (nowtok != tok_number)
656 lr_error (cmfile, _("value for %s must be an integer"),
657 "WIDTH");
658 else
660 /* Store width for chars. */
661 new_width (cmfile, result, from_name, to_name, now->val.num);
663 from_name = NULL;
664 to_name = NULL;
667 lr_ignore_rest (cmfile, nowtok == tok_number);
669 state = 93;
670 continue;
672 case 95:
673 if (nowtok != tok_bsymbol)
675 lr_error (cmfile, _("syntax error in %s definition: %s"),
676 "WIDTH", _("no symbolic name given for end of range"));
678 lr_ignore_rest (cmfile, 0);
680 state = 93;
681 continue;
684 to_name = (char *) obstack_copy0 (&result->mem_pool,
685 now->val.str.startmb,
686 now->val.str.lenmb);
688 state = 96;
689 continue;
691 case 98:
692 /* We now expect `END WIDTH_VARIABLE' or lines of the format
693 "%s\n" or "%s...%s\n". */
694 if (nowtok == tok_eol)
695 /* ignore empty lines. */
696 continue;
698 if (nowtok == tok_end)
700 expected_tok = tok_width_variable;
701 expected_str = "WIDTH_VARIABLE";
702 state = 90;
703 continue;
706 if (nowtok != tok_bsymbol)
708 lr_error (cmfile, _("syntax error in %s definition: %s"),
709 "WIDTH_VARIABLE", _("no symbolic name given"));
711 lr_ignore_rest (cmfile, 0);
713 continue;
716 if (from_name != NULL)
717 obstack_free (&result->mem_pool, from_name);
719 from_name = (char *) obstack_copy0 (&result->mem_pool,
720 now->val.str.startmb,
721 now->val.str.lenmb);
722 to_name = NULL;
724 state = 99;
725 continue;
727 case 99:
728 if (nowtok == tok_ellipsis3)
729 state = 100;
731 /* Store info. */
732 from_name = NULL;
734 /* Warn */
735 state = 98;
736 continue;
738 case 100:
739 if (nowtok != tok_bsymbol)
740 lr_error (cmfile, _("syntax error in %s definition: %s"),
741 "WIDTH_VARIABLE",
742 _("no symbolic name given for end of range"));
743 else
745 to_name = (char *) obstack_copy0 (&result->mem_pool,
746 now->val.str.startmb,
747 now->val.str.lenmb);
748 /* XXX Enter value into table. */
751 lr_ignore_rest (cmfile, nowtok == tok_bsymbol);
753 state = 98;
754 continue;
756 default:
757 error (5, 0, _("%s: error in state machine"), __FILE__);
758 /* NOTREACHED */
760 break;
763 if (state != 91 && !be_quiet)
764 error (0, 0, _("%s: premature end of file"), cmfile->fname);
766 lr_close (cmfile);
768 return result;
772 static void
773 new_width (struct linereader *cmfile, struct charmap_t *result,
774 const char *from, const char *to, unsigned long int width)
776 struct charseq *from_val;
777 struct charseq *to_val;
779 from_val = charmap_find_value (result, from, strlen (from));
780 if (from_val == NULL)
782 lr_error (cmfile, _("unknown character `%s'"), from);
783 return;
786 if (to == NULL)
787 to_val = from_val;
788 else
790 to_val = charmap_find_value (result, to, strlen (to));
791 if (to_val == NULL)
793 lr_error (cmfile, _("unknown character `%s'"), to);
794 return;
798 if (result->nwidth_rules >= result->nwidth_rules_max)
800 size_t new_size = result->nwidth_rules + 32;
801 struct width_rule *new_rules =
802 (struct width_rule *) obstack_alloc (&result->mem_pool,
803 (new_size
804 * sizeof (struct width_rule)));
806 memcpy (new_rules, result->width_rules,
807 result->nwidth_rules_max * sizeof (struct width_rule));
809 result->width_rules = new_rules;
810 result->nwidth_rules_max = new_size;
813 result->width_rules[result->nwidth_rules].from = from_val;
814 result->width_rules[result->nwidth_rules].to = to_val;
815 result->width_rules[result->nwidth_rules].width = (unsigned int) width;
816 ++result->nwidth_rules;
820 struct charseq *
821 charmap_find_value (const struct charmap_t *cm, const char *name, size_t len)
823 void *result;
825 return (find_entry ((hash_table *) &cm->char_table, name, len, &result)
826 < 0 ? NULL : (struct charseq *) result);
830 static void
831 charmap_new_char (struct linereader *lr, struct charmap_t *cm,
832 int nbytes, char *bytes, const char *from, const char *to,
833 int decimal_ellipsis, int step)
835 hash_table *ht = &cm->char_table;
836 hash_table *bt = &cm->byte_table;
837 struct obstack *ob = &cm->mem_pool;
838 char *from_end;
839 char *to_end;
840 const char *cp;
841 int prefix_len, len1, len2;
842 unsigned int from_nr, to_nr, cnt;
843 struct charseq *newp;
845 len1 = strlen (from);
847 if (to == NULL)
849 newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
850 newp->nbytes = nbytes;
851 memcpy (newp->bytes, bytes, nbytes);
852 newp->name = from;
854 newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
855 if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9))
857 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
858 xxxx and xxxxxxxx are hexadecimal numbers. In this case
859 we use the value of xxxx or xxxxxxxx as the UCS4 value of
860 this character and we don't have to consult the repertoire
861 map.
863 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
864 and xxxxxxxx also give the code point in UCS4 but this must
865 be in the private, i.e., unassigned, area. This should be
866 used for characters which do not (yet) have an equivalent
867 in ISO 10646 and Unicode. */
868 char *endp;
870 errno = 0;
871 newp->ucs4 = strtoul (from + 1, &endp, 16);
872 if (endp - from != len1
873 || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
874 || newp->ucs4 >= 0x80000000)
875 /* This wasn't successful. Signal this name cannot be a
876 correct UCS value. */
877 newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
880 insert_entry (ht, from, len1, newp);
881 insert_entry (bt, newp->bytes, nbytes, newp);
882 /* Please note that it isn't a bug if a symbol is defined more
883 than once. All later definitions are simply discarded. */
884 return;
887 /* We have a range: the names must have names with equal prefixes
888 and an equal number of digits, where the second number is greater
889 or equal than the first. */
890 len2 = strlen (to);
892 if (len1 != len2)
894 illegal_range:
895 lr_error (lr, _("invalid names for character range"));
896 return;
899 cp = &from[len1 - 1];
900 if (decimal_ellipsis)
901 while (isdigit (*cp) && cp >= from)
902 --cp;
903 else
904 while (isxdigit (*cp) && cp >= from)
906 if (!isdigit (*cp) && !isupper (*cp))
907 lr_error (lr, _("\
908 hexadecimal range format should use only capital characters"));
909 --cp;
912 prefix_len = (cp - from) + 1;
914 if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
915 goto illegal_range;
917 errno = 0;
918 from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16);
919 if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE)
920 || ((to_nr = strtoul (&to[prefix_len], &to_end,
921 decimal_ellipsis ? 10 : 16)) == ULONG_MAX
922 && errno == ERANGE)
923 || *to_end != '\0')
925 lr_error (lr, _("<%s> and <%s> are illegal names for range"), from, to);
926 return;
929 if (from_nr > to_nr)
931 lr_error (lr, _("upper limit in range is not higher then lower limit"));
932 return;
935 for (cnt = from_nr; cnt <= to_nr; cnt += step)
937 char *name_end;
938 obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X",
939 prefix_len, from, len1 - prefix_len, cnt);
940 obstack_1grow (ob, '\0');
941 name_end = obstack_finish (ob);
943 newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
944 newp->nbytes = nbytes;
945 memcpy (newp->bytes, bytes, nbytes);
946 newp->name = name_end;
948 newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
949 if ((name_end[0] == 'U' || name_end[0] == 'P')
950 && (len1 == 5 || len1 == 9))
952 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
953 xxxx and xxxxxxxx are hexadecimal numbers. In this case
954 we use the value of xxxx or xxxxxxxx as the UCS4 value of
955 this character and we don't have to consult the repertoire
956 map.
958 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
959 and xxxxxxxx also give the code point in UCS4 but this must
960 be in the private, i.e., unassigned, area. This should be
961 used for characters which do not (yet) have an equivalent
962 in ISO 10646 and Unicode. */
963 char *endp;
965 errno = 0;
966 newp->ucs4 = strtoul (name_end, &endp, 16);
967 if (endp - name_end != len1
968 || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
969 || newp->ucs4 >= 0x80000000)
970 /* This wasn't successful. Signal this name cannot be a
971 correct UCS value. */
972 newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
975 insert_entry (ht, name_end, len1, newp);
976 insert_entry (bt, newp->bytes, nbytes, newp);
977 /* Please note we don't examine the return value since it is no error
978 if we have two definitions for a symbol. */
980 /* Increment the value in the byte sequence. */
981 if (++bytes[nbytes - 1] == '\0')
983 int b = nbytes - 2;
986 if (b < 0)
988 lr_error (lr,
989 _("resulting bytes for range not representable."));
990 return;
992 while (++bytes[b--] == 0);
998 struct charseq *
999 charmap_find_symbol (const struct charmap_t *cm, const char *bytes,
1000 size_t nbytes)
1002 void *result;
1004 return (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &result)
1005 < 0 ? NULL : (struct charseq *) result);