Correctly handle m68k long double format.
[glibc/pb-stable.git] / locale / programs / charmap.c
blob14cb4f40892056185fc593df610f1218cc6b3100
1 /* Copyright (C) 1996,1998,1999,2000,2001 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <ctype.h>
25 #include <errno.h>
26 #include <libintl.h>
27 #include <limits.h>
28 #include <obstack.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
33 #include "error.h"
34 #include "linereader.h"
35 #include "charmap.h"
36 #include "charmap-dir.h"
37 #include "repertoire.h"
39 #include <assert.h>
42 /* Define the lookup function. */
43 #include "charmap-kw.h"
/* Memory allocator defined outside this file.  */
extern void *xmalloc (size_t __n);


/* Prototypes for local functions.  */

/* Parse an already opened charmap description into a new charmap.  */
static struct charmap_t *parse_charmap (struct linereader *cmfile,
                                        int verbose, int be_quiet);

/* Record a WIDTH rule for the character FROM or the range FROM...TO.  */
static void new_width (struct linereader *cmfile, struct charmap_t *result,
                       const char *from, const char *to,
                       unsigned long int width);

/* Enter one character, or a range of characters, into the charmap.  */
static void charmap_new_char (struct linereader *lr, struct charmap_t *cm,
                              int nbytes, char *bytes, const char *from,
                              const char *to, int decimal_ellipsis, int step);


/* NOTE(review): not referenced in the visible part of this file.  */
static const char *null_pointer;
61 static struct linereader *
62 cmlr_open (const char *directory, const char *name, kw_hash_fct_t hf)
64 FILE *fp;
66 fp = charmap_open (directory, name);
67 if (fp == NULL)
68 return NULL;
69 else
71 size_t dlen = strlen (directory);
72 int add_slash = (dlen == 0 || directory[dlen - 1] != '/');
73 size_t nlen = strlen (name);
74 char *pathname;
75 char *p;
77 pathname = alloca (dlen + add_slash + nlen + 1);
78 p = stpcpy (pathname, directory);
79 if (add_slash)
80 *p++ = '/';
81 stpcpy (p, name);
83 return lr_create (fp, pathname, hf);
87 struct charmap_t *
88 charmap_read (const char *filename, int verbose, int be_quiet, int use_default)
90 struct charmap_t *result = NULL;
92 if (filename != NULL)
94 struct linereader *cmfile;
96 /* First try the name as found in the parameter. */
97 cmfile = lr_open (filename, charmap_hash);
98 if (cmfile == NULL)
100 /* No successful. So start looking through the directories
101 in the I18NPATH if this is a simple name. */
102 if (strchr (filename, '/') == NULL)
104 char *i18npath = getenv ("I18NPATH");
105 if (i18npath != NULL && *i18npath != '\0')
107 char path[strlen (i18npath) + sizeof ("/charmaps")];
108 char *next;
109 i18npath = strdupa (i18npath);
111 while (cmfile == NULL
112 && (next = strsep (&i18npath, ":")) != NULL)
114 stpcpy (stpcpy (path, next), "/charmaps");
115 cmfile = cmlr_open (path, filename, charmap_hash);
117 if (cmfile == NULL)
119 /* Try without the "/charmaps" part. */
120 cmfile = cmlr_open (next, filename, charmap_hash);
125 if (cmfile == NULL)
127 /* Try the default directory. */
128 cmfile = cmlr_open (CHARMAP_PATH, filename, charmap_hash);
133 if (cmfile != NULL)
135 result = parse_charmap (cmfile, verbose, be_quiet);
137 if (result == NULL && !be_quiet)
138 error (0, errno, _("character map file `%s' not found"), filename);
142 if (result == NULL && filename != NULL && strchr (filename, '/') == NULL)
144 /* OK, one more try. We also accept the names given to the
145 character sets in the files. Sometimes they differ from the
146 file name. */
147 CHARMAP_DIR *dir;
149 dir = charmap_opendir (CHARMAP_PATH);
150 if (dir != NULL)
152 const char *dirent;
154 while ((dirent = charmap_readdir (dir)) != NULL)
156 char **aliases;
157 char **p;
158 int found;
160 aliases = charmap_aliases (CHARMAP_PATH, dirent);
161 found = 0;
162 for (p = aliases; *p; p++)
163 if (strcasecmp (*p, filename) == 0)
165 found = 1;
166 break;
168 charmap_free_aliases (aliases);
170 if (found)
172 struct linereader *cmfile;
174 cmfile = cmlr_open (CHARMAP_PATH, dirent, charmap_hash);
175 if (cmfile != NULL)
176 result = parse_charmap (cmfile, verbose, be_quiet);
178 break;
182 charmap_closedir (dir);
186 if (result == NULL && DEFAULT_CHARMAP != NULL)
188 struct linereader *cmfile;
190 cmfile = cmlr_open (CHARMAP_PATH, DEFAULT_CHARMAP, charmap_hash);
191 if (cmfile != NULL)
192 result = parse_charmap (cmfile, verbose, be_quiet);
194 if (result == NULL)
195 error (4, errno, _("default character map file `%s' not found"),
196 DEFAULT_CHARMAP);
199 /* Test of ASCII compatibility of locale encoding.
201 Verify that the encoding to be used in a locale is ASCII compatible,
202 at least for the graphic characters, excluding the control characters,
203 '$' and '@'. This constraint comes from an ISO C 99 restriction.
205 ISO C 99 section 7.17.(2) (about wchar_t):
206 the null character shall have the code value zero and each member of
207 the basic character set shall have a code value equal to its value
208 when used as the lone character in an integer character constant.
209 ISO C 99 section 5.2.1.(3):
210 Both the basic source and basic execution character sets shall have
211 the following members: the 26 uppercase letters of the Latin alphabet
212 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
213 the 26 lowercase letters of the Latin alphabet
214 a b c d e f g h i j k l m n o p q r s t u v w x y z
215 the 10 decimal digits
216 0 1 2 3 4 5 6 7 8 9
217 the following 29 graphic characters
218 ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
219 the space character, and control characters representing horizontal
220 tab, vertical tab, and form feed.
222 Therefore, for all members of the "basic character set", the 'char' code
223 must have the same value as the 'wchar_t' code, which in glibc is the
224 same as the Unicode code, which for all of the enumerated characters
225 is identical to the ASCII code. */
226 if (result != NULL && use_default)
228 static const char basic_charset[] =
230 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
231 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
232 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
233 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
234 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
235 '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
236 '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
237 '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
239 int failed = 0;
240 const char *p = basic_charset;
244 struct charseq * seq = charmap_find_symbol (result, p, 1);
246 if (seq == NULL || seq->ucs4 != *p)
247 failed = 1;
249 while (*p++ != '\0');
251 if (failed)
252 fprintf (stderr, _("\
253 character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
254 result->code_set_name);
257 return result;
261 static struct charmap_t *
262 parse_charmap (struct linereader *cmfile, int verbose, int be_quiet)
264 struct charmap_t *result;
265 int state;
266 enum token_t expected_tok = tok_error;
267 const char *expected_str = NULL;
268 char *from_name = NULL;
269 char *to_name = NULL;
270 enum token_t ellipsis = 0;
271 int step = 1;
273 /* We don't want symbolic names in string to be translated. */
274 cmfile->translate_strings = 0;
276 /* Allocate room for result. */
277 result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t));
278 memset (result, '\0', sizeof (struct charmap_t));
279 /* The default DEFAULT_WIDTH is 1. */
280 result->width_default = 1;
282 #define obstack_chunk_alloc malloc
283 #define obstack_chunk_free free
284 obstack_init (&result->mem_pool);
286 if (init_hash (&result->char_table, 256)
287 || init_hash (&result->byte_table, 256))
289 free (result);
290 return NULL;
293 /* We use a state machine to describe the charmap description file
294 format. */
295 state = 1;
296 while (1)
298 /* What's on? */
299 struct token *now = lr_token (cmfile, NULL, NULL, verbose);
300 enum token_t nowtok = now->tok;
301 struct token *arg;
303 if (nowtok == tok_eof)
304 break;
306 switch (state)
308 case 1:
309 /* The beginning. We expect the special declarations, EOL or
310 `CHARMAP'. */
311 if (nowtok == tok_eol)
312 /* Ignore empty lines. */
313 continue;
315 if (nowtok == tok_charmap)
317 from_name = NULL;
318 to_name = NULL;
320 /* We have to set up the real work. Fill in some
321 default values. */
322 if (result->mb_cur_max == 0)
323 result->mb_cur_max = 1;
324 if (result->mb_cur_min == 0)
325 result->mb_cur_min = result->mb_cur_max;
326 if (result->mb_cur_min > result->mb_cur_max)
328 if (!be_quiet)
329 error (0, 0, _("\
330 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
331 cmfile->fname);
333 result->mb_cur_min = result->mb_cur_max;
336 lr_ignore_rest (cmfile, 1);
338 state = 2;
339 continue;
342 if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max
343 && nowtok != tok_mb_cur_min && nowtok != tok_escape_char
344 && nowtok != tok_comment_char && nowtok != tok_g0esc
345 && nowtok != tok_g1esc && nowtok != tok_g2esc
346 && nowtok != tok_g3esc && nowtok != tok_repertoiremap
347 && nowtok != tok_include)
349 lr_error (cmfile, _("syntax error in prolog: %s"),
350 _("invalid definition"));
352 lr_ignore_rest (cmfile, 0);
353 continue;
356 /* We know that we need an argument. */
357 arg = lr_token (cmfile, NULL, NULL, verbose);
359 switch (nowtok)
361 case tok_code_set_name:
362 case tok_repertoiremap:
363 if (arg->tok != tok_ident && arg->tok != tok_string)
365 badarg:
366 lr_error (cmfile, _("syntax error in prolog: %s"),
367 _("bad argument"));
369 lr_ignore_rest (cmfile, 0);
370 continue;
373 if (nowtok == tok_code_set_name)
374 result->code_set_name = obstack_copy0 (&result->mem_pool,
375 arg->val.str.startmb,
376 arg->val.str.lenmb);
377 else
378 result->repertoiremap = obstack_copy0 (&result->mem_pool,
379 arg->val.str.startmb,
380 arg->val.str.lenmb);
382 lr_ignore_rest (cmfile, 1);
383 continue;
385 case tok_mb_cur_max:
386 case tok_mb_cur_min:
387 if (arg->tok != tok_number)
388 goto badarg;
390 if (verbose
391 && ((nowtok == tok_mb_cur_max
392 && result->mb_cur_max != 0)
393 || (nowtok == tok_mb_cur_max
394 && result->mb_cur_max != 0)))
395 lr_error (cmfile, _("duplicate definition of <%s>"),
396 nowtok == tok_mb_cur_min
397 ? "mb_cur_min" : "mb_cur_max");
399 if (arg->val.num < 1)
401 lr_error (cmfile,
402 _("value for <%s> must be 1 or greater"),
403 nowtok == tok_mb_cur_min
404 ? "mb_cur_min" : "mb_cur_max");
406 lr_ignore_rest (cmfile, 0);
407 continue;
409 if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0
410 && (int) arg->val.num < result->mb_cur_min)
411 || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0
412 && (int) arg->val.num > result->mb_cur_max))
414 lr_error (cmfile, _("\
415 value of <%s> must be greater or equal than the value of <%s>"),
416 "mb_cur_max", "mb_cur_min");
418 lr_ignore_rest (cmfile, 0);
419 continue;
422 if (nowtok == tok_mb_cur_max)
423 result->mb_cur_max = arg->val.num;
424 else
425 result->mb_cur_min = arg->val.num;
427 lr_ignore_rest (cmfile, 1);
428 continue;
430 case tok_escape_char:
431 case tok_comment_char:
432 if (arg->tok != tok_ident)
433 goto badarg;
435 if (arg->val.str.lenmb != 1)
437 lr_error (cmfile, _("\
438 argument to <%s> must be a single character"),
439 nowtok == tok_escape_char ? "escape_char"
440 : "comment_char");
442 lr_ignore_rest (cmfile, 0);
443 continue;
446 if (nowtok == tok_escape_char)
447 cmfile->escape_char = *arg->val.str.startmb;
448 else
449 cmfile->comment_char = *arg->val.str.startmb;
451 lr_ignore_rest (cmfile, 1);
452 continue;
454 case tok_g0esc:
455 case tok_g1esc:
456 case tok_g2esc:
457 case tok_g3esc:
458 case tok_escseq:
459 lr_ignore_rest (cmfile, 0); /* XXX */
460 continue;
462 case tok_include:
463 lr_error (cmfile, _("\
464 character sets with locking states are not supported"));
465 exit (4);
467 default:
468 /* Cannot happen. */
469 assert (! "Should not happen");
471 break;
473 case 2:
474 /* We have seen `CHARMAP' and now are in the body. Each line
475 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
476 if (nowtok == tok_eol)
477 /* Ignore empty lines. */
478 continue;
480 if (nowtok == tok_end)
482 expected_tok = tok_charmap;
483 expected_str = "CHARMAP";
484 state = 90;
485 continue;
488 if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
490 lr_error (cmfile, _("syntax error in %s definition: %s"),
491 "CHARMAP", _("no symbolic name given"));
493 lr_ignore_rest (cmfile, 0);
494 continue;
497 /* If the previous line was not completely correct free the
498 used memory. */
499 if (from_name != NULL)
500 obstack_free (&result->mem_pool, from_name);
502 if (nowtok == tok_bsymbol)
503 from_name = (char *) obstack_copy0 (&result->mem_pool,
504 now->val.str.startmb,
505 now->val.str.lenmb);
506 else
508 obstack_printf (&result->mem_pool, "U%08X",
509 cmfile->token.val.ucs4);
510 obstack_1grow (&result->mem_pool, '\0');
511 from_name = (char *) obstack_finish (&result->mem_pool);
513 to_name = NULL;
515 state = 3;
516 continue;
518 case 3:
519 /* We have two possibilities: We can see an ellipsis or an
520 encoding value. */
521 if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
522 || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2
523 || nowtok == tok_ellipsis2_2)
525 ellipsis = nowtok;
526 if (nowtok == tok_ellipsis4_2)
528 step = 2;
529 nowtok = tok_ellipsis4;
531 else if (nowtok == tok_ellipsis2_2)
533 step = 2;
534 nowtok = tok_ellipsis2;
536 state = 4;
537 continue;
539 /* FALLTHROUGH */
541 case 5:
542 if (nowtok != tok_charcode)
544 lr_error (cmfile, _("syntax error in %s definition: %s"),
545 "CHARMAP", _("invalid encoding given"));
547 lr_ignore_rest (cmfile, 0);
549 state = 2;
550 continue;
553 if (now->val.charcode.nbytes < result->mb_cur_min)
554 lr_error (cmfile, _("too few bytes in character encoding"));
555 else if (now->val.charcode.nbytes > result->mb_cur_max)
556 lr_error (cmfile, _("too many bytes in character encoding"));
557 else
558 charmap_new_char (cmfile, result, now->val.charcode.nbytes,
559 now->val.charcode.bytes, from_name, to_name,
560 ellipsis != tok_ellipsis2, step);
562 /* Ignore trailing comment silently. */
563 lr_ignore_rest (cmfile, 0);
565 from_name = NULL;
566 to_name = NULL;
567 ellipsis = tok_none;
568 step = 1;
570 state = 2;
571 continue;
573 case 4:
574 if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
576 lr_error (cmfile, _("syntax error in %s definition: %s"),
577 "CHARMAP",
578 _("no symbolic name given for end of range"));
580 lr_ignore_rest (cmfile, 0);
581 continue;
584 /* Copy the to-name in a safe place. */
585 if (nowtok == tok_bsymbol)
586 to_name = (char *) obstack_copy0 (&result->mem_pool,
587 cmfile->token.val.str.startmb,
588 cmfile->token.val.str.lenmb);
589 else
591 obstack_printf (&result->mem_pool, "U%08X",
592 cmfile->token.val.ucs4);
593 obstack_1grow (&result->mem_pool, '\0');
594 to_name = (char *) obstack_finish (&result->mem_pool);
597 state = 5;
598 continue;
600 case 90:
601 if (nowtok != expected_tok)
602 lr_error (cmfile, _("\
603 `%1$s' definition does not end with `END %1$s'"), expected_str);
605 lr_ignore_rest (cmfile, nowtok == expected_tok);
606 state = 91;
607 continue;
609 case 91:
610 /* Waiting for WIDTH... */
611 if (nowtok == tok_eol)
612 /* Ignore empty lines. */
613 continue;
615 if (nowtok == tok_width_default)
617 state = 92;
618 continue;
621 if (nowtok == tok_width)
623 lr_ignore_rest (cmfile, 1);
624 state = 93;
625 continue;
628 if (nowtok == tok_width_variable)
630 lr_ignore_rest (cmfile, 1);
631 state = 98;
632 continue;
635 lr_error (cmfile, _("\
636 only WIDTH definitions are allowed to follow the CHARMAP definition"));
638 lr_ignore_rest (cmfile, 0);
639 continue;
641 case 92:
642 if (nowtok != tok_number)
643 lr_error (cmfile, _("value for %s must be an integer"),
644 "WIDTH_DEFAULT");
645 else
646 result->width_default = now->val.num;
648 lr_ignore_rest (cmfile, nowtok == tok_number);
650 state = 91;
651 continue;
653 case 93:
654 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
655 "%s...%s %d\n". */
656 if (nowtok == tok_eol)
657 /* ignore empty lines. */
658 continue;
660 if (nowtok == tok_end)
662 expected_tok = tok_width;
663 expected_str = "WIDTH";
664 state = 90;
665 continue;
668 if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
670 lr_error (cmfile, _("syntax error in %s definition: %s"),
671 "WIDTH", _("no symbolic name given"));
673 lr_ignore_rest (cmfile, 0);
674 continue;
677 if (from_name != NULL)
678 obstack_free (&result->mem_pool, from_name);
680 if (nowtok == tok_bsymbol)
681 from_name = (char *) obstack_copy0 (&result->mem_pool,
682 now->val.str.startmb,
683 now->val.str.lenmb);
684 else
686 obstack_printf (&result->mem_pool, "U%08X",
687 cmfile->token.val.ucs4);
688 obstack_1grow (&result->mem_pool, '\0');
689 from_name = (char *) obstack_finish (&result->mem_pool);
692 to_name = NULL;
694 state = 94;
695 continue;
697 case 94:
698 if (nowtok == tok_ellipsis3)
700 state = 95;
701 continue;
704 case 96:
705 if (nowtok != tok_number)
706 lr_error (cmfile, _("value for %s must be an integer"),
707 "WIDTH");
708 else
710 /* Store width for chars. */
711 new_width (cmfile, result, from_name, to_name, now->val.num);
713 from_name = NULL;
714 to_name = NULL;
717 lr_ignore_rest (cmfile, nowtok == tok_number);
719 state = 93;
720 continue;
722 case 95:
723 if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
725 lr_error (cmfile, _("syntax error in %s definition: %s"),
726 "WIDTH", _("no symbolic name given for end of range"));
728 lr_ignore_rest (cmfile, 0);
730 state = 93;
731 continue;
734 if (nowtok == tok_bsymbol)
735 to_name = (char *) obstack_copy0 (&result->mem_pool,
736 now->val.str.startmb,
737 now->val.str.lenmb);
738 else
740 obstack_printf (&result->mem_pool, "U%08X",
741 cmfile->token.val.ucs4);
742 obstack_1grow (&result->mem_pool, '\0');
743 to_name = (char *) obstack_finish (&result->mem_pool);
746 state = 96;
747 continue;
749 case 98:
750 /* We now expect `END WIDTH_VARIABLE' or lines of the format
751 "%s\n" or "%s...%s\n". */
752 if (nowtok == tok_eol)
753 /* ignore empty lines. */
754 continue;
756 if (nowtok == tok_end)
758 expected_tok = tok_width_variable;
759 expected_str = "WIDTH_VARIABLE";
760 state = 90;
761 continue;
764 if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
766 lr_error (cmfile, _("syntax error in %s definition: %s"),
767 "WIDTH_VARIABLE", _("no symbolic name given"));
769 lr_ignore_rest (cmfile, 0);
771 continue;
774 if (from_name != NULL)
775 obstack_free (&result->mem_pool, from_name);
777 if (nowtok == tok_bsymbol)
778 from_name = (char *) obstack_copy0 (&result->mem_pool,
779 now->val.str.startmb,
780 now->val.str.lenmb);
781 else
783 obstack_printf (&result->mem_pool, "U%08X",
784 cmfile->token.val.ucs4);
785 obstack_1grow (&result->mem_pool, '\0');
786 from_name = (char *) obstack_finish (&result->mem_pool);
788 to_name = NULL;
790 state = 99;
791 continue;
793 case 99:
794 if (nowtok == tok_ellipsis3)
795 state = 100;
797 /* Store info. */
798 from_name = NULL;
800 /* Warn */
801 state = 98;
802 continue;
804 case 100:
805 if (nowtok != tok_bsymbol && nowtok != tok_ucs4)
807 lr_error (cmfile, _("syntax error in %s definition: %s"),
808 "WIDTH_VARIABLE",
809 _("no symbolic name given for end of range"));
810 lr_ignore_rest (cmfile, 0);
811 continue;
814 if (nowtok == tok_bsymbol)
815 to_name = (char *) obstack_copy0 (&result->mem_pool,
816 now->val.str.startmb,
817 now->val.str.lenmb);
818 else
820 obstack_printf (&result->mem_pool, "U%08X",
821 cmfile->token.val.ucs4);
822 obstack_1grow (&result->mem_pool, '\0');
823 to_name = (char *) obstack_finish (&result->mem_pool);
826 /* XXX Enter value into table. */
828 lr_ignore_rest (cmfile, 1);
830 state = 98;
831 continue;
833 default:
834 error (5, 0, _("%s: error in state machine"), __FILE__);
835 /* NOTREACHED */
837 break;
840 if (state != 91 && !be_quiet)
841 error (0, 0, _("%s: premature end of file"), cmfile->fname);
843 lr_close (cmfile);
845 return result;
849 static void
850 new_width (struct linereader *cmfile, struct charmap_t *result,
851 const char *from, const char *to, unsigned long int width)
853 struct charseq *from_val;
854 struct charseq *to_val;
856 from_val = charmap_find_value (result, from, strlen (from));
857 if (from_val == NULL)
859 lr_error (cmfile, _("unknown character `%s'"), from);
860 return;
863 if (to == NULL)
864 to_val = from_val;
865 else
867 to_val = charmap_find_value (result, to, strlen (to));
868 if (to_val == NULL)
870 lr_error (cmfile, _("unknown character `%s'"), to);
871 return;
875 if (result->nwidth_rules >= result->nwidth_rules_max)
877 size_t new_size = result->nwidth_rules + 32;
878 struct width_rule *new_rules =
879 (struct width_rule *) obstack_alloc (&result->mem_pool,
880 (new_size
881 * sizeof (struct width_rule)));
883 memcpy (new_rules, result->width_rules,
884 result->nwidth_rules_max * sizeof (struct width_rule));
886 result->width_rules = new_rules;
887 result->nwidth_rules_max = new_size;
890 result->width_rules[result->nwidth_rules].from = from_val;
891 result->width_rules[result->nwidth_rules].to = to_val;
892 result->width_rules[result->nwidth_rules].width = (unsigned int) width;
893 ++result->nwidth_rules;
897 struct charseq *
898 charmap_find_value (const struct charmap_t *cm, const char *name, size_t len)
900 void *result;
902 return (find_entry ((hash_table *) &cm->char_table, name, len, &result)
903 < 0 ? NULL : (struct charseq *) result);
907 static void
908 charmap_new_char (struct linereader *lr, struct charmap_t *cm,
909 int nbytes, char *bytes, const char *from, const char *to,
910 int decimal_ellipsis, int step)
912 hash_table *ht = &cm->char_table;
913 hash_table *bt = &cm->byte_table;
914 struct obstack *ob = &cm->mem_pool;
915 char *from_end;
916 char *to_end;
917 const char *cp;
918 int prefix_len, len1, len2;
919 unsigned int from_nr, to_nr, cnt;
920 struct charseq *newp;
922 len1 = strlen (from);
924 if (to == NULL)
926 newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
927 newp->nbytes = nbytes;
928 memcpy (newp->bytes, bytes, nbytes);
929 newp->name = from;
931 newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
932 if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9))
934 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
935 xxxx and xxxxxxxx are hexadecimal numbers. In this case
936 we use the value of xxxx or xxxxxxxx as the UCS4 value of
937 this character and we don't have to consult the repertoire
938 map.
940 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
941 and xxxxxxxx also give the code point in UCS4 but this must
942 be in the private, i.e., unassigned, area. This should be
943 used for characters which do not (yet) have an equivalent
944 in ISO 10646 and Unicode. */
945 char *endp;
947 errno = 0;
948 newp->ucs4 = strtoul (from + 1, &endp, 16);
949 if (endp - from != len1
950 || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
951 || newp->ucs4 >= 0x80000000)
952 /* This wasn't successful. Signal this name cannot be a
953 correct UCS value. */
954 newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
957 insert_entry (ht, from, len1, newp);
958 insert_entry (bt, newp->bytes, nbytes, newp);
959 /* Please note that it isn't a bug if a symbol is defined more
960 than once. All later definitions are simply discarded. */
961 return;
964 /* We have a range: the names must have names with equal prefixes
965 and an equal number of digits, where the second number is greater
966 or equal than the first. */
967 len2 = strlen (to);
969 if (len1 != len2)
971 illegal_range:
972 lr_error (lr, _("invalid names for character range"));
973 return;
976 cp = &from[len1 - 1];
977 if (decimal_ellipsis)
978 while (isdigit (*cp) && cp >= from)
979 --cp;
980 else
981 while (isxdigit (*cp) && cp >= from)
983 if (!isdigit (*cp) && !isupper (*cp))
984 lr_error (lr, _("\
985 hexadecimal range format should use only capital characters"));
986 --cp;
989 prefix_len = (cp - from) + 1;
991 if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
992 goto illegal_range;
994 errno = 0;
995 from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16);
996 if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE)
997 || ((to_nr = strtoul (&to[prefix_len], &to_end,
998 decimal_ellipsis ? 10 : 16)) == ULONG_MAX
999 && errno == ERANGE)
1000 || *to_end != '\0')
1002 lr_error (lr, _("<%s> and <%s> are illegal names for range"), from, to);
1003 return;
1006 if (from_nr > to_nr)
1008 lr_error (lr, _("upper limit in range is not higher then lower limit"));
1009 return;
1012 for (cnt = from_nr; cnt <= to_nr; cnt += step)
1014 char *name_end;
1015 obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X",
1016 prefix_len, from, len1 - prefix_len, cnt);
1017 obstack_1grow (ob, '\0');
1018 name_end = obstack_finish (ob);
1020 newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes);
1021 newp->nbytes = nbytes;
1022 memcpy (newp->bytes, bytes, nbytes);
1023 newp->name = name_end;
1025 newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
1026 if ((name_end[0] == 'U' || name_end[0] == 'P')
1027 && (len1 == 5 || len1 == 9))
1029 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
1030 xxxx and xxxxxxxx are hexadecimal numbers. In this case
1031 we use the value of xxxx or xxxxxxxx as the UCS4 value of
1032 this character and we don't have to consult the repertoire
1033 map.
1035 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
1036 and xxxxxxxx also give the code point in UCS4 but this must
1037 be in the private, i.e., unassigned, area. This should be
1038 used for characters which do not (yet) have an equivalent
1039 in ISO 10646 and Unicode. */
1040 char *endp;
1042 errno = 0;
1043 newp->ucs4 = strtoul (name_end, &endp, 16);
1044 if (endp - name_end != len1
1045 || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
1046 || newp->ucs4 >= 0x80000000)
1047 /* This wasn't successful. Signal this name cannot be a
1048 correct UCS value. */
1049 newp->ucs4 = UNINITIALIZED_CHAR_VALUE;
1052 insert_entry (ht, name_end, len1, newp);
1053 insert_entry (bt, newp->bytes, nbytes, newp);
1054 /* Please note we don't examine the return value since it is no error
1055 if we have two definitions for a symbol. */
1057 /* Increment the value in the byte sequence. */
1058 if (++bytes[nbytes - 1] == '\0')
1060 int b = nbytes - 2;
1063 if (b < 0)
1065 lr_error (lr,
1066 _("resulting bytes for range not representable."));
1067 return;
1069 while (++bytes[b--] == 0);
1075 struct charseq *
1076 charmap_find_symbol (const struct charmap_t *cm, const char *bytes,
1077 size_t nbytes)
1079 void *result;
1081 return (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &result)
1082 < 0 ? NULL : (struct charseq *) result);