1 /* Copyright (C) 1996,1998,1999,2000,2001 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
34 #include "linereader.h"
36 #include "charmap-dir.h"
37 #include "repertoire.h"
42 /* Define the lookup function. */
43 #include "charmap-kw.h"
/* Allocation routine declared by hand here; its definition is not part
   of the visible portion of this file.  parse_charmap calls it to obtain
   the struct charmap_t that it fills in.
   NOTE(review): presumably it never returns NULL, since the caller uses
   the result without checking it -- confirm against the definition.  */
46 extern void *xmalloc (size_t __n
);
48 /* Prototypes for local functions. */
/* Read the charmap description from CMFILE and build a struct charmap_t
   from it.  VERBOSE is handed on to lr_token; BE_QUIET suppresses the
   "premature end of file" diagnostic.  */
49 static struct charmap_t
*parse_charmap (struct linereader
*cmfile
,
50 int verbose
, int be_quiet
);
/* Record a width rule in RESULT->width_rules for the characters named
   FROM and TO, with width WIDTH.  Names that cannot be found in the
   charmap are reported through lr_error on CMFILE.  */
51 static void new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
52 const char *from
, const char *to
,
53 unsigned long int width
);
/* Enter the character named FROM, or the range of characters derived
   from FROM and TO, with the NBYTES-byte encoding BYTES into CM's
   char_table and byte_table.  DECIMAL_ELLIPSIS selects base 10 instead
   of base 16 for the numeric suffix of range names; STEP is the
   increment between successive members of a range.  Problems are
   reported through lr_error on LR.  */
54 static void charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
55 int nbytes
, char *bytes
, const char *from
,
56 const char *to
, int decimal_ellipsis
, int step
);
/* File-scope pointer with no initializer, so it starts out as a null
   pointer.
   NOTE(review): no use of it is visible in this excerpt -- confirm its
   purpose before changing or removing it.  */
59 static const char *null_pointer
;
61 static struct linereader
*
62 cmlr_open (const char *directory
, const char *name
, kw_hash_fct_t hf
)
66 fp
= charmap_open (directory
, name
);
71 size_t dlen
= strlen (directory
);
72 int add_slash
= (dlen
== 0 || directory
[dlen
- 1] != '/');
73 size_t nlen
= strlen (name
);
77 pathname
= alloca (dlen
+ add_slash
+ nlen
+ 1);
78 p
= stpcpy (pathname
, directory
);
83 return lr_create (fp
, pathname
, hf
);
88 charmap_read (const char *filename
, int verbose
, int be_quiet
, int use_default
)
90 struct charmap_t
*result
= NULL
;
94 struct linereader
*cmfile
;
96 /* First try the name as found in the parameter. */
97 cmfile
= lr_open (filename
, charmap_hash
);
100 /* Not successful. So start looking through the directories
101 in the I18NPATH if this is a simple name. */
102 if (strchr (filename
, '/') == NULL
)
104 char *i18npath
= getenv ("I18NPATH");
105 if (i18npath
!= NULL
&& *i18npath
!= '\0')
107 char path
[strlen (i18npath
) + sizeof ("/charmaps")];
109 i18npath
= strdupa (i18npath
);
111 while (cmfile
== NULL
112 && (next
= strsep (&i18npath
, ":")) != NULL
)
114 stpcpy (stpcpy (path
, next
), "/charmaps");
115 cmfile
= cmlr_open (path
, filename
, charmap_hash
);
119 /* Try without the "/charmaps" part. */
120 cmfile
= cmlr_open (next
, filename
, charmap_hash
);
127 /* Try the default directory. */
128 cmfile
= cmlr_open (CHARMAP_PATH
, filename
, charmap_hash
);
135 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
137 if (result
== NULL
&& !be_quiet
)
138 error (0, errno
, _("character map file `%s' not found"), filename
);
142 if (result
== NULL
&& filename
!= NULL
&& strchr (filename
, '/') == NULL
)
144 /* OK, one more try. We also accept the names given to the
145 character sets in the files. Sometimes they differ from the
149 dir
= charmap_opendir (CHARMAP_PATH
);
154 while ((dirent
= charmap_readdir (dir
)) != NULL
)
160 aliases
= charmap_aliases (CHARMAP_PATH
, dirent
);
162 for (p
= aliases
; *p
; p
++)
163 if (strcasecmp (*p
, filename
) == 0)
168 charmap_free_aliases (aliases
);
172 struct linereader
*cmfile
;
174 cmfile
= cmlr_open (CHARMAP_PATH
, dirent
, charmap_hash
);
176 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
182 charmap_closedir (dir
);
186 if (result
== NULL
&& DEFAULT_CHARMAP
!= NULL
)
188 struct linereader
*cmfile
;
190 cmfile
= cmlr_open (CHARMAP_PATH
, DEFAULT_CHARMAP
, charmap_hash
);
192 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
195 error (4, errno
, _("default character map file `%s' not found"),
199 /* Test of ASCII compatibility of locale encoding.
201 Verify that the encoding to be used in a locale is ASCII compatible,
202 at least for the graphic characters, excluding the control characters,
203 '$' and '@'. This constraint comes from an ISO C 99 restriction.
205 ISO C 99 section 7.17.(2) (about wchar_t):
206 the null character shall have the code value zero and each member of
207 the basic character set shall have a code value equal to its value
208 when used as the lone character in an integer character constant.
209 ISO C 99 section 5.2.1.(3):
210 Both the basic source and basic execution character sets shall have
211 the following members: the 26 uppercase letters of the Latin alphabet
212 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
213 the 26 lowercase letters of the Latin alphabet
214 a b c d e f g h i j k l m n o p q r s t u v w x y z
215 the 10 decimal digits
0 1 2 3 4 5 6 7 8 9
217 the following 29 graphic characters
218 ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
219 the space character, and control characters representing horizontal
220 tab, vertical tab, and form feed.
222 Therefore, for all members of the "basic character set", the 'char' code
223 must have the same value as the 'wchar_t' code, which in glibc is the
224 same as the Unicode code, which for all of the enumerated characters
225 is identical to the ASCII code. */
226 if (result
!= NULL
&& use_default
)
228 static const char basic_charset
[] =
230 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
231 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
232 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
233 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
234 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
235 '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
236 '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
237 '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
240 const char *p
= basic_charset
;
244 struct charseq
* seq
= charmap_find_symbol (result
, p
, 1);
246 if (seq
== NULL
|| seq
->ucs4
!= *p
)
249 while (*p
++ != '\0');
252 fprintf (stderr
, _("\
253 character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
254 result
->code_set_name
);
261 static struct charmap_t
*
262 parse_charmap (struct linereader
*cmfile
, int verbose
, int be_quiet
)
264 struct charmap_t
*result
;
266 enum token_t expected_tok
= tok_error
;
267 const char *expected_str
= NULL
;
268 char *from_name
= NULL
;
269 char *to_name
= NULL
;
270 enum token_t ellipsis
= 0;
273 /* We don't want symbolic names in strings to be translated. */
274 cmfile
->translate_strings
= 0;
276 /* Allocate room for result. */
277 result
= (struct charmap_t
*) xmalloc (sizeof (struct charmap_t
));
278 memset (result
, '\0', sizeof (struct charmap_t
));
279 /* The default DEFAULT_WIDTH is 1. */
280 result
->width_default
= 1;
282 #define obstack_chunk_alloc malloc
283 #define obstack_chunk_free free
284 obstack_init (&result
->mem_pool
);
286 if (init_hash (&result
->char_table
, 256)
287 || init_hash (&result
->byte_table
, 256))
293 /* We use a state machine to describe the charmap description file
299 struct token
*now
= lr_token (cmfile
, NULL
, NULL
, verbose
);
300 enum token_t nowtok
= now
->tok
;
303 if (nowtok
== tok_eof
)
309 /* The beginning. We expect the special declarations, EOL or
311 if (nowtok
== tok_eol
)
312 /* Ignore empty lines. */
315 if (nowtok
== tok_charmap
)
320 /* We have to set up the real work. Fill in some
322 if (result
->mb_cur_max
== 0)
323 result
->mb_cur_max
= 1;
324 if (result
->mb_cur_min
== 0)
325 result
->mb_cur_min
= result
->mb_cur_max
;
326 if (result
->mb_cur_min
> result
->mb_cur_max
)
330 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
333 result
->mb_cur_min
= result
->mb_cur_max
;
336 lr_ignore_rest (cmfile
, 1);
342 if (nowtok
!= tok_code_set_name
&& nowtok
!= tok_mb_cur_max
343 && nowtok
!= tok_mb_cur_min
&& nowtok
!= tok_escape_char
344 && nowtok
!= tok_comment_char
&& nowtok
!= tok_g0esc
345 && nowtok
!= tok_g1esc
&& nowtok
!= tok_g2esc
346 && nowtok
!= tok_g3esc
&& nowtok
!= tok_repertoiremap
347 && nowtok
!= tok_include
)
349 lr_error (cmfile
, _("syntax error in prolog: %s"),
350 _("invalid definition"));
352 lr_ignore_rest (cmfile
, 0);
356 /* We know that we need an argument. */
357 arg
= lr_token (cmfile
, NULL
, NULL
, verbose
);
361 case tok_code_set_name
:
362 case tok_repertoiremap
:
363 if (arg
->tok
!= tok_ident
&& arg
->tok
!= tok_string
)
366 lr_error (cmfile
, _("syntax error in prolog: %s"),
369 lr_ignore_rest (cmfile
, 0);
373 if (nowtok
== tok_code_set_name
)
374 result
->code_set_name
= obstack_copy0 (&result
->mem_pool
,
375 arg
->val
.str
.startmb
,
378 result
->repertoiremap
= obstack_copy0 (&result
->mem_pool
,
379 arg
->val
.str
.startmb
,
382 lr_ignore_rest (cmfile
, 1);
387 if (arg
->tok
!= tok_number
)
391 && ((nowtok
== tok_mb_cur_max
392 && result
->mb_cur_max
!= 0)
393 || (nowtok
== tok_mb_cur_max
394 && result
->mb_cur_max
!= 0)))
395 lr_error (cmfile
, _("duplicate definition of <%s>"),
396 nowtok
== tok_mb_cur_min
397 ? "mb_cur_min" : "mb_cur_max");
399 if (arg
->val
.num
< 1)
402 _("value for <%s> must be 1 or greater"),
403 nowtok
== tok_mb_cur_min
404 ? "mb_cur_min" : "mb_cur_max");
406 lr_ignore_rest (cmfile
, 0);
409 if ((nowtok
== tok_mb_cur_max
&& result
->mb_cur_min
!= 0
410 && (int) arg
->val
.num
< result
->mb_cur_min
)
411 || (nowtok
== tok_mb_cur_min
&& result
->mb_cur_max
!= 0
412 && (int) arg
->val
.num
> result
->mb_cur_max
))
414 lr_error (cmfile
, _("\
415 value of <%s> must be greater or equal than the value of <%s>"),
416 "mb_cur_max", "mb_cur_min");
418 lr_ignore_rest (cmfile
, 0);
422 if (nowtok
== tok_mb_cur_max
)
423 result
->mb_cur_max
= arg
->val
.num
;
425 result
->mb_cur_min
= arg
->val
.num
;
427 lr_ignore_rest (cmfile
, 1);
430 case tok_escape_char
:
431 case tok_comment_char
:
432 if (arg
->tok
!= tok_ident
)
435 if (arg
->val
.str
.lenmb
!= 1)
437 lr_error (cmfile
, _("\
438 argument to <%s> must be a single character"),
439 nowtok
== tok_escape_char
? "escape_char"
442 lr_ignore_rest (cmfile
, 0);
446 if (nowtok
== tok_escape_char
)
447 cmfile
->escape_char
= *arg
->val
.str
.startmb
;
449 cmfile
->comment_char
= *arg
->val
.str
.startmb
;
451 lr_ignore_rest (cmfile
, 1);
459 lr_ignore_rest (cmfile
, 0); /* XXX */
463 lr_error (cmfile
, _("\
464 character sets with locking states are not supported"));
469 assert (! "Should not happen");
474 /* We have seen `CHARMAP' and now are in the body. Each line
475 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
476 if (nowtok
== tok_eol
)
477 /* Ignore empty lines. */
480 if (nowtok
== tok_end
)
482 expected_tok
= tok_charmap
;
483 expected_str
= "CHARMAP";
488 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
490 lr_error (cmfile
, _("syntax error in %s definition: %s"),
491 "CHARMAP", _("no symbolic name given"));
493 lr_ignore_rest (cmfile
, 0);
497 /* If the previous line was not completely correct free the
499 if (from_name
!= NULL
)
500 obstack_free (&result
->mem_pool
, from_name
);
502 if (nowtok
== tok_bsymbol
)
503 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
504 now
->val
.str
.startmb
,
508 obstack_printf (&result
->mem_pool
, "U%08X",
509 cmfile
->token
.val
.ucs4
);
510 obstack_1grow (&result
->mem_pool
, '\0');
511 from_name
= (char *) obstack_finish (&result
->mem_pool
);
519 /* We have two possibilities: We can see an ellipsis or an
521 if (nowtok
== tok_ellipsis3
|| nowtok
== tok_ellipsis4
522 || nowtok
== tok_ellipsis2
|| nowtok
== tok_ellipsis4_2
523 || nowtok
== tok_ellipsis2_2
)
526 if (nowtok
== tok_ellipsis4_2
)
529 nowtok
= tok_ellipsis4
;
531 else if (nowtok
== tok_ellipsis2_2
)
534 nowtok
= tok_ellipsis2
;
542 if (nowtok
!= tok_charcode
)
544 lr_error (cmfile
, _("syntax error in %s definition: %s"),
545 "CHARMAP", _("invalid encoding given"));
547 lr_ignore_rest (cmfile
, 0);
553 if (now
->val
.charcode
.nbytes
< result
->mb_cur_min
)
554 lr_error (cmfile
, _("too few bytes in character encoding"));
555 else if (now
->val
.charcode
.nbytes
> result
->mb_cur_max
)
556 lr_error (cmfile
, _("too many bytes in character encoding"));
558 charmap_new_char (cmfile
, result
, now
->val
.charcode
.nbytes
,
559 now
->val
.charcode
.bytes
, from_name
, to_name
,
560 ellipsis
!= tok_ellipsis2
, step
);
562 /* Ignore trailing comment silently. */
563 lr_ignore_rest (cmfile
, 0);
574 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
576 lr_error (cmfile
, _("syntax error in %s definition: %s"),
578 _("no symbolic name given for end of range"));
580 lr_ignore_rest (cmfile
, 0);
584 /* Copy the to-name to a safe place. */
585 if (nowtok
== tok_bsymbol
)
586 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
587 cmfile
->token
.val
.str
.startmb
,
588 cmfile
->token
.val
.str
.lenmb
);
591 obstack_printf (&result
->mem_pool
, "U%08X",
592 cmfile
->token
.val
.ucs4
);
593 obstack_1grow (&result
->mem_pool
, '\0');
594 to_name
= (char *) obstack_finish (&result
->mem_pool
);
601 if (nowtok
!= expected_tok
)
602 lr_error (cmfile
, _("\
603 `%1$s' definition does not end with `END %1$s'"), expected_str
);
605 lr_ignore_rest (cmfile
, nowtok
== expected_tok
);
610 /* Waiting for WIDTH... */
611 if (nowtok
== tok_eol
)
612 /* Ignore empty lines. */
615 if (nowtok
== tok_width_default
)
621 if (nowtok
== tok_width
)
623 lr_ignore_rest (cmfile
, 1);
628 if (nowtok
== tok_width_variable
)
630 lr_ignore_rest (cmfile
, 1);
635 lr_error (cmfile
, _("\
636 only WIDTH definitions are allowed to follow the CHARMAP definition"));
638 lr_ignore_rest (cmfile
, 0);
642 if (nowtok
!= tok_number
)
643 lr_error (cmfile
, _("value for %s must be an integer"),
646 result
->width_default
= now
->val
.num
;
648 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
654 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
656 if (nowtok
== tok_eol
)
657 /* ignore empty lines. */
660 if (nowtok
== tok_end
)
662 expected_tok
= tok_width
;
663 expected_str
= "WIDTH";
668 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
670 lr_error (cmfile
, _("syntax error in %s definition: %s"),
671 "WIDTH", _("no symbolic name given"));
673 lr_ignore_rest (cmfile
, 0);
677 if (from_name
!= NULL
)
678 obstack_free (&result
->mem_pool
, from_name
);
680 if (nowtok
== tok_bsymbol
)
681 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
682 now
->val
.str
.startmb
,
686 obstack_printf (&result
->mem_pool
, "U%08X",
687 cmfile
->token
.val
.ucs4
);
688 obstack_1grow (&result
->mem_pool
, '\0');
689 from_name
= (char *) obstack_finish (&result
->mem_pool
);
698 if (nowtok
== tok_ellipsis3
)
705 if (nowtok
!= tok_number
)
706 lr_error (cmfile
, _("value for %s must be an integer"),
710 /* Store width for chars. */
711 new_width (cmfile
, result
, from_name
, to_name
, now
->val
.num
);
717 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
723 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
725 lr_error (cmfile
, _("syntax error in %s definition: %s"),
726 "WIDTH", _("no symbolic name given for end of range"));
728 lr_ignore_rest (cmfile
, 0);
734 if (nowtok
== tok_bsymbol
)
735 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
736 now
->val
.str
.startmb
,
740 obstack_printf (&result
->mem_pool
, "U%08X",
741 cmfile
->token
.val
.ucs4
);
742 obstack_1grow (&result
->mem_pool
, '\0');
743 to_name
= (char *) obstack_finish (&result
->mem_pool
);
750 /* We now expect `END WIDTH_VARIABLE' or lines of the format
751 "%s\n" or "%s...%s\n". */
752 if (nowtok
== tok_eol
)
753 /* ignore empty lines. */
756 if (nowtok
== tok_end
)
758 expected_tok
= tok_width_variable
;
759 expected_str
= "WIDTH_VARIABLE";
764 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
766 lr_error (cmfile
, _("syntax error in %s definition: %s"),
767 "WIDTH_VARIABLE", _("no symbolic name given"));
769 lr_ignore_rest (cmfile
, 0);
774 if (from_name
!= NULL
)
775 obstack_free (&result
->mem_pool
, from_name
);
777 if (nowtok
== tok_bsymbol
)
778 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
779 now
->val
.str
.startmb
,
783 obstack_printf (&result
->mem_pool
, "U%08X",
784 cmfile
->token
.val
.ucs4
);
785 obstack_1grow (&result
->mem_pool
, '\0');
786 from_name
= (char *) obstack_finish (&result
->mem_pool
);
794 if (nowtok
== tok_ellipsis3
)
805 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
807 lr_error (cmfile
, _("syntax error in %s definition: %s"),
809 _("no symbolic name given for end of range"));
810 lr_ignore_rest (cmfile
, 0);
814 if (nowtok
== tok_bsymbol
)
815 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
816 now
->val
.str
.startmb
,
820 obstack_printf (&result
->mem_pool
, "U%08X",
821 cmfile
->token
.val
.ucs4
);
822 obstack_1grow (&result
->mem_pool
, '\0');
823 to_name
= (char *) obstack_finish (&result
->mem_pool
);
826 /* XXX Enter value into table. */
828 lr_ignore_rest (cmfile
, 1);
834 error (5, 0, _("%s: error in state machine"), __FILE__
);
840 if (state
!= 91 && !be_quiet
)
841 error (0, 0, _("%s: premature end of file"), cmfile
->fname
);
850 new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
851 const char *from
, const char *to
, unsigned long int width
)
853 struct charseq
*from_val
;
854 struct charseq
*to_val
;
856 from_val
= charmap_find_value (result
, from
, strlen (from
));
857 if (from_val
== NULL
)
859 lr_error (cmfile
, _("unknown character `%s'"), from
);
867 to_val
= charmap_find_value (result
, to
, strlen (to
));
870 lr_error (cmfile
, _("unknown character `%s'"), to
);
875 if (result
->nwidth_rules
>= result
->nwidth_rules_max
)
877 size_t new_size
= result
->nwidth_rules
+ 32;
878 struct width_rule
*new_rules
=
879 (struct width_rule
*) obstack_alloc (&result
->mem_pool
,
881 * sizeof (struct width_rule
)));
883 memcpy (new_rules
, result
->width_rules
,
884 result
->nwidth_rules_max
* sizeof (struct width_rule
));
886 result
->width_rules
= new_rules
;
887 result
->nwidth_rules_max
= new_size
;
890 result
->width_rules
[result
->nwidth_rules
].from
= from_val
;
891 result
->width_rules
[result
->nwidth_rules
].to
= to_val
;
892 result
->width_rules
[result
->nwidth_rules
].width
= (unsigned int) width
;
893 ++result
->nwidth_rules
;
898 charmap_find_value (const struct charmap_t
*cm
, const char *name
, size_t len
)
902 return (find_entry ((hash_table
*) &cm
->char_table
, name
, len
, &result
)
903 < 0 ? NULL
: (struct charseq
*) result
);
908 charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
909 int nbytes
, char *bytes
, const char *from
, const char *to
,
910 int decimal_ellipsis
, int step
)
912 hash_table
*ht
= &cm
->char_table
;
913 hash_table
*bt
= &cm
->byte_table
;
914 struct obstack
*ob
= &cm
->mem_pool
;
918 int prefix_len
, len1
, len2
;
919 unsigned int from_nr
, to_nr
, cnt
;
920 struct charseq
*newp
;
922 len1
= strlen (from
);
926 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
927 newp
->nbytes
= nbytes
;
928 memcpy (newp
->bytes
, bytes
, nbytes
);
931 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
932 if ((from
[0] == 'U' || from
[0] == 'P') && (len1
== 5 || len1
== 9))
934 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
935 xxxx and xxxxxxxx are hexadecimal numbers. In this case
936 we use the value of xxxx or xxxxxxxx as the UCS4 value of
937 this character and we don't have to consult the repertoire map.
940 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
941 and xxxxxxxx also give the code point in UCS4 but this must
942 be in the private, i.e., unassigned, area. This should be
943 used for characters which do not (yet) have an equivalent
944 in ISO 10646 and Unicode. */
948 newp
->ucs4
= strtoul (from
+ 1, &endp
, 16);
949 if (endp
- from
!= len1
950 || (newp
->ucs4
== ULONG_MAX
&& errno
== ERANGE
)
951 || newp
->ucs4
>= 0x80000000)
952 /* This wasn't successful. Signal this name cannot be a
953 correct UCS value. */
954 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
957 insert_entry (ht
, from
, len1
, newp
);
958 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
959 /* Please note that it isn't a bug if a symbol is defined more
960 than once. All later definitions are simply discarded. */
964 /* We have a range: the names must have equal prefixes and an
965 equal number of digits, where the second number is greater than
966 or equal to the first. */
972 lr_error (lr
, _("invalid names for character range"));
976 cp
= &from
[len1
- 1];
977 if (decimal_ellipsis
)
978 while (isdigit (*cp
) && cp
>= from
)
981 while (isxdigit (*cp
) && cp
>= from
)
983 if (!isdigit (*cp
) && !isupper (*cp
))
985 hexadecimal range format should use only capital characters"));
989 prefix_len
= (cp
- from
) + 1;
991 if (cp
== &from
[len1
- 1] || strncmp (from
, to
, prefix_len
) != 0)
995 from_nr
= strtoul (&from
[prefix_len
], &from_end
, decimal_ellipsis
? 10 : 16);
996 if (*from_end
!= '\0' || (from_nr
== ULONG_MAX
&& errno
== ERANGE
)
997 || ((to_nr
= strtoul (&to
[prefix_len
], &to_end
,
998 decimal_ellipsis
? 10 : 16)) == ULONG_MAX
1002 lr_error (lr
, _("<%s> and <%s> are illegal names for range"), from
, to
);
1006 if (from_nr
> to_nr
)
1008 lr_error (lr
, _("upper limit in range is not higher then lower limit"));
1012 for (cnt
= from_nr
; cnt
<= to_nr
; cnt
+= step
)
1015 obstack_printf (ob
, decimal_ellipsis
? "%.*s%0*d" : "%.*s%0*X",
1016 prefix_len
, from
, len1
- prefix_len
, cnt
);
1017 obstack_1grow (ob
, '\0');
1018 name_end
= obstack_finish (ob
);
1020 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
1021 newp
->nbytes
= nbytes
;
1022 memcpy (newp
->bytes
, bytes
, nbytes
);
1023 newp
->name
= name_end
;
1025 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1026 if ((name_end
[0] == 'U' || name_end
[0] == 'P')
1027 && (len1
== 5 || len1
== 9))
1029 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
1030 xxxx and xxxxxxxx are hexadecimal numbers. In this case
1031 we use the value of xxxx or xxxxxxxx as the UCS4 value of
1032 this character and we don't have to consult the repertoire map.
1035 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
1036 and xxxxxxxx also give the code point in UCS4 but this must
1037 be in the private, i.e., unassigned, area. This should be
1038 used for characters which do not (yet) have an equivalent
1039 in ISO 10646 and Unicode. */
1043 newp
->ucs4
= strtoul (name_end
, &endp
, 16);
1044 if (endp
- name_end
!= len1
1045 || (newp
->ucs4
== ULONG_MAX
&& errno
== ERANGE
)
1046 || newp
->ucs4
>= 0x80000000)
1047 /* This wasn't successful. Signal this name cannot be a
1048 correct UCS value. */
1049 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1052 insert_entry (ht
, name_end
, len1
, newp
);
1053 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
1054 /* Please note we don't examine the return value since it is not an
1055 error if we have two definitions for a symbol. */
1057 /* Increment the value in the byte sequence. */
1058 if (++bytes
[nbytes
- 1] == '\0')
1066 _("resulting bytes for range not representable."));
1069 while (++bytes
[b
--] == 0);
1076 charmap_find_symbol (const struct charmap_t
*cm
, const char *bytes
,
1081 return (find_entry ((hash_table
*) &cm
->byte_table
, bytes
, nbytes
, &result
)
1082 < 0 ? NULL
: (struct charseq
*) result
);