1 /* Copyright (C) 1996,1998,1999,2000,2001 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
34 #include "linereader.h"
36 #include "charmap-dir.h"
37 #include "repertoire.h"
42 /* Define the lookup function. */
43 #include "charmap-kw.h"
/* Allocating helper defined outside this file; parse_charmap below uses
   the returned pointer without testing it for NULL.  */
46 extern void *xmalloc (size_t __n
);
48 /* Prototypes for local functions. */
/* Reads an opened charmap file and returns the table built from it.  */
49 static struct charmap_t
*parse_charmap (struct linereader
*cmfile
,
50 int verbose
, int be_quiet
);
/* Adds a width rule for the characters named FROM and TO.  */
51 static void new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
52 const char *from
, const char *to
,
53 unsigned long int width
);
/* Enters the character FROM, or the range FROM to TO, with the byte
   sequence BYTES of length NBYTES into the charmap CM.  */
54 static void charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
55 int nbytes
, char *bytes
, const char *from
,
56 const char *to
, int decimal_ellipsis
, int step
);
/* Only declared when NEED_NULL_POINTER is defined.  */
59 #ifdef NEED_NULL_POINTER
60 static const char *null_pointer
;
/* Open the charmap NAME found in DIRECTORY and wrap it in a linereader.
   The stream comes from charmap_open.  A buffer sized for DIRECTORY, a
   slash and NAME is filled starting with DIRECTORY and handed to lr_create
   together with the keyword hash function HF.  */
63 static struct linereader
*
64 cmlr_open (const char *directory
, const char *name
, kw_hash_fct_t hf
)
68 fp
= charmap_open (directory
, name
);
73 size_t dlen
= strlen (directory
);
/* A separator is counted unless DIRECTORY already ends in a slash; an
   empty DIRECTORY gets one as well.  */
74 int add_slash
= (dlen
== 0 || directory
[dlen
- 1] != '/');
75 size_t nlen
= strlen (name
);
/* Room for the directory, the optional slash, the name and the terminating
   NUL byte.  NOTE(review): the buffer comes from alloca and is gone once
   this function returns - confirm that lr_create copies the name instead
   of keeping the pointer.  */
79 pathname
= alloca (dlen
+ add_slash
+ nlen
+ 1);
80 p
= stpcpy (pathname
, directory
);
85 return lr_create (fp
, pathname
, hf
);
/* Locate and parse the charmap called FILENAME.

   The lookup steps visible here are: FILENAME exactly as given; for names
   without a slash, every component of I18NPATH with and without a
   /charmaps suffix; the directory CHARMAP_PATH.  When no result was
   obtained, the alias lists of the files in CHARMAP_PATH are compared with
   FILENAME ignoring case, and as a last step DEFAULT_CHARMAP is tried.

   VERBOSE and BE_QUIET are handed on to parse_charmap; BE_QUIET also
   suppresses the not found message.  USE_DEFAULT gates the ASCII
   compatibility test at the end of the function.  */
90 charmap_read (const char *filename
, int verbose
, int be_quiet
, int use_default
)
92 struct charmap_t
*result
= NULL
;
96 struct linereader
*cmfile
;
98 /* First try the name as found in the parameter. */
99 cmfile
= lr_open (filename
, charmap_hash
);
102 /* Not successful. So start looking through the directories
103 in the I18NPATH if this is a simple name. */
104 if (strchr (filename
, '/') == NULL
)
106 char *i18npath
= getenv ("I18NPATH");
107 if (i18npath
!= NULL
&& *i18npath
!= '\0')
/* Sized for the complete I18NPATH value plus the suffix, so every single
   component followed by the suffix fits.  */
109 char path
[strlen (i18npath
) + sizeof ("/charmaps")];
/* strsep below writes into the string it splits, so the environment value
   is first duplicated on the stack.  */
111 i18npath
= strdupa (i18npath
);
113 while (cmfile
== NULL
114 && (next
= strsep (&i18npath
, ":")) != NULL
)
116 stpcpy (stpcpy (path
, next
), "/charmaps");
117 cmfile
= cmlr_open (path
, filename
, charmap_hash
);
120 /* Try without the "/charmaps" part. */
121 cmfile
= cmlr_open (next
, filename
, charmap_hash
);
126 /* Try the default directory. */
127 cmfile
= cmlr_open (CHARMAP_PATH
, filename
, charmap_hash
);
133 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
135 if (result
== NULL
&& !be_quiet
)
136 error (0, errno
, _("character map file `%s' not found"), filename
);
140 if (result
== NULL
&& filename
!= NULL
&& strchr (filename
, '/') == NULL
)
142 /* OK, one more try. We also accept the names given to the
143 character sets in the files. Sometimes they differ from the
147 dir
= charmap_opendir (CHARMAP_PATH
);
152 while ((dirent
= charmap_readdir (dir
)) != NULL
)
158 aliases
= charmap_aliases (CHARMAP_PATH
, dirent
);
160 for (p
= aliases
; *p
; p
++)
161 if (strcasecmp (*p
, filename
) == 0)
166 charmap_free_aliases (aliases
);
170 struct linereader
*cmfile
;
172 cmfile
= cmlr_open (CHARMAP_PATH
, dirent
, charmap_hash
);
174 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
180 charmap_closedir (dir
);
184 if (result
== NULL
&& DEFAULT_CHARMAP
!= NULL
)
186 struct linereader
*cmfile
;
188 cmfile
= cmlr_open (CHARMAP_PATH
, DEFAULT_CHARMAP
, charmap_hash
);
190 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
193 error (4, errno
, _("default character map file `%s' not found"),
197 /* Test of ASCII compatibility of locale encoding.
199 Verify that the encoding to be used in a locale is ASCII compatible,
200 at least for the graphic characters, excluding the control characters,
201 '$' and '@'. This constraint comes from an ISO C 99 restriction.
203 ISO C 99 section 7.17.(2) (about wchar_t):
204 the null character shall have the code value zero and each member of
205 the basic character set shall have a code value equal to its value
206 when used as the lone character in an integer character constant.
207 ISO C 99 section 5.2.1.(3):
208 Both the basic source and basic execution character sets shall have
209 the following members: the 26 uppercase letters of the Latin alphabet
210 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
211 the 26 lowercase letters of the Latin alphabet
212 a b c d e f g h i j k l m n o p q r s t u v w x y z
213 the 10 decimal digits
215 the following 29 graphic characters
216 ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
217 the space character, and control characters representing horizontal
218 tab, vertical tab, and form feed.
220 Therefore, for all members of the "basic character set", the 'char' code
221 must have the same value as the 'wchar_t' code, which in glibc is the
222 same as the Unicode code, which for all of the enumerated characters
223 is identical to the ASCII code. */
224 if (result
!= NULL
&& use_default
)
/* The members of the basic character set; the list ends with the NUL
   character, which the loop condition further down tests for.  */
226 static const char basic_charset
[] =
228 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
229 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
230 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
231 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
232 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
233 '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
234 '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
235 '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
238 const char *p
= basic_charset
;
242 struct charseq
* seq
= charmap_find_symbol (result
, p
, 1);
244 if (seq
== NULL
|| seq
->ucs4
!= *p
)
247 while (*p
++ != '\0');
250 fprintf (stderr
, _("\
251 character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
252 result
->code_set_name
);
/* Parse the charmap description read through CMFILE into a newly allocated
   struct charmap_t.

   Tokens are fetched with lr_token and handled by a state machine.  The
   visible states cover the prolog with definitions such as <code_set_name>,
   <mb_cur_max>, <mb_cur_min>, <escape_char> and <comment_char>, the CHARMAP
   body, and the sections introduced by tok_width_default, tok_width and
   tok_width_variable.  Syntax problems are reported through lr_error and
   the remainder of the line is dropped with lr_ignore_rest.

   VERBOSE is passed on to lr_token.  BE_QUIET suppresses the premature end
   of file message at the end of the function.  */
259 static struct charmap_t
*
260 parse_charmap (struct linereader
*cmfile
, int verbose
, int be_quiet
)
262 struct charmap_t
*result
;
264 enum token_t expected_tok
= tok_error
;
265 const char *expected_str
= NULL
;
266 char *from_name
= NULL
;
267 char *to_name
= NULL
;
268 enum token_t ellipsis
= 0;
271 /* We don't want symbolic names in string to be translated. */
272 cmfile
->translate_strings
= 0;
274 /* Allocate room for result. */
275 result
= (struct charmap_t
*) xmalloc (sizeof (struct charmap_t
));
276 memset (result
, '\0', sizeof (struct charmap_t
));
277 /* The default DEFAULT_WIDTH is 1. */
278 result
->width_default
= 1;
/* The obstack macros used below expand to calls of obstack_chunk_alloc and
   obstack_chunk_free, so both have to be defined before obstack_init.  */
280 #define obstack_chunk_alloc malloc
281 #define obstack_chunk_free free
282 obstack_init (&result
->mem_pool
);
/* Set up the table searched by name and the table searched by byte
   sequence, each with a size argument of 256.  */
284 if (init_hash (&result
->char_table
, 256)
285 || init_hash (&result
->byte_table
, 256))
291 /* We use a state machine to describe the charmap description file
297 struct token
*now
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
298 enum token_t nowtok
= now
->tok
;
301 if (nowtok
== tok_eof
)
307 /* The beginning. We expect the special declarations, EOL or
309 if (nowtok
== tok_eol
)
310 /* Ignore empty lines. */
313 if (nowtok
== tok_charmap
)
318 /* We have to set up the real work. Fill in some
320 if (result
->mb_cur_max
== 0)
321 result
->mb_cur_max
= 1;
322 if (result
->mb_cur_min
== 0)
323 result
->mb_cur_min
= result
->mb_cur_max
;
324 if (result
->mb_cur_min
> result
->mb_cur_max
)
328 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
331 result
->mb_cur_min
= result
->mb_cur_max
;
334 lr_ignore_rest (cmfile
, 1);
340 if (nowtok
!= tok_code_set_name
&& nowtok
!= tok_mb_cur_max
341 && nowtok
!= tok_mb_cur_min
&& nowtok
!= tok_escape_char
342 && nowtok
!= tok_comment_char
&& nowtok
!= tok_g0esc
343 && nowtok
!= tok_g1esc
&& nowtok
!= tok_g2esc
344 && nowtok
!= tok_g3esc
&& nowtok
!= tok_repertoiremap
345 && nowtok
!= tok_include
)
347 lr_error (cmfile
, _("syntax error in prolog: %s"),
348 _("invalid definition"));
350 lr_ignore_rest (cmfile
, 0);
354 /* We know that we need an argument. */
355 arg
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
359 case tok_code_set_name
:
360 case tok_repertoiremap
:
361 if (arg
->tok
!= tok_ident
&& arg
->tok
!= tok_string
)
364 lr_error (cmfile
, _("syntax error in prolog: %s"),
367 lr_ignore_rest (cmfile
, 0);
371 if (nowtok
== tok_code_set_name
)
372 result
->code_set_name
= obstack_copy0 (&result
->mem_pool
,
373 arg
->val
.str
.startmb
,
376 result
->repertoiremap
= obstack_copy0 (&result
->mem_pool
,
377 arg
->val
.str
.startmb
,
380 lr_ignore_rest (cmfile
, 1);
385 if (arg
->tok
!= tok_number
)
389 && ((nowtok
== tok_mb_cur_max
390 && result
->mb_cur_max
!= 0)
/* NOTE(review): the alternative below repeats the tok_mb_cur_max and
   mb_cur_max test of the one above, so a repeated <mb_cur_min> is never
   reported as a duplicate although the message can name mb_cur_min.  It
   presumably should test tok_mb_cur_min and mb_cur_min - confirm.  */
391 || (nowtok
== tok_mb_cur_max
392 && result
->mb_cur_max
!= 0)))
393 lr_error (cmfile
, _("duplicate definition of <%s>"),
394 nowtok
== tok_mb_cur_min
395 ? "mb_cur_min" : "mb_cur_max");
397 if (arg
->val
.num
< 1)
400 _("value for <%s> must be 1 or greater"),
401 nowtok
== tok_mb_cur_min
402 ? "mb_cur_min" : "mb_cur_max");
404 lr_ignore_rest (cmfile
, 0);
/* The new value is checked against the other limit only when that limit
   has already been set, that is when it is nonzero.  */
407 if ((nowtok
== tok_mb_cur_max
&& result
->mb_cur_min
!= 0
408 && (int) arg
->val
.num
< result
->mb_cur_min
)
409 || (nowtok
== tok_mb_cur_min
&& result
->mb_cur_max
!= 0
410 && (int) arg
->val
.num
> result
->mb_cur_max
))
412 lr_error (cmfile
, _("\
413 value of <%s> must be greater or equal than the value of <%s>"),
414 "mb_cur_max", "mb_cur_min");
416 lr_ignore_rest (cmfile
, 0);
420 if (nowtok
== tok_mb_cur_max
)
421 result
->mb_cur_max
= arg
->val
.num
;
423 result
->mb_cur_min
= arg
->val
.num
;
425 lr_ignore_rest (cmfile
, 1);
428 case tok_escape_char
:
429 case tok_comment_char
:
430 if (arg
->tok
!= tok_ident
)
433 if (arg
->val
.str
.lenmb
!= 1)
435 lr_error (cmfile
, _("\
436 argument to <%s> must be a single character"),
437 nowtok
== tok_escape_char
? "escape_char"
440 lr_ignore_rest (cmfile
, 0);
444 if (nowtok
== tok_escape_char
)
445 cmfile
->escape_char
= *arg
->val
.str
.startmb
;
447 cmfile
->comment_char
= *arg
->val
.str
.startmb
;
449 lr_ignore_rest (cmfile
, 1);
457 lr_ignore_rest (cmfile
, 0); /* XXX */
461 lr_error (cmfile
, _("\
462 character sets with locking states are not supported"));
467 assert (! "Should not happen");
472 /* We have seen `CHARMAP' and now are in the body. Each line
473 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
474 if (nowtok
== tok_eol
)
475 /* Ignore empty lines. */
478 if (nowtok
== tok_end
)
480 expected_tok
= tok_charmap
;
481 expected_str
= "CHARMAP";
486 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
488 lr_error (cmfile
, _("syntax error in %s definition: %s"),
489 "CHARMAP", _("no symbolic name given"));
491 lr_ignore_rest (cmfile
, 0);
495 /* If the previous line was not completely correct free the
497 if (from_name
!= NULL
)
498 obstack_free (&result
->mem_pool
, from_name
);
500 if (nowtok
== tok_bsymbol
)
501 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
502 now
->val
.str
.startmb
,
506 obstack_printf (&result
->mem_pool
, "U%08X",
507 cmfile
->token
.val
.ucs4
);
508 obstack_1grow (&result
->mem_pool
, '\0');
509 from_name
= (char *) obstack_finish (&result
->mem_pool
);
517 /* We have two possibilities: We can see an ellipsis or an
519 if (nowtok
== tok_ellipsis3
|| nowtok
== tok_ellipsis4
520 || nowtok
== tok_ellipsis2
|| nowtok
== tok_ellipsis4_2
521 || nowtok
== tok_ellipsis2_2
)
524 if (nowtok
== tok_ellipsis4_2
)
527 nowtok
= tok_ellipsis4
;
529 else if (nowtok
== tok_ellipsis2_2
)
532 nowtok
= tok_ellipsis2
;
540 if (nowtok
!= tok_charcode
)
542 lr_error (cmfile
, _("syntax error in %s definition: %s"),
543 "CHARMAP", _("invalid encoding given"));
545 lr_ignore_rest (cmfile
, 0);
551 if (now
->val
.charcode
.nbytes
< result
->mb_cur_min
)
552 lr_error (cmfile
, _("too few bytes in character encoding"));
553 else if (now
->val
.charcode
.nbytes
> result
->mb_cur_max
)
554 lr_error (cmfile
, _("too many bytes in character encoding"));
556 charmap_new_char (cmfile
, result
, now
->val
.charcode
.nbytes
,
557 now
->val
.charcode
.bytes
, from_name
, to_name
,
558 ellipsis
!= tok_ellipsis2
, step
);
560 /* Ignore trailing comment silently. */
561 lr_ignore_rest (cmfile
, 0);
572 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
574 lr_error (cmfile
, _("syntax error in %s definition: %s"),
576 _("no symbolic name given for end of range"));
578 lr_ignore_rest (cmfile
, 0);
582 /* Copy the to-name in a safe place. */
583 if (nowtok
== tok_bsymbol
)
584 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
585 cmfile
->token
.val
.str
.startmb
,
586 cmfile
->token
.val
.str
.lenmb
);
589 obstack_printf (&result
->mem_pool
, "U%08X",
590 cmfile
->token
.val
.ucs4
);
591 obstack_1grow (&result
->mem_pool
, '\0');
592 to_name
= (char *) obstack_finish (&result
->mem_pool
);
/* The token following END has to be the one recorded in expected_tok when
   END was seen; expected_str supplies the section name for the message.  */
599 if (nowtok
!= expected_tok
)
600 lr_error (cmfile
, _("\
601 `%1$s' definition does not end with `END %1$s'"), expected_str
);
603 lr_ignore_rest (cmfile
, nowtok
== expected_tok
);
608 /* Waiting for WIDTH... */
609 if (nowtok
== tok_eol
)
610 /* Ignore empty lines. */
613 if (nowtok
== tok_width_default
)
619 if (nowtok
== tok_width
)
621 lr_ignore_rest (cmfile
, 1);
626 if (nowtok
== tok_width_variable
)
628 lr_ignore_rest (cmfile
, 1);
633 lr_error (cmfile
, _("\
634 only WIDTH definitions are allowed to follow the CHARMAP definition"));
636 lr_ignore_rest (cmfile
, 0);
640 if (nowtok
!= tok_number
)
641 lr_error (cmfile
, _("value for %s must be an integer"),
644 result
->width_default
= now
->val
.num
;
646 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
652 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
654 if (nowtok
== tok_eol
)
655 /* ignore empty lines. */
658 if (nowtok
== tok_end
)
660 expected_tok
= tok_width
;
661 expected_str
= "WIDTH";
666 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
668 lr_error (cmfile
, _("syntax error in %s definition: %s"),
669 "WIDTH", _("no symbolic name given"));
671 lr_ignore_rest (cmfile
, 0);
675 if (from_name
!= NULL
)
676 obstack_free (&result
->mem_pool
, from_name
);
678 if (nowtok
== tok_bsymbol
)
679 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
680 now
->val
.str
.startmb
,
684 obstack_printf (&result
->mem_pool
, "U%08X",
685 cmfile
->token
.val
.ucs4
);
686 obstack_1grow (&result
->mem_pool
, '\0');
687 from_name
= (char *) obstack_finish (&result
->mem_pool
);
696 if (nowtok
== tok_ellipsis3
)
703 if (nowtok
!= tok_number
)
704 lr_error (cmfile
, _("value for %s must be an integer"),
708 /* Store width for chars. */
709 new_width (cmfile
, result
, from_name
, to_name
, now
->val
.num
);
715 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
721 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
723 lr_error (cmfile
, _("syntax error in %s definition: %s"),
724 "WIDTH", _("no symbolic name given for end of range"));
726 lr_ignore_rest (cmfile
, 0);
732 if (nowtok
== tok_bsymbol
)
733 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
734 now
->val
.str
.startmb
,
738 obstack_printf (&result
->mem_pool
, "U%08X",
739 cmfile
->token
.val
.ucs4
);
740 obstack_1grow (&result
->mem_pool
, '\0');
741 to_name
= (char *) obstack_finish (&result
->mem_pool
);
748 /* We now expect `END WIDTH_VARIABLE' or lines of the format
749 "%s\n" or "%s...%s\n". */
750 if (nowtok
== tok_eol
)
751 /* ignore empty lines. */
754 if (nowtok
== tok_end
)
756 expected_tok
= tok_width_variable
;
757 expected_str
= "WIDTH_VARIABLE";
762 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
764 lr_error (cmfile
, _("syntax error in %s definition: %s"),
765 "WIDTH_VARIABLE", _("no symbolic name given"));
767 lr_ignore_rest (cmfile
, 0);
772 if (from_name
!= NULL
)
773 obstack_free (&result
->mem_pool
, from_name
);
775 if (nowtok
== tok_bsymbol
)
776 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
777 now
->val
.str
.startmb
,
781 obstack_printf (&result
->mem_pool
, "U%08X",
782 cmfile
->token
.val
.ucs4
);
783 obstack_1grow (&result
->mem_pool
, '\0');
784 from_name
= (char *) obstack_finish (&result
->mem_pool
);
792 if (nowtok
== tok_ellipsis3
)
803 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
805 lr_error (cmfile
, _("syntax error in %s definition: %s"),
807 _("no symbolic name given for end of range"));
808 lr_ignore_rest (cmfile
, 0);
812 if (nowtok
== tok_bsymbol
)
813 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
814 now
->val
.str
.startmb
,
818 obstack_printf (&result
->mem_pool
, "U%08X",
819 cmfile
->token
.val
.ucs4
);
820 obstack_1grow (&result
->mem_pool
, '\0');
821 to_name
= (char *) obstack_finish (&result
->mem_pool
);
824 /* XXX Enter value into table. */
826 lr_ignore_rest (cmfile
, 1);
832 error (5, 0, _("%s: error in state machine"), __FILE__
);
/* Ending up in any state other than 91 is reported as a truncated file
   unless BE_QUIET is set.  */
838 if (state
!= 91 && !be_quiet
)
839 error (0, 0, _("%s: premature end of file"), cmfile
->fname
);
/* Append a width rule to RESULT: the characters named FROM and TO get the
   display width WIDTH.  Both names are resolved with charmap_find_value and
   a name that cannot be resolved is reported through lr_error on CMFILE.
   The rule array result->width_rules is enlarged whenever it is full.  */
848 new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
849 const char *from
, const char *to
, unsigned long int width
)
851 struct charseq
*from_val
;
852 struct charseq
*to_val
;
854 from_val
= charmap_find_value (result
, from
, strlen (from
));
855 if (from_val
== NULL
)
857 lr_error (cmfile
, _("unknown character `%s'"), from
);
865 to_val
= charmap_find_value (result
, to
, strlen (to
));
868 lr_error (cmfile
, _("unknown character `%s'"), to
);
873 if (result
->nwidth_rules
>= result
->nwidth_rules_max
)
/* Grow the table: the new capacity is the current rule count plus 32.  The
   array is taken from the obstack and the existing rules are copied over.
   NOTE(review): on the first call width_rules is presumably still NULL
   from the memset in parse_charmap, which makes this a zero length memcpy
   from a null pointer - confirm that this is acceptable.  */
875 size_t new_size
= result
->nwidth_rules
+ 32;
876 struct width_rule
*new_rules
=
877 (struct width_rule
*) obstack_alloc (&result
->mem_pool
,
879 * sizeof (struct width_rule
)));
881 memcpy (new_rules
, result
->width_rules
,
882 result
->nwidth_rules_max
* sizeof (struct width_rule
));
884 result
->width_rules
= new_rules
;
885 result
->nwidth_rules_max
= new_size
;
/* Fill in the next free slot and count it.  */
888 result
->width_rules
[result
->nwidth_rules
].from
= from_val
;
889 result
->width_rules
[result
->nwidth_rules
].to
= to_val
;
/* WIDTH arrives as unsigned long int and is narrowed to unsigned int.  */
890 result
->width_rules
[result
->nwidth_rules
].width
= (unsigned int) width
;
891 ++result
->nwidth_rules
;
/* Look up the character called NAME (LEN bytes long) in cm->char_table.
   Returns NULL when find_entry yields a negative value, otherwise the
   stored entry as a struct charseq pointer.  The cast on the table drops
   the const qualifier of CM; presumably find_entry takes a non-const
   table - confirm against its prototype.  */
896 charmap_find_value (const struct charmap_t
*cm
, const char *name
, size_t len
)
900 return (find_entry ((hash_table
*) &cm
->char_table
, name
, len
, &result
)
901 < 0 ? NULL
: (struct charseq
*) result
);
/* Enter one character, or a whole range of characters, into the charmap CM.

   BYTES, NBYTES long, is the encoding belonging to FROM.  Every entry is
   stored twice: under its name in cm->char_table and under its byte
   sequence in cm->byte_table.  Names of the form Uxxxx, Uxxxxxxxx, Pxxxx or
   Pxxxxxxxx additionally supply the UCS4 value of the entry.

   For a range, FROM and TO must consist of a common prefix followed by a
   number, which is read as decimal when DECIMAL_ELLIPSIS is nonzero and as
   hexadecimal otherwise.  The loop advances the number by STEP for every
   generated name and increments BYTES in place between entries.

   In the code visible here LR is only passed to lr_error.  */
906 charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
907 int nbytes
, char *bytes
, const char *from
, const char *to
,
908 int decimal_ellipsis
, int step
)
910 hash_table
*ht
= &cm
->char_table
;
911 hash_table
*bt
= &cm
->byte_table
;
912 struct obstack
*ob
= &cm
->mem_pool
;
916 int prefix_len
, len1
, len2
;
917 unsigned int from_nr
, to_nr
, cnt
;
918 struct charseq
*newp
;
920 len1
= strlen (from
);
/* The byte sequence is kept together with the struct, hence the NBYTES
   extra bytes in the allocation.  */
924 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
925 newp
->nbytes
= nbytes
;
926 memcpy (newp
->bytes
, bytes
, nbytes
);
929 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
930 if ((from
[0] == 'U' || from
[0] == 'P') && (len1
== 5 || len1
== 9))
932 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
933 xxxx and xxxxxxxx are hexadecimal numbers. In this case
934 we use the value of xxxx or xxxxxxxx as the UCS4 value of
935 this character and we don't have to consult the repertoire
938 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
939 and xxxxxxxx also give the code point in UCS4 but this must
940 be in the private, i.e., unassigned, area. This should be
941 used for characters which do not (yet) have an equivalent
942 in ISO 10646 and Unicode. */
946 newp
->ucs4
= strtoul (from
+ 1, &endp
, 16);
947 if (endp
- from
!= len1
948 || (newp
->ucs4
== ULONG_MAX
&& errno
== ERANGE
)
949 || newp
->ucs4
>= 0x80000000)
950 /* This wasn't successful. Signal this name cannot be a
951 correct UCS value. */
952 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
955 insert_entry (ht
, from
, len1
, newp
);
956 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
957 /* Please note that it isn't a bug if a symbol is defined more
958 than once. All later definitions are simply discarded. */
962 /* We have a range: the two names must have equal prefixes
963 and an equal number of digits, where the second number is greater
964 than or equal to the first. */
970 lr_error (lr
, _("invalid names for character range"));
/* Scan backwards over the trailing digits of FROM to find the end of the
   prefix.  NOTE(review): both loop conditions read *cp before testing
   cp >= from, so if the loop body (not visible here) steps cp below FROM
   the byte in front of the string is read before the bound test stops the
   loop.  The ctype functions are also handed a plain char; a cast to
   unsigned char is the usual precaution - confirm whether it matters
   here.  */
974 cp
= &from
[len1
- 1];
975 if (decimal_ellipsis
)
976 while (isdigit (*cp
) && cp
>= from
)
979 while (isxdigit (*cp
) && cp
>= from
)
981 if (!isdigit (*cp
) && !isupper (*cp
))
983 hexadecimal range format should use only capital characters"));
987 prefix_len
= (cp
- from
) + 1;
989 if (cp
== &from
[len1
- 1] || strncmp (from
, to
, prefix_len
) != 0)
993 from_nr
= strtoul (&from
[prefix_len
], &from_end
, decimal_ellipsis
? 10 : 16);
994 if (*from_end
!= '\0' || (from_nr
== ULONG_MAX
&& errno
== ERANGE
)
995 || ((to_nr
= strtoul (&to
[prefix_len
], &to_end
,
996 decimal_ellipsis
? 10 : 16)) == ULONG_MAX
1000 lr_error (lr
, _("<%s> and <%s> are illegal names for range"), from
, to
);
1004 if (from_nr
> to_nr
)
1006 lr_error (lr
, _("upper limit in range is not higher then lower limit"));
1010 for (cnt
= from_nr
; cnt
<= to_nr
; cnt
+= step
)
1013 obstack_printf (ob
, decimal_ellipsis
? "%.*s%0*d" : "%.*s%0*X",
1014 prefix_len
, from
, len1
- prefix_len
, cnt
);
1015 obstack_1grow (ob
, '\0');
1016 name_end
= obstack_finish (ob
);
1018 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
1019 newp
->nbytes
= nbytes
;
1020 memcpy (newp
->bytes
, bytes
, nbytes
);
1021 newp
->name
= name_end
;
1023 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1024 if ((name_end
[0] == 'U' || name_end
[0] == 'P')
1025 && (len1
== 5 || len1
== 9))
1027 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
1028 xxxx and xxxxxxxx are hexadecimal numbers. In this case
1029 we use the value of xxxx or xxxxxxxx as the UCS4 value of
1030 this character and we don't have to consult the repertoire
1033 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
1034 and xxxxxxxx also give the code point in UCS4 but this must
1035 be in the private, i.e., unassigned, area. This should be
1036 used for characters which do not (yet) have an equivalent
1037 in ISO 10646 and Unicode. */
1041 newp
->ucs4
= strtoul (name_end
+ 1, &endp
, 16);
1042 if (endp
- name_end
!= len1
1043 || (newp
->ucs4
== ULONG_MAX
&& errno
== ERANGE
)
1044 || newp
->ucs4
>= 0x80000000)
1045 /* This wasn't successful. Signal this name cannot be a
1046 correct UCS value. */
1047 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1050 insert_entry (ht
, name_end
, len1
, newp
);
1051 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
1052 /* Please note we don't examine the return value since it is no error
1053 if we have two definitions for a symbol. */
1055 /* Increment the value in the byte sequence. */
1056 if (++bytes
[nbytes
- 1] == '\0')
1064 _("resulting bytes for range not representable."));
1067 while (++bytes
[b
--] == 0);
/* Look up the entry stored for the byte sequence BYTES in cm->byte_table.
   Returns NULL when find_entry yields a negative value, otherwise the
   stored entry as a struct charseq pointer.  The length handed to
   find_entry is nbytes, whose declaration is not visible here.  */
1074 charmap_find_symbol (const struct charmap_t
*cm
, const char *bytes
,
1079 return (find_entry ((hash_table
*) &cm
->byte_table
, bytes
, nbytes
, &result
)
1080 < 0 ? NULL
: (struct charseq
*) result
);