1 /* Copyright (C) 1996,1998,1999,2000,2001 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
34 #include "linereader.h"
36 #include "charmap-dir.h"
37 #include "repertoire.h"
42 /* Define the lookup function. */
43 #include "charmap-kw.h"
46 extern void *xmalloc (size_t __n
);
48 /* Prototypes for local functions. */
49 static struct charmap_t
*parse_charmap (struct linereader
*cmfile
,
50 int verbose
, int be_quiet
);
51 static void new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
52 const char *from
, const char *to
,
53 unsigned long int width
);
54 static void charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
55 int nbytes
, char *bytes
, const char *from
,
56 const char *to
, int decimal_ellipsis
, int step
);
59 #ifdef NEED_NULL_POINTER
60 static const char *null_pointer
;
/* Open the charmap file NAME in DIRECTORY and return a linereader for it.

   The stream comes from charmap_open (DIRECTORY, NAME); the linereader is
   created by lr_create from that stream, a path name assembled from
   DIRECTORY and NAME, and the keyword hash function HF.

   NOTE(review): several lines of this function (local declarations, the
   handling of a failed charmap_open, and the tail of the path assembly)
   are not visible in this listing -- confirm against the full source.  */
63 static struct linereader
*
64 cmlr_open (const char *directory
, const char *name
, kw_hash_fct_t hf
)
68 fp
= charmap_open (directory
, name
);
73 size_t dlen
= strlen (directory
);
/* add_slash is 1 when DIRECTORY is empty or does not already end in '/',
   i.e. when a separator has to be accounted for in the buffer size.  */
74 int add_slash
= (dlen
== 0 || directory
[dlen
- 1] != '/');
75 size_t nlen
= strlen (name
);
/* Room for directory + optional '/' + name + terminating NUL.  The buffer
   comes from alloca, so it is only valid until this function returns.
   NOTE(review): presumably lr_create copies the name it is given --
   confirm, otherwise the linereader keeps a dangling pointer.  */
79 pathname
= alloca (dlen
+ add_slash
+ nlen
+ 1);
80 p
= stpcpy (pathname
, directory
);
85 return lr_create (fp
, pathname
, hf
);
90 charmap_read (const char *filename
, int verbose
, int be_quiet
, int use_default
)
92 struct charmap_t
*result
= NULL
;
96 struct linereader
*cmfile
;
98 /* First try the name as found in the parameter. */
99 cmfile
= lr_open (filename
, charmap_hash
);
102 /* Not successful. So start looking through the directories
103 in the I18NPATH if this is a simple name. */
104 if (strchr (filename
, '/') == NULL
)
106 char *i18npath
= getenv ("I18NPATH");
107 if (i18npath
!= NULL
&& *i18npath
!= '\0')
109 char path
[strlen (i18npath
) + sizeof ("/charmaps")];
111 i18npath
= strdupa (i18npath
);
113 while (cmfile
== NULL
114 && (next
= strsep (&i18npath
, ":")) != NULL
)
116 stpcpy (stpcpy (path
, next
), "/charmaps");
117 cmfile
= cmlr_open (path
, filename
, charmap_hash
);
121 /* Try without the "/charmaps" part. */
122 cmfile
= cmlr_open (next
, filename
, charmap_hash
);
129 /* Try the default directory. */
130 cmfile
= cmlr_open (CHARMAP_PATH
, filename
, charmap_hash
);
137 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
139 if (result
== NULL
&& !be_quiet
)
140 error (0, errno
, _("character map file `%s' not found"), filename
);
144 if (result
== NULL
&& filename
!= NULL
&& strchr (filename
, '/') == NULL
)
146 /* OK, one more try. We also accept the names given to the
147 character sets in the files. Sometimes they differ from the
151 dir
= charmap_opendir (CHARMAP_PATH
);
156 while ((dirent
= charmap_readdir (dir
)) != NULL
)
162 aliases
= charmap_aliases (CHARMAP_PATH
, dirent
);
164 for (p
= aliases
; *p
; p
++)
165 if (strcasecmp (*p
, filename
) == 0)
170 charmap_free_aliases (aliases
);
174 struct linereader
*cmfile
;
176 cmfile
= cmlr_open (CHARMAP_PATH
, dirent
, charmap_hash
);
178 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
184 charmap_closedir (dir
);
188 if (result
== NULL
&& DEFAULT_CHARMAP
!= NULL
)
190 struct linereader
*cmfile
;
192 cmfile
= cmlr_open (CHARMAP_PATH
, DEFAULT_CHARMAP
, charmap_hash
);
194 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
197 error (4, errno
, _("default character map file `%s' not found"),
201 /* Test of ASCII compatibility of locale encoding.
203 Verify that the encoding to be used in a locale is ASCII compatible,
204 at least for the graphic characters, excluding the control characters,
205 '$' and '@'. This constraint comes from an ISO C 99 restriction.
207 ISO C 99 section 7.17.(2) (about wchar_t):
208 the null character shall have the code value zero and each member of
209 the basic character set shall have a code value equal to its value
210 when used as the lone character in an integer character constant.
211 ISO C 99 section 5.2.1.(3):
212 Both the basic source and basic execution character sets shall have
213 the following members: the 26 uppercase letters of the Latin alphabet
214 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
215 the 26 lowercase letters of the Latin alphabet
216 a b c d e f g h i j k l m n o p q r s t u v w x y z
217 the 10 decimal digits
219 the following 29 graphic characters
220 ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
221 the space character, and control characters representing horizontal
222 tab, vertical tab, and form feed.
224 Therefore, for all members of the "basic character set", the 'char' code
225 must have the same value as the 'wchar_t' code, which in glibc is the
226 same as the Unicode code, which for all of the enumerated characters
227 is identical to the ASCII code. */
228 if (result
!= NULL
&& use_default
)
230 static const char basic_charset
[] =
232 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
233 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
234 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
235 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
236 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
237 '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
238 '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
239 '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
242 const char *p
= basic_charset
;
246 struct charseq
* seq
= charmap_find_symbol (result
, p
, 1);
248 if (seq
== NULL
|| seq
->ucs4
!= *p
)
251 while (*p
++ != '\0');
254 fprintf (stderr
, _("\
255 character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
256 result
->code_set_name
);
263 static struct charmap_t
*
264 parse_charmap (struct linereader
*cmfile
, int verbose
, int be_quiet
)
266 struct charmap_t
*result
;
268 enum token_t expected_tok
= tok_error
;
269 const char *expected_str
= NULL
;
270 char *from_name
= NULL
;
271 char *to_name
= NULL
;
272 enum token_t ellipsis
= 0;
275 /* We don't want symbolic names in string to be translated. */
276 cmfile
->translate_strings
= 0;
278 /* Allocate room for result. */
279 result
= (struct charmap_t
*) xmalloc (sizeof (struct charmap_t
));
280 memset (result
, '\0', sizeof (struct charmap_t
));
281 /* The default DEFAULT_WIDTH is 1. */
282 result
->width_default
= 1;
284 #define obstack_chunk_alloc malloc
285 #define obstack_chunk_free free
286 obstack_init (&result
->mem_pool
);
288 if (init_hash (&result
->char_table
, 256)
289 || init_hash (&result
->byte_table
, 256))
295 /* We use a state machine to describe the charmap description file
301 struct token
*now
= lr_token (cmfile
, NULL
, NULL
, verbose
);
302 enum token_t nowtok
= now
->tok
;
305 if (nowtok
== tok_eof
)
311 /* The beginning. We expect the special declarations, EOL or
313 if (nowtok
== tok_eol
)
314 /* Ignore empty lines. */
317 if (nowtok
== tok_charmap
)
322 /* We have to set up the real work. Fill in some
324 if (result
->mb_cur_max
== 0)
325 result
->mb_cur_max
= 1;
326 if (result
->mb_cur_min
== 0)
327 result
->mb_cur_min
= result
->mb_cur_max
;
328 if (result
->mb_cur_min
> result
->mb_cur_max
)
332 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
335 result
->mb_cur_min
= result
->mb_cur_max
;
338 lr_ignore_rest (cmfile
, 1);
344 if (nowtok
!= tok_code_set_name
&& nowtok
!= tok_mb_cur_max
345 && nowtok
!= tok_mb_cur_min
&& nowtok
!= tok_escape_char
346 && nowtok
!= tok_comment_char
&& nowtok
!= tok_g0esc
347 && nowtok
!= tok_g1esc
&& nowtok
!= tok_g2esc
348 && nowtok
!= tok_g3esc
&& nowtok
!= tok_repertoiremap
349 && nowtok
!= tok_include
)
351 lr_error (cmfile
, _("syntax error in prolog: %s"),
352 _("invalid definition"));
354 lr_ignore_rest (cmfile
, 0);
358 /* We know that we need an argument. */
359 arg
= lr_token (cmfile
, NULL
, NULL
, verbose
);
363 case tok_code_set_name
:
364 case tok_repertoiremap
:
365 if (arg
->tok
!= tok_ident
&& arg
->tok
!= tok_string
)
368 lr_error (cmfile
, _("syntax error in prolog: %s"),
371 lr_ignore_rest (cmfile
, 0);
375 if (nowtok
== tok_code_set_name
)
376 result
->code_set_name
= obstack_copy0 (&result
->mem_pool
,
377 arg
->val
.str
.startmb
,
380 result
->repertoiremap
= obstack_copy0 (&result
->mem_pool
,
381 arg
->val
.str
.startmb
,
384 lr_ignore_rest (cmfile
, 1);
/* Handling of the <mb_cur_max> / <mb_cur_min> prolog keywords: the argument
   must be a number, and a keyword that already has a value is reported as
   a duplicate definition.

   NOTE(review): the two alternatives of the duplicate test below are
   identical -- both check nowtok == tok_mb_cur_max and
   result->mb_cur_max != 0.  As written, a repeated <mb_cur_min> is never
   diagnosed, although the message argument distinguishes "mb_cur_min"
   from "mb_cur_max".  The second alternative presumably should test
   tok_mb_cur_min and result->mb_cur_min -- confirm and fix.  */
389 if (arg
->tok
!= tok_number
)
393 && ((nowtok
== tok_mb_cur_max
394 && result
->mb_cur_max
!= 0)
395 || (nowtok
== tok_mb_cur_max
396 && result
->mb_cur_max
!= 0)))
397 lr_error (cmfile
, _("duplicate definition of <%s>"),
398 nowtok
== tok_mb_cur_min
399 ? "mb_cur_min" : "mb_cur_max");
401 if (arg
->val
.num
< 1)
404 _("value for <%s> must be 1 or greater"),
405 nowtok
== tok_mb_cur_min
406 ? "mb_cur_min" : "mb_cur_max");
408 lr_ignore_rest (cmfile
, 0);
411 if ((nowtok
== tok_mb_cur_max
&& result
->mb_cur_min
!= 0
412 && (int) arg
->val
.num
< result
->mb_cur_min
)
413 || (nowtok
== tok_mb_cur_min
&& result
->mb_cur_max
!= 0
414 && (int) arg
->val
.num
> result
->mb_cur_max
))
416 lr_error (cmfile
, _("\
417 value of <%s> must be greater or equal than the value of <%s>"),
418 "mb_cur_max", "mb_cur_min");
420 lr_ignore_rest (cmfile
, 0);
424 if (nowtok
== tok_mb_cur_max
)
425 result
->mb_cur_max
= arg
->val
.num
;
427 result
->mb_cur_min
= arg
->val
.num
;
429 lr_ignore_rest (cmfile
, 1);
432 case tok_escape_char
:
433 case tok_comment_char
:
434 if (arg
->tok
!= tok_ident
)
437 if (arg
->val
.str
.lenmb
!= 1)
439 lr_error (cmfile
, _("\
440 argument to <%s> must be a single character"),
441 nowtok
== tok_escape_char
? "escape_char"
444 lr_ignore_rest (cmfile
, 0);
448 if (nowtok
== tok_escape_char
)
449 cmfile
->escape_char
= *arg
->val
.str
.startmb
;
451 cmfile
->comment_char
= *arg
->val
.str
.startmb
;
453 lr_ignore_rest (cmfile
, 1);
461 lr_ignore_rest (cmfile
, 0); /* XXX */
465 lr_error (cmfile
, _("\
466 character sets with locking states are not supported"));
471 assert (! "Should not happen");
476 /* We have seen `CHARMAP' and now are in the body. Each line
477 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
478 if (nowtok
== tok_eol
)
479 /* Ignore empty lines. */
482 if (nowtok
== tok_end
)
484 expected_tok
= tok_charmap
;
485 expected_str
= "CHARMAP";
490 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
492 lr_error (cmfile
, _("syntax error in %s definition: %s"),
493 "CHARMAP", _("no symbolic name given"));
495 lr_ignore_rest (cmfile
, 0);
499 /* If the previous line was not completely correct free the
501 if (from_name
!= NULL
)
502 obstack_free (&result
->mem_pool
, from_name
);
504 if (nowtok
== tok_bsymbol
)
505 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
506 now
->val
.str
.startmb
,
510 obstack_printf (&result
->mem_pool
, "U%08X",
511 cmfile
->token
.val
.ucs4
);
512 obstack_1grow (&result
->mem_pool
, '\0');
513 from_name
= (char *) obstack_finish (&result
->mem_pool
);
521 /* We have two possibilities: We can see an ellipsis or an
523 if (nowtok
== tok_ellipsis3
|| nowtok
== tok_ellipsis4
524 || nowtok
== tok_ellipsis2
|| nowtok
== tok_ellipsis4_2
525 || nowtok
== tok_ellipsis2_2
)
528 if (nowtok
== tok_ellipsis4_2
)
531 nowtok
= tok_ellipsis4
;
533 else if (nowtok
== tok_ellipsis2_2
)
536 nowtok
= tok_ellipsis2
;
544 if (nowtok
!= tok_charcode
)
546 lr_error (cmfile
, _("syntax error in %s definition: %s"),
547 "CHARMAP", _("invalid encoding given"));
549 lr_ignore_rest (cmfile
, 0);
555 if (now
->val
.charcode
.nbytes
< result
->mb_cur_min
)
556 lr_error (cmfile
, _("too few bytes in character encoding"));
557 else if (now
->val
.charcode
.nbytes
> result
->mb_cur_max
)
558 lr_error (cmfile
, _("too many bytes in character encoding"));
560 charmap_new_char (cmfile
, result
, now
->val
.charcode
.nbytes
,
561 now
->val
.charcode
.bytes
, from_name
, to_name
,
562 ellipsis
!= tok_ellipsis2
, step
);
564 /* Ignore trailing comment silently. */
565 lr_ignore_rest (cmfile
, 0);
576 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
578 lr_error (cmfile
, _("syntax error in %s definition: %s"),
580 _("no symbolic name given for end of range"));
582 lr_ignore_rest (cmfile
, 0);
586 /* Copy the to-name in a safe place. */
587 if (nowtok
== tok_bsymbol
)
588 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
589 cmfile
->token
.val
.str
.startmb
,
590 cmfile
->token
.val
.str
.lenmb
);
593 obstack_printf (&result
->mem_pool
, "U%08X",
594 cmfile
->token
.val
.ucs4
);
595 obstack_1grow (&result
->mem_pool
, '\0');
596 to_name
= (char *) obstack_finish (&result
->mem_pool
);
603 if (nowtok
!= expected_tok
)
604 lr_error (cmfile
, _("\
605 `%1$s' definition does not end with `END %1$s'"), expected_str
);
607 lr_ignore_rest (cmfile
, nowtok
== expected_tok
);
612 /* Waiting for WIDTH... */
613 if (nowtok
== tok_eol
)
614 /* Ignore empty lines. */
617 if (nowtok
== tok_width_default
)
623 if (nowtok
== tok_width
)
625 lr_ignore_rest (cmfile
, 1);
630 if (nowtok
== tok_width_variable
)
632 lr_ignore_rest (cmfile
, 1);
637 lr_error (cmfile
, _("\
638 only WIDTH definitions are allowed to follow the CHARMAP definition"));
640 lr_ignore_rest (cmfile
, 0);
644 if (nowtok
!= tok_number
)
645 lr_error (cmfile
, _("value for %s must be an integer"),
648 result
->width_default
= now
->val
.num
;
650 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
656 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
658 if (nowtok
== tok_eol
)
659 /* ignore empty lines. */
662 if (nowtok
== tok_end
)
664 expected_tok
= tok_width
;
665 expected_str
= "WIDTH";
670 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
672 lr_error (cmfile
, _("syntax error in %s definition: %s"),
673 "WIDTH", _("no symbolic name given"));
675 lr_ignore_rest (cmfile
, 0);
679 if (from_name
!= NULL
)
680 obstack_free (&result
->mem_pool
, from_name
);
682 if (nowtok
== tok_bsymbol
)
683 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
684 now
->val
.str
.startmb
,
688 obstack_printf (&result
->mem_pool
, "U%08X",
689 cmfile
->token
.val
.ucs4
);
690 obstack_1grow (&result
->mem_pool
, '\0');
691 from_name
= (char *) obstack_finish (&result
->mem_pool
);
700 if (nowtok
== tok_ellipsis3
)
707 if (nowtok
!= tok_number
)
708 lr_error (cmfile
, _("value for %s must be an integer"),
712 /* Store width for chars. */
713 new_width (cmfile
, result
, from_name
, to_name
, now
->val
.num
);
719 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
725 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
727 lr_error (cmfile
, _("syntax error in %s definition: %s"),
728 "WIDTH", _("no symbolic name given for end of range"));
730 lr_ignore_rest (cmfile
, 0);
736 if (nowtok
== tok_bsymbol
)
737 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
738 now
->val
.str
.startmb
,
742 obstack_printf (&result
->mem_pool
, "U%08X",
743 cmfile
->token
.val
.ucs4
);
744 obstack_1grow (&result
->mem_pool
, '\0');
745 to_name
= (char *) obstack_finish (&result
->mem_pool
);
752 /* We now expect `END WIDTH_VARIABLE' or lines of the format
753 "%s\n" or "%s...%s\n". */
754 if (nowtok
== tok_eol
)
755 /* ignore empty lines. */
758 if (nowtok
== tok_end
)
760 expected_tok
= tok_width_variable
;
761 expected_str
= "WIDTH_VARIABLE";
766 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
768 lr_error (cmfile
, _("syntax error in %s definition: %s"),
769 "WIDTH_VARIABLE", _("no symbolic name given"));
771 lr_ignore_rest (cmfile
, 0);
776 if (from_name
!= NULL
)
777 obstack_free (&result
->mem_pool
, from_name
);
779 if (nowtok
== tok_bsymbol
)
780 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
781 now
->val
.str
.startmb
,
785 obstack_printf (&result
->mem_pool
, "U%08X",
786 cmfile
->token
.val
.ucs4
);
787 obstack_1grow (&result
->mem_pool
, '\0');
788 from_name
= (char *) obstack_finish (&result
->mem_pool
);
796 if (nowtok
== tok_ellipsis3
)
807 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
809 lr_error (cmfile
, _("syntax error in %s definition: %s"),
811 _("no symbolic name given for end of range"));
812 lr_ignore_rest (cmfile
, 0);
816 if (nowtok
== tok_bsymbol
)
817 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
818 now
->val
.str
.startmb
,
822 obstack_printf (&result
->mem_pool
, "U%08X",
823 cmfile
->token
.val
.ucs4
);
824 obstack_1grow (&result
->mem_pool
, '\0');
825 to_name
= (char *) obstack_finish (&result
->mem_pool
);
828 /* XXX Enter value into table. */
830 lr_ignore_rest (cmfile
, 1);
836 error (5, 0, _("%s: error in state machine"), __FILE__
);
842 if (state
!= 91 && !be_quiet
)
843 error (0, 0, _("%s: premature end of file"), cmfile
->fname
);
/* Record a width rule in RESULT for the characters named FROM .. TO.

   CMFILE is only used for error reporting through lr_error.  FROM and TO
   are symbolic character names that are looked up with
   charmap_find_value; WIDTH is the value stored for the rule.

   NOTE(review): the lines that decide what happens after an "unknown
   character" error, and whether TO may be NULL for a single-character
   rule, are not visible in this listing -- confirm against the full
   source.  */
852 new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
853 const char *from
, const char *to
, unsigned long int width
)
855 struct charseq
*from_val
;
856 struct charseq
*to_val
;
/* Resolve the start of the range by its symbolic name.  */
858 from_val
= charmap_find_value (result
, from
, strlen (from
));
859 if (from_val
== NULL
)
861 lr_error (cmfile
, _("unknown character `%s'"), from
);
/* Resolve the end of the range by its symbolic name.  */
869 to_val
= charmap_find_value (result
, to
, strlen (to
));
872 lr_error (cmfile
, _("unknown character `%s'"), to
);
/* Grow the rule array when it is full.  The new array is taken from the
   charmap's obstack and the existing rules are copied over.
   NOTE(review): on the first call nwidth_rules_max is presumably 0 and
   width_rules presumably still null (the structure is zeroed with memset
   in parse_charmap), which makes this a zero-length memcpy from a null
   pointer -- formally undefined in ISO C; confirm and guard if so.  */
877 if (result
->nwidth_rules
>= result
->nwidth_rules_max
)
879 size_t new_size
= result
->nwidth_rules
+ 32;
880 struct width_rule
*new_rules
=
881 (struct width_rule
*) obstack_alloc (&result
->mem_pool
,
883 * sizeof (struct width_rule
)));
885 memcpy (new_rules
, result
->width_rules
,
886 result
->nwidth_rules_max
* sizeof (struct width_rule
));
888 result
->width_rules
= new_rules
;
889 result
->nwidth_rules_max
= new_size
;
/* Append the new rule.  WIDTH is narrowed from unsigned long to
   unsigned int here without a range check.  */
892 result
->width_rules
[result
->nwidth_rules
].from
= from_val
;
893 result
->width_rules
[result
->nwidth_rules
].to
= to_val
;
894 result
->width_rules
[result
->nwidth_rules
].width
= (unsigned int) width
;
895 ++result
->nwidth_rules
;
/* Look up the symbolic character name NAME (LEN bytes long) in the
   char_table of CM.

   Returns the table entry as a struct charseq pointer, or NULL when
   find_entry reports failure with a negative return value.  The cast on
   &cm->char_table removes the const qualifier inherited from CM before
   the table is handed to find_entry.  */
900 charmap_find_value (const struct charmap_t
*cm
, const char *name
, size_t len
)
904 return (find_entry ((hash_table
*) &cm
->char_table
, name
, len
, &result
)
905 < 0 ? NULL
: (struct charseq
*) result
);
910 charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
911 int nbytes
, char *bytes
, const char *from
, const char *to
,
912 int decimal_ellipsis
, int step
)
914 hash_table
*ht
= &cm
->char_table
;
915 hash_table
*bt
= &cm
->byte_table
;
916 struct obstack
*ob
= &cm
->mem_pool
;
920 int prefix_len
, len1
, len2
;
921 unsigned int from_nr
, to_nr
, cnt
;
922 struct charseq
*newp
;
924 len1
= strlen (from
);
928 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
929 newp
->nbytes
= nbytes
;
930 memcpy (newp
->bytes
, bytes
, nbytes
);
933 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
934 if ((from
[0] == 'U' || from
[0] == 'P') && (len1
== 5 || len1
== 9))
936 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
937 xxxx and xxxxxxxx are hexadecimal numbers. In this case
938 we use the value of xxxx or xxxxxxxx as the UCS4 value of
939 this character and we don't have to consult the repertoire
942 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
943 and xxxxxxxx also give the code point in UCS4 but this must
944 be in the private, i.e., unassigned, area. This should be
945 used for characters which do not (yet) have an equivalent
946 in ISO 10646 and Unicode. */
950 newp
->ucs4
= strtoul (from
+ 1, &endp
, 16);
951 if (endp
- from
!= len1
952 || (newp
->ucs4
== ULONG_MAX
&& errno
== ERANGE
)
953 || newp
->ucs4
>= 0x80000000)
954 /* This wasn't successful. Signal this name cannot be a
955 correct UCS value. */
956 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
959 insert_entry (ht
, from
, len1
, newp
);
960 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
961 /* Please note that it isn't a bug if a symbol is defined more
962 than once. All later definitions are simply discarded. */
966 /* We have a range: both names must share the same prefix and have
967 the same number of digits, and the second number must be greater
968 than or equal to the first. */
974 lr_error (lr
, _("invalid names for character range"));
/* Scan backwards from the last character of FROM over the trailing
   digits (decimal digits for a decimal ellipsis, hexadecimal digits
   otherwise) to find where the numeric suffix of the name begins.

   NOTE(review): both loop conditions dereference *cp before testing
   cp >= from.  The loop bodies are not visible in this listing, but if
   they step cp backwards and the whole name consists of digits, the last
   test reads one byte before the start of FROM.  Testing the bound first
   would avoid that.  Also, *cp is a plain char; the <ctype.h> functions
   require a value representable as unsigned char (or EOF).  */
978 cp
= &from
[len1
- 1];
979 if (decimal_ellipsis
)
980 while (isdigit (*cp
) && cp
>= from
)
983 while (isxdigit (*cp
) && cp
>= from
)
985 if (!isdigit (*cp
) && !isupper (*cp
))
987 hexadecimal range format should use only capital characters"));
991 prefix_len
= (cp
- from
) + 1;
993 if (cp
== &from
[len1
- 1] || strncmp (from
, to
, prefix_len
) != 0)
/* Convert the numeric suffixes of FROM and TO (base 10 for a decimal
   ellipsis, base 16 otherwise) and reject names whose suffix does not
   parse completely or overflows.

   NOTE(review): from_nr and to_nr are declared unsigned int in this
   function, so the strtoul results are narrowed on assignment.  Where
   unsigned long is wider than unsigned int, the stored value can never
   equal ULONG_MAX and the overflow test below cannot fire -- confirm,
   and consider keeping the result in an unsigned long.  */
997 from_nr
= strtoul (&from
[prefix_len
], &from_end
, decimal_ellipsis
? 10 : 16);
998 if (*from_end
!= '\0' || (from_nr
== ULONG_MAX
&& errno
== ERANGE
)
999 || ((to_nr
= strtoul (&to
[prefix_len
], &to_end
,
1000 decimal_ellipsis
? 10 : 16)) == ULONG_MAX
1004 lr_error (lr
, _("<%s> and <%s> are illegal names for range"), from
, to
);
1008 if (from_nr
> to_nr
)
1010 lr_error (lr
, _("upper limit in range is not higher then lower limit"));
1014 for (cnt
= from_nr
; cnt
<= to_nr
; cnt
+= step
)
1017 obstack_printf (ob
, decimal_ellipsis
? "%.*s%0*d" : "%.*s%0*X",
1018 prefix_len
, from
, len1
- prefix_len
, cnt
);
1019 obstack_1grow (ob
, '\0');
1020 name_end
= obstack_finish (ob
);
1022 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
1023 newp
->nbytes
= nbytes
;
1024 memcpy (newp
->bytes
, bytes
, nbytes
);
1025 newp
->name
= name_end
;
1027 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1028 if ((name_end
[0] == 'U' || name_end
[0] == 'P')
1029 && (len1
== 5 || len1
== 9))
1031 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
1032 xxxx and xxxxxxxx are hexadecimal numbers. In this case
1033 we use the value of xxxx or xxxxxxxx as the UCS4 value of
1034 this character and we don't have to consult the repertoire
1037 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
1038 and xxxxxxxx also give the code point in UCS4 but this must
1039 be in the private, i.e., unassigned, area. This should be
1040 used for characters which do not (yet) have an equivalent
1041 in ISO 10646 and Unicode. */
1045 newp
->ucs4
= strtoul (name_end
+ 1, &endp
, 16);
1046 if (endp
- name_end
!= len1
1047 || (newp
->ucs4
== ULONG_MAX
&& errno
== ERANGE
)
1048 || newp
->ucs4
>= 0x80000000)
1049 /* This wasn't successful. Signal this name cannot be a
1050 correct UCS value. */
1051 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1054 insert_entry (ht
, name_end
, len1
, newp
);
1055 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
1056 /* Please note we don't examine the return value since it is no error
1057 if we have two definitions for a symbol. */
1059 /* Increment the value in the byte sequence. */
1060 if (++bytes
[nbytes
- 1] == '\0')
1068 _("resulting bytes for range not representable."));
1071 while (++bytes
[b
--] == 0);
/* Look up the byte sequence BYTES in the byte_table of CM.

   Returns the table entry as a struct charseq pointer, or NULL when
   find_entry reports failure with a negative return value.  The cast on
   &cm->byte_table removes the const qualifier inherited from CM.

   NOTE(review): the remainder of the parameter list (the nbytes length
   used below) and the declaration of result are not visible in this
   listing.  */
1078 charmap_find_symbol (const struct charmap_t
*cm
, const char *bytes
,
1083 return (find_entry ((hash_table
*) &cm
->byte_table
, bytes
, nbytes
, &result
)
1084 < 0 ? NULL
: (struct charseq
*) result
);