1 /* Copyright (C) 1996, 1998-2002, 2003, 2004 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
33 #include "localedef.h"
34 #include "linereader.h"
36 #include "charmap-dir.h"
41 /* Define the lookup function. */
42 #include "charmap-kw.h"
45 /* Prototypes for local functions. */
/* Read the charmap description from CMFILE token by token and build a
   newly allocated `struct charmap_t' from it.  VERBOSE is handed on to
   the tokenizer; a non-zero BE_QUIET suppresses the "premature end of
   file" diagnostic.  */
46 static struct charmap_t
*parse_charmap (struct linereader
*cmfile
,
47 int verbose
, int be_quiet
);
/* Append a width rule to RESULT->width_rules giving WIDTH to the
   characters named FROM through TO.  Problems with the names are
   reported via lr_error on CMFILE.  */
48 static void new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
49 const char *from
, const char *to
,
50 unsigned long int width
);
/* Enter the NBYTES long byte sequence BYTES under the symbolic name
   FROM into both the character table and the byte table of CM.  For a
   range, one entry is generated per name from FROM up to TO, advancing
   the numeric suffix by STEP; DECIMAL_ELLIPSIS selects decimal rather
   than hexadecimal suffixes.  Errors are reported via lr_error on LR.  */
51 static void charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
52 size_t nbytes
, unsigned char *bytes
,
53 const char *from
, const char *to
,
54 int decimal_ellipsis
, int step
);
/* Set to true by charmap_read when the character map it loaded fails
   the ASCII-compatibility test for the basic character set.  */
57 bool enc_not_ascii_compatible
;
60 #ifdef NEED_NULL_POINTER
61 static const char *null_pointer
;
64 static struct linereader
*
65 cmlr_open (const char *directory
, const char *name
, kw_hash_fct_t hf
)
69 fp
= charmap_open (directory
, name
);
74 size_t dlen
= strlen (directory
);
75 int add_slash
= (dlen
== 0 || directory
[dlen
- 1] != '/');
76 size_t nlen
= strlen (name
);
80 pathname
= alloca (dlen
+ add_slash
+ nlen
+ 1);
81 p
= stpcpy (pathname
, directory
);
86 return lr_create (fp
, pathname
, hf
);
91 charmap_read (const char *filename
, int verbose
, int be_quiet
, int use_default
)
93 struct charmap_t
*result
= NULL
;
97 struct linereader
*cmfile
;
99 /* First try the name as found in the parameter. */
100 cmfile
= lr_open (filename
, charmap_hash
);
103 /* Not successful. If this is a simple name (one without a '/'),
104 search the directories listed in I18NPATH. */
105 if (strchr (filename
, '/') == NULL
)
107 char *i18npath
= getenv ("I18NPATH");
108 if (i18npath
!= NULL
&& *i18npath
!= '\0')
110 const size_t pathlen
= strlen (i18npath
);
111 char i18npathbuf
[pathlen
+ 1];
112 char path
[pathlen
+ sizeof ("/charmaps")];
114 i18npath
= memcpy (i18npathbuf
, i18npath
, pathlen
+ 1);
116 while (cmfile
== NULL
117 && (next
= strsep (&i18npath
, ":")) != NULL
)
119 stpcpy (stpcpy (path
, next
), "/charmaps");
120 cmfile
= cmlr_open (path
, filename
, charmap_hash
);
123 /* Try without the "/charmaps" part. */
124 cmfile
= cmlr_open (next
, filename
, charmap_hash
);
129 /* Try the default directory. */
130 cmfile
= cmlr_open (CHARMAP_PATH
, filename
, charmap_hash
);
136 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
138 if (result
== NULL
&& !be_quiet
)
139 WITH_CUR_LOCALE (error (0, errno
, _("\
140 character map file `%s' not found"), filename
));
144 if (result
== NULL
&& filename
!= NULL
&& strchr (filename
, '/') == NULL
)
146 /* OK, one more try. We also accept the names given to the
147 character sets in the files. Sometimes they differ from the
151 dir
= charmap_opendir (CHARMAP_PATH
);
156 while ((dirent
= charmap_readdir (dir
)) != NULL
)
162 aliases
= charmap_aliases (CHARMAP_PATH
, dirent
);
164 for (p
= aliases
; *p
; p
++)
165 if (strcasecmp (*p
, filename
) == 0)
170 charmap_free_aliases (aliases
);
174 struct linereader
*cmfile
;
176 cmfile
= cmlr_open (CHARMAP_PATH
, dirent
, charmap_hash
);
178 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
184 charmap_closedir (dir
);
188 if (result
== NULL
&& DEFAULT_CHARMAP
!= NULL
)
190 struct linereader
*cmfile
;
192 cmfile
= cmlr_open (CHARMAP_PATH
, DEFAULT_CHARMAP
, charmap_hash
);
194 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
197 WITH_CUR_LOCALE (error (4, errno
, _("\
198 default character map file `%s' not found"), DEFAULT_CHARMAP
));
201 if (result
!= NULL
&& result
->code_set_name
== NULL
)
202 /* The input file does not specify a code set name. This
203 shouldn't happen but we should cope with it. */
204 result
->code_set_name
= basename (filename
);
206 /* Test of ASCII compatibility of locale encoding.
208 Verify that the encoding to be used in a locale is ASCII compatible,
209 at least for the graphic characters, excluding the control characters,
210 '$' and '@'. This constraint comes from an ISO C 99 restriction.
212 ISO C 99 section 7.17.(2) (about wchar_t):
213 the null character shall have the code value zero and each member of
214 the basic character set shall have a code value equal to its value
215 when used as the lone character in an integer character constant.
216 ISO C 99 section 5.2.1.(3):
217 Both the basic source and basic execution character sets shall have
218 the following members: the 26 uppercase letters of the Latin alphabet
219 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
220 the 26 lowercase letters of the Latin alphabet
221 a b c d e f g h i j k l m n o p q r s t u v w x y z
222 the 10 decimal digits
223 0 1 2 3 4 5 6 7 8 9
224 the following 29 graphic characters
225 ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
226 the space character, and control characters representing horizontal
227 tab, vertical tab, and form feed.
229 Therefore, for all members of the "basic character set", the 'char' code
230 must have the same value as the 'wchar_t' code, which in glibc is the
231 same as the Unicode code, which for all of the enumerated characters
232 is identical to the ASCII code. */
233 if (result
!= NULL
&& use_default
)
235 static const char basic_charset
[] =
237 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
238 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
239 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
240 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
241 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
242 '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
243 '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
244 '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
247 const char *p
= basic_charset
;
251 struct charseq
*seq
= charmap_find_symbol (result
, p
, 1);
253 if (seq
== NULL
|| seq
->ucs4
!= (uint32_t) *p
)
256 while (*p
++ != '\0');
260 WITH_CUR_LOCALE (fprintf (stderr
, _("\
261 character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
262 result
->code_set_name
));
263 enc_not_ascii_compatible
= true;
271 static struct charmap_t
*
272 parse_charmap (struct linereader
*cmfile
, int verbose
, int be_quiet
)
274 struct charmap_t
*result
;
276 enum token_t expected_tok
= tok_error
;
277 const char *expected_str
= NULL
;
278 char *from_name
= NULL
;
279 char *to_name
= NULL
;
280 enum token_t ellipsis
= 0;
283 /* We don't want symbolic names in string to be translated. */
284 cmfile
->translate_strings
= 0;
286 /* Allocate room for result. */
287 result
= (struct charmap_t
*) xmalloc (sizeof (struct charmap_t
));
288 memset (result
, '\0', sizeof (struct charmap_t
));
289 /* The default DEFAULT_WIDTH is 1. */
290 result
->width_default
= 1;
292 #define obstack_chunk_alloc malloc
293 #define obstack_chunk_free free
294 obstack_init (&result
->mem_pool
);
296 if (init_hash (&result
->char_table
, 256)
297 || init_hash (&result
->byte_table
, 256))
303 /* We use a state machine to describe the charmap description file
309 struct token
*now
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
310 enum token_t nowtok
= now
->tok
;
313 if (nowtok
== tok_eof
)
319 /* The beginning. We expect the special declarations, EOL or
321 if (nowtok
== tok_eol
)
322 /* Ignore empty lines. */
325 if (nowtok
== tok_charmap
)
330 /* We have to set up the real work. Fill in some
332 if (result
->mb_cur_max
== 0)
333 result
->mb_cur_max
= 1;
334 if (result
->mb_cur_min
== 0)
335 result
->mb_cur_min
= result
->mb_cur_max
;
336 if (result
->mb_cur_min
> result
->mb_cur_max
)
339 WITH_CUR_LOCALE (error (0, 0, _("\
340 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
343 result
->mb_cur_min
= result
->mb_cur_max
;
346 lr_ignore_rest (cmfile
, 1);
352 if (nowtok
!= tok_code_set_name
&& nowtok
!= tok_mb_cur_max
353 && nowtok
!= tok_mb_cur_min
&& nowtok
!= tok_escape_char
354 && nowtok
!= tok_comment_char
&& nowtok
!= tok_g0esc
355 && nowtok
!= tok_g1esc
&& nowtok
!= tok_g2esc
356 && nowtok
!= tok_g3esc
&& nowtok
!= tok_repertoiremap
357 && nowtok
!= tok_include
)
359 lr_error (cmfile
, _("syntax error in prolog: %s"),
360 _("invalid definition"));
362 lr_ignore_rest (cmfile
, 0);
366 /* We know that we need an argument. */
367 arg
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
371 case tok_code_set_name
:
372 case tok_repertoiremap
:
373 if (arg
->tok
!= tok_ident
&& arg
->tok
!= tok_string
)
376 lr_error (cmfile
, _("syntax error in prolog: %s"),
379 lr_ignore_rest (cmfile
, 0);
383 if (nowtok
== tok_code_set_name
)
384 result
->code_set_name
= obstack_copy0 (&result
->mem_pool
,
385 arg
->val
.str
.startmb
,
388 result
->repertoiremap
= obstack_copy0 (&result
->mem_pool
,
389 arg
->val
.str
.startmb
,
392 lr_ignore_rest (cmfile
, 1);
397 if (arg
->tok
!= tok_number
)
/* A definition is a duplicate when the limit named by the current
   token already holds a non-zero value.  Both <mb_cur_max> and
   <mb_cur_min> must be tested; the message below names either one.  */
401 && ((nowtok
== tok_mb_cur_max
402 && result
->mb_cur_max
!= 0)
403 || (nowtok
== tok_mb_cur_min
404 && result
->mb_cur_min
!= 0)))
405 lr_error (cmfile
, _("duplicate definition of <%s>"),
406 nowtok
== tok_mb_cur_min
407 ? "mb_cur_min" : "mb_cur_max");
409 if (arg
->val
.num
< 1)
412 _("value for <%s> must be 1 or greater"),
413 nowtok
== tok_mb_cur_min
414 ? "mb_cur_min" : "mb_cur_max");
416 lr_ignore_rest (cmfile
, 0);
419 if ((nowtok
== tok_mb_cur_max
&& result
->mb_cur_min
!= 0
420 && (int) arg
->val
.num
< result
->mb_cur_min
)
421 || (nowtok
== tok_mb_cur_min
&& result
->mb_cur_max
!= 0
422 && (int) arg
->val
.num
> result
->mb_cur_max
))
424 lr_error (cmfile
, _("\
425 value of <%s> must be greater or equal than the value of <%s>"),
426 "mb_cur_max", "mb_cur_min");
428 lr_ignore_rest (cmfile
, 0);
432 if (nowtok
== tok_mb_cur_max
)
433 result
->mb_cur_max
= arg
->val
.num
;
435 result
->mb_cur_min
= arg
->val
.num
;
437 lr_ignore_rest (cmfile
, 1);
440 case tok_escape_char
:
441 case tok_comment_char
:
442 if (arg
->tok
!= tok_ident
)
445 if (arg
->val
.str
.lenmb
!= 1)
447 lr_error (cmfile
, _("\
448 argument to <%s> must be a single character"),
449 nowtok
== tok_escape_char
? "escape_char"
452 lr_ignore_rest (cmfile
, 0);
456 if (nowtok
== tok_escape_char
)
457 cmfile
->escape_char
= *arg
->val
.str
.startmb
;
459 cmfile
->comment_char
= *arg
->val
.str
.startmb
;
461 lr_ignore_rest (cmfile
, 1);
469 lr_ignore_rest (cmfile
, 0); /* XXX */
473 lr_error (cmfile
, _("\
474 character sets with locking states are not supported"));
479 assert (! "Should not happen");
484 /* We have seen `CHARMAP' and now are in the body. Each line
485 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
486 if (nowtok
== tok_eol
)
487 /* Ignore empty lines. */
490 if (nowtok
== tok_end
)
492 expected_tok
= tok_charmap
;
493 expected_str
= "CHARMAP";
498 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
500 lr_error (cmfile
, _("syntax error in %s definition: %s"),
501 "CHARMAP", _("no symbolic name given"));
503 lr_ignore_rest (cmfile
, 0);
507 /* If the previous line was not completely correct free the
509 if (from_name
!= NULL
)
510 obstack_free (&result
->mem_pool
, from_name
);
512 if (nowtok
== tok_bsymbol
)
513 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
514 now
->val
.str
.startmb
,
518 obstack_printf (&result
->mem_pool
, "U%08X",
519 cmfile
->token
.val
.ucs4
);
520 obstack_1grow (&result
->mem_pool
, '\0');
521 from_name
= (char *) obstack_finish (&result
->mem_pool
);
529 /* We have two possibilities: We can see an ellipsis or an
531 if (nowtok
== tok_ellipsis3
|| nowtok
== tok_ellipsis4
532 || nowtok
== tok_ellipsis2
|| nowtok
== tok_ellipsis4_2
533 || nowtok
== tok_ellipsis2_2
)
536 if (nowtok
== tok_ellipsis4_2
)
539 nowtok
= tok_ellipsis4
;
541 else if (nowtok
== tok_ellipsis2_2
)
544 nowtok
= tok_ellipsis2
;
552 if (nowtok
!= tok_charcode
)
554 lr_error (cmfile
, _("syntax error in %s definition: %s"),
555 "CHARMAP", _("invalid encoding given"));
557 lr_ignore_rest (cmfile
, 0);
563 if (now
->val
.charcode
.nbytes
< result
->mb_cur_min
)
564 lr_error (cmfile
, _("too few bytes in character encoding"));
565 else if (now
->val
.charcode
.nbytes
> result
->mb_cur_max
)
566 lr_error (cmfile
, _("too many bytes in character encoding"));
568 charmap_new_char (cmfile
, result
, now
->val
.charcode
.nbytes
,
569 now
->val
.charcode
.bytes
, from_name
, to_name
,
570 ellipsis
!= tok_ellipsis2
, step
);
572 /* Ignore trailing comment silently. */
573 lr_ignore_rest (cmfile
, 0);
584 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
586 lr_error (cmfile
, _("syntax error in %s definition: %s"),
588 _("no symbolic name given for end of range"));
590 lr_ignore_rest (cmfile
, 0);
594 /* Copy the to-name in a safe place. */
595 if (nowtok
== tok_bsymbol
)
596 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
597 cmfile
->token
.val
.str
.startmb
,
598 cmfile
->token
.val
.str
.lenmb
);
601 obstack_printf (&result
->mem_pool
, "U%08X",
602 cmfile
->token
.val
.ucs4
);
603 obstack_1grow (&result
->mem_pool
, '\0');
604 to_name
= (char *) obstack_finish (&result
->mem_pool
);
611 if (nowtok
!= expected_tok
)
612 lr_error (cmfile
, _("\
613 `%1$s' definition does not end with `END %1$s'"), expected_str
);
615 lr_ignore_rest (cmfile
, nowtok
== expected_tok
);
620 /* Waiting for WIDTH... */
621 if (nowtok
== tok_eol
)
622 /* Ignore empty lines. */
625 if (nowtok
== tok_width_default
)
631 if (nowtok
== tok_width
)
633 lr_ignore_rest (cmfile
, 1);
638 if (nowtok
== tok_width_variable
)
640 lr_ignore_rest (cmfile
, 1);
645 lr_error (cmfile
, _("\
646 only WIDTH definitions are allowed to follow the CHARMAP definition"));
648 lr_ignore_rest (cmfile
, 0);
652 if (nowtok
!= tok_number
)
653 lr_error (cmfile
, _("value for %s must be an integer"),
656 result
->width_default
= now
->val
.num
;
658 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
664 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
666 if (nowtok
== tok_eol
)
667 /* ignore empty lines. */
670 if (nowtok
== tok_end
)
672 expected_tok
= tok_width
;
673 expected_str
= "WIDTH";
678 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
680 lr_error (cmfile
, _("syntax error in %s definition: %s"),
681 "WIDTH", _("no symbolic name given"));
683 lr_ignore_rest (cmfile
, 0);
687 if (from_name
!= NULL
)
688 obstack_free (&result
->mem_pool
, from_name
);
690 if (nowtok
== tok_bsymbol
)
691 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
692 now
->val
.str
.startmb
,
696 obstack_printf (&result
->mem_pool
, "U%08X",
697 cmfile
->token
.val
.ucs4
);
698 obstack_1grow (&result
->mem_pool
, '\0');
699 from_name
= (char *) obstack_finish (&result
->mem_pool
);
708 if (nowtok
== tok_ellipsis3
)
715 if (nowtok
!= tok_number
)
716 lr_error (cmfile
, _("value for %s must be an integer"),
720 /* Store width for chars. */
721 new_width (cmfile
, result
, from_name
, to_name
, now
->val
.num
);
727 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
733 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
735 lr_error (cmfile
, _("syntax error in %s definition: %s"),
736 "WIDTH", _("no symbolic name given for end of range"));
738 lr_ignore_rest (cmfile
, 0);
744 if (nowtok
== tok_bsymbol
)
745 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
746 now
->val
.str
.startmb
,
750 obstack_printf (&result
->mem_pool
, "U%08X",
751 cmfile
->token
.val
.ucs4
);
752 obstack_1grow (&result
->mem_pool
, '\0');
753 to_name
= (char *) obstack_finish (&result
->mem_pool
);
760 /* We now expect `END WIDTH_VARIABLE' or lines of the format
761 "%s\n" or "%s...%s\n". */
762 if (nowtok
== tok_eol
)
763 /* ignore empty lines. */
766 if (nowtok
== tok_end
)
768 expected_tok
= tok_width_variable
;
769 expected_str
= "WIDTH_VARIABLE";
774 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
776 lr_error (cmfile
, _("syntax error in %s definition: %s"),
777 "WIDTH_VARIABLE", _("no symbolic name given"));
779 lr_ignore_rest (cmfile
, 0);
784 if (from_name
!= NULL
)
785 obstack_free (&result
->mem_pool
, from_name
);
787 if (nowtok
== tok_bsymbol
)
788 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
789 now
->val
.str
.startmb
,
793 obstack_printf (&result
->mem_pool
, "U%08X",
794 cmfile
->token
.val
.ucs4
);
795 obstack_1grow (&result
->mem_pool
, '\0');
796 from_name
= (char *) obstack_finish (&result
->mem_pool
);
804 if (nowtok
== tok_ellipsis3
)
815 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
817 lr_error (cmfile
, _("syntax error in %s definition: %s"),
819 _("no symbolic name given for end of range"));
820 lr_ignore_rest (cmfile
, 0);
824 if (nowtok
== tok_bsymbol
)
825 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
826 now
->val
.str
.startmb
,
830 obstack_printf (&result
->mem_pool
, "U%08X",
831 cmfile
->token
.val
.ucs4
);
832 obstack_1grow (&result
->mem_pool
, '\0');
833 to_name
= (char *) obstack_finish (&result
->mem_pool
);
836 /* XXX Enter value into table. */
838 lr_ignore_rest (cmfile
, 1);
844 WITH_CUR_LOCALE (error (5, 0, _("%s: error in state machine"),
851 if (state
!= 91 && !be_quiet
)
852 WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"),
862 new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
863 const char *from
, const char *to
, unsigned long int width
)
865 struct charseq
*from_val
;
866 struct charseq
*to_val
;
868 from_val
= charmap_find_value (result
, from
, strlen (from
));
869 if (from_val
== NULL
)
871 lr_error (cmfile
, _("unknown character `%s'"), from
);
879 to_val
= charmap_find_value (result
, to
, strlen (to
));
882 lr_error (cmfile
, _("unknown character `%s'"), to
);
886 /* Make sure the number of bytes for the end points of the range
888 if (from_val
->nbytes
!= to_val
->nbytes
)
890 lr_error (cmfile
, _("\
891 number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"),
892 from_val
->nbytes
, to_val
->nbytes
);
897 if (result
->nwidth_rules
>= result
->nwidth_rules_max
)
899 size_t new_size
= result
->nwidth_rules
+ 32;
900 struct width_rule
*new_rules
=
901 (struct width_rule
*) obstack_alloc (&result
->mem_pool
,
903 * sizeof (struct width_rule
)));
905 memcpy (new_rules
, result
->width_rules
,
906 result
->nwidth_rules_max
* sizeof (struct width_rule
));
908 result
->width_rules
= new_rules
;
909 result
->nwidth_rules_max
= new_size
;
912 result
->width_rules
[result
->nwidth_rules
].from
= from_val
;
913 result
->width_rules
[result
->nwidth_rules
].to
= to_val
;
914 result
->width_rules
[result
->nwidth_rules
].width
= (unsigned int) width
;
915 ++result
->nwidth_rules
;
920 charmap_find_value (const struct charmap_t
*cm
, const char *name
, size_t len
)
924 return (find_entry ((hash_table
*) &cm
->char_table
, name
, len
, &result
)
925 < 0 ? NULL
: (struct charseq
*) result
);
930 charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
931 size_t nbytes
, unsigned char *bytes
,
932 const char *from
, const char *to
,
933 int decimal_ellipsis
, int step
)
935 hash_table
*ht
= &cm
->char_table
;
936 hash_table
*bt
= &cm
->byte_table
;
937 struct obstack
*ob
= &cm
->mem_pool
;
941 int prefix_len
, len1
, len2
;
942 unsigned int from_nr
, to_nr
, cnt
;
943 struct charseq
*newp
;
945 len1
= strlen (from
);
949 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
950 newp
->nbytes
= nbytes
;
951 memcpy (newp
->bytes
, bytes
, nbytes
);
954 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
955 if ((from
[0] == 'U' || from
[0] == 'P') && (len1
== 5 || len1
== 9))
957 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
958 xxxx and xxxxxxxx are hexadecimal numbers. In this case
959 we use the value of xxxx or xxxxxxxx as the UCS4 value of
960 this character and we don't have to consult the repertoire
963 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
964 and xxxxxxxx also give the code point in UCS4 but this must
965 be in the private, i.e., unassigned, area. This should be
966 used for characters which do not (yet) have an equivalent
967 in ISO 10646 and Unicode. */
971 newp
->ucs4
= strtoul (from
+ 1, &endp
, 16);
972 if (endp
- from
!= len1
973 || (newp
->ucs4
== ~((uint32_t) 0) && errno
== ERANGE
)
974 || newp
->ucs4
>= 0x80000000)
975 /* This wasn't successful. Signal this name cannot be a
976 correct UCS value. */
977 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
980 insert_entry (ht
, from
, len1
, newp
);
981 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
982 /* Please note that it isn't a bug if a symbol is defined more
983 than once. All later definitions are simply discarded. */
987 /* We have a range: both names must share the same prefix and have
988 an equal number of digits, and the second number must be greater
989 than or equal to the first. */
995 lr_error (lr
, _("invalid names for character range"));
999 cp
= &from
[len1
- 1];
1000 if (decimal_ellipsis
)
/* Scan backwards over the numeric suffix.  The bound is tested before
   *CP is read so that a name consisting only of digits does not cause
   a read in front of FROM, and the character is converted to
   `unsigned char' because the <ctype.h> functions are undefined for
   negative arguments other than EOF.  */
1001 while (cp
>= from
&& isdigit ((unsigned char) *cp
))
1004 while (cp
>= from
&& isxdigit ((unsigned char) *cp
))
1006 if (!isdigit ((unsigned char) *cp
) && !isupper ((unsigned char) *cp
))
1008 hexadecimal range format should use only capital characters"));
1012 prefix_len
= (cp
- from
) + 1;
1014 if (cp
== &from
[len1
- 1] || strncmp (from
, to
, prefix_len
) != 0)
1018 from_nr
= strtoul (&from
[prefix_len
], &from_end
, decimal_ellipsis
? 10 : 16);
1019 if (*from_end
!= '\0' || (from_nr
== UINT_MAX
&& errno
== ERANGE
)
1020 || ((to_nr
= strtoul (&to
[prefix_len
], &to_end
,
1021 decimal_ellipsis
? 10 : 16)) == UINT_MAX
1025 lr_error (lr
, _("<%s> and <%s> are illegal names for range"), from
, to
);
1029 if (from_nr
> to_nr
)
1031 lr_error (lr
, _("upper limit in range is not higher then lower limit"));
1035 for (cnt
= from_nr
; cnt
<= to_nr
; cnt
+= step
)
1038 obstack_printf (ob
, decimal_ellipsis
? "%.*s%0*d" : "%.*s%0*X",
1039 prefix_len
, from
, len1
- prefix_len
, cnt
);
1040 obstack_1grow (ob
, '\0');
1041 name_end
= obstack_finish (ob
);
1043 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
1044 newp
->nbytes
= nbytes
;
1045 memcpy (newp
->bytes
, bytes
, nbytes
);
1046 newp
->name
= name_end
;
1048 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1049 if ((name_end
[0] == 'U' || name_end
[0] == 'P')
1050 && (len1
== 5 || len1
== 9))
1052 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
1053 xxxx and xxxxxxxx are hexadecimal numbers. In this case
1054 we use the value of xxxx or xxxxxxxx as the UCS4 value of
1055 this character and we don't have to consult the repertoire
1058 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
1059 and xxxxxxxx also give the code point in UCS4 but this must
1060 be in the private, i.e., unassigned, area. This should be
1061 used for characters which do not (yet) have an equivalent
1062 in ISO 10646 and Unicode. */
1066 newp
->ucs4
= strtoul (name_end
+ 1, &endp
, 16);
1067 if (endp
- name_end
!= len1
1068 || (newp
->ucs4
== ~((uint32_t) 0) && errno
== ERANGE
)
1069 || newp
->ucs4
>= 0x80000000)
1070 /* This wasn't successful. Signal this name cannot be a
1071 correct UCS value. */
1072 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1075 insert_entry (ht
, name_end
, len1
, newp
);
1076 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
1077 /* Please note we don't examine the return value since it is no error
1078 if we have two definitions for a symbol. */
1080 /* Increment the value in the byte sequence. */
1081 if (++bytes
[nbytes
- 1] == '\0')
1089 _("resulting bytes for range not representable."));
1092 while (++bytes
[b
--] == 0);
1099 charmap_find_symbol (const struct charmap_t
*cm
, const char *bytes
,
1104 return (find_entry ((hash_table
*) &cm
->byte_table
, bytes
, nbytes
, &result
)
1105 < 0 ? NULL
: (struct charseq
*) result
);