1 /* Copyright (C) 1996, 1998-2004,2005 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
31 #include "localedef.h"
32 #include "linereader.h"
34 #include "charmap-dir.h"
39 /* Define the lookup function. */
40 #include "charmap-kw.h"
43 /* Prototypes for local functions. */
/* parse_charmap: runs the charmap state machine over the tokens read
   from CMFILE and hands back a `struct charmap_t *'.  VERBOSE is
   forwarded to the tokenizer (lr_token); BE_QUIET suppresses the
   "premature end of file" diagnostic.  */
44 static struct charmap_t
*parse_charmap (struct linereader
*cmfile
,
45 int verbose
, int be_quiet
);
/* new_width: looks up the characters named FROM and TO in RESULT and
   appends a width rule (from, to, WIDTH) to RESULT->width_rules,
   growing that array when it is full.  Problems are reported through
   lr_error on CMFILE.  */
46 static void new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
47 const char *from
, const char *to
,
48 unsigned long int width
);
/* charmap_new_char: allocates charseq records holding the NBYTES bytes
   at BYTES and inserts them into CM's char_table (keyed by name) and
   byte_table (keyed by byte sequence).  For a range the numeric suffix
   of FROM is advanced up to that of TO in increments of STEP;
   DECIMAL_ELLIPSIS selects base 10 instead of base 16 for that suffix.
   NOTE(review): presumably TO is NULL for a single character --
   confirm against the full definition.  */
49 static void charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
50 size_t nbytes
, unsigned char *bytes
,
51 const char *from
, const char *to
,
52 int decimal_ellipsis
, int step
);
55 bool enc_not_ascii_compatible
;
58 #ifdef NEED_NULL_POINTER
59 static const char *null_pointer
;
62 static struct linereader
*
63 cmlr_open (const char *directory
, const char *name
, kw_hash_fct_t hf
)
67 fp
= charmap_open (directory
, name
);
72 size_t dlen
= strlen (directory
);
73 int add_slash
= (dlen
== 0 || directory
[dlen
- 1] != '/');
74 size_t nlen
= strlen (name
);
78 pathname
= alloca (dlen
+ add_slash
+ nlen
+ 1);
79 p
= stpcpy (pathname
, directory
);
84 return lr_create (fp
, pathname
, hf
);
89 charmap_read (const char *filename
, int verbose
, int be_quiet
, int use_default
)
91 struct charmap_t
*result
= NULL
;
95 struct linereader
*cmfile
;
97 /* First try the name as found in the parameter. */
98 cmfile
= lr_open (filename
, charmap_hash
);
101 /* Not successful. So start looking through the directories
102 in the I18NPATH if this is a simple name. */
103 if (strchr (filename
, '/') == NULL
)
105 char *i18npath
= getenv ("I18NPATH");
106 if (i18npath
!= NULL
&& *i18npath
!= '\0')
108 const size_t pathlen
= strlen (i18npath
);
109 char i18npathbuf
[pathlen
+ 1];
110 char path
[pathlen
+ sizeof ("/charmaps")];
112 i18npath
= memcpy (i18npathbuf
, i18npath
, pathlen
+ 1);
114 while (cmfile
== NULL
115 && (next
= strsep (&i18npath
, ":")) != NULL
)
117 stpcpy (stpcpy (path
, next
), "/charmaps");
118 cmfile
= cmlr_open (path
, filename
, charmap_hash
);
121 /* Try without the "/charmaps" part. */
122 cmfile
= cmlr_open (next
, filename
, charmap_hash
);
127 /* Try the default directory. */
128 cmfile
= cmlr_open (CHARMAP_PATH
, filename
, charmap_hash
);
134 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
136 if (result
== NULL
&& !be_quiet
)
137 WITH_CUR_LOCALE (error (0, errno
, _("\
138 character map file `%s' not found"), filename
));
142 if (result
== NULL
&& filename
!= NULL
&& strchr (filename
, '/') == NULL
)
144 /* OK, one more try. We also accept the names given to the
145 character sets in the files. Sometimes they differ from the
149 dir
= charmap_opendir (CHARMAP_PATH
);
154 while ((dirent
= charmap_readdir (dir
)) != NULL
)
160 aliases
= charmap_aliases (CHARMAP_PATH
, dirent
);
162 for (p
= aliases
; *p
; p
++)
163 if (strcasecmp (*p
, filename
) == 0)
168 charmap_free_aliases (aliases
);
172 struct linereader
*cmfile
;
174 cmfile
= cmlr_open (CHARMAP_PATH
, dirent
, charmap_hash
);
176 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
182 charmap_closedir (dir
);
186 if (result
== NULL
&& DEFAULT_CHARMAP
!= NULL
)
188 struct linereader
*cmfile
;
190 cmfile
= cmlr_open (CHARMAP_PATH
, DEFAULT_CHARMAP
, charmap_hash
);
192 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
195 WITH_CUR_LOCALE (error (4, errno
, _("\
196 default character map file `%s' not found"), DEFAULT_CHARMAP
));
199 if (result
!= NULL
&& result
->code_set_name
== NULL
)
200 /* The input file does not specify a code set name. This
201 shouldn't happen but we should cope with it. */
202 result
->code_set_name
= basename (filename
);
204 /* Test of ASCII compatibility of locale encoding.
206 Verify that the encoding to be used in a locale is ASCII compatible,
207 at least for the graphic characters, excluding the control characters,
208 '$' and '@'. This constraint comes from an ISO C 99 restriction.
210 ISO C 99 section 7.17.(2) (about wchar_t):
211 the null character shall have the code value zero and each member of
212 the basic character set shall have a code value equal to its value
213 when used as the lone character in an integer character constant.
214 ISO C 99 section 5.2.1.(3):
215 Both the basic source and basic execution character sets shall have
216 the following members: the 26 uppercase letters of the Latin alphabet
217 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
218 the 26 lowercase letters of the Latin alphabet
219 a b c d e f g h i j k l m n o p q r s t u v w x y z
220 the 10 decimal digits
222 the following 29 graphic characters
223 ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
224 the space character, and control characters representing horizontal
225 tab, vertical tab, and form feed.
227 Therefore, for all members of the "basic character set", the 'char' code
228 must have the same value as the 'wchar_t' code, which in glibc is the
229 same as the Unicode code, which for all of the enumerated characters
230 is identical to the ASCII code. */
231 if (result
!= NULL
&& use_default
)
233 static const char basic_charset
[] =
235 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
236 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
237 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
238 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
239 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
240 '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
241 '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
242 '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
245 const char *p
= basic_charset
;
249 struct charseq
*seq
= charmap_find_symbol (result
, p
, 1);
251 if (seq
== NULL
|| seq
->ucs4
!= (uint32_t) *p
)
254 while (*p
++ != '\0');
258 WITH_CUR_LOCALE (fprintf (stderr
, _("\
259 character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
260 result
->code_set_name
));
261 enc_not_ascii_compatible
= true;
269 static struct charmap_t
*
270 parse_charmap (struct linereader
*cmfile
, int verbose
, int be_quiet
)
272 struct charmap_t
*result
;
274 enum token_t expected_tok
= tok_error
;
275 const char *expected_str
= NULL
;
276 char *from_name
= NULL
;
277 char *to_name
= NULL
;
278 enum token_t ellipsis
= 0;
281 /* We don't want symbolic names in string to be translated. */
282 cmfile
->translate_strings
= 0;
284 /* Allocate room for result. */
285 result
= (struct charmap_t
*) xmalloc (sizeof (struct charmap_t
));
286 memset (result
, '\0', sizeof (struct charmap_t
));
287 /* The default DEFAULT_WIDTH is 1. */
288 result
->width_default
= 1;
290 #define obstack_chunk_alloc malloc
291 #define obstack_chunk_free free
292 obstack_init (&result
->mem_pool
);
294 if (init_hash (&result
->char_table
, 256)
295 || init_hash (&result
->byte_table
, 256))
301 /* We use a state machine to describe the charmap description file
307 struct token
*now
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
308 enum token_t nowtok
= now
->tok
;
311 if (nowtok
== tok_eof
)
317 /* The beginning. We expect the special declarations, EOL or
319 if (nowtok
== tok_eol
)
320 /* Ignore empty lines. */
323 if (nowtok
== tok_charmap
)
328 /* We have to set up the real work. Fill in some
330 if (result
->mb_cur_max
== 0)
331 result
->mb_cur_max
= 1;
332 if (result
->mb_cur_min
== 0)
333 result
->mb_cur_min
= result
->mb_cur_max
;
334 if (result
->mb_cur_min
> result
->mb_cur_max
)
337 WITH_CUR_LOCALE (error (0, 0, _("\
338 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
341 result
->mb_cur_min
= result
->mb_cur_max
;
344 lr_ignore_rest (cmfile
, 1);
350 if (nowtok
!= tok_code_set_name
&& nowtok
!= tok_mb_cur_max
351 && nowtok
!= tok_mb_cur_min
&& nowtok
!= tok_escape_char
352 && nowtok
!= tok_comment_char
&& nowtok
!= tok_g0esc
353 && nowtok
!= tok_g1esc
&& nowtok
!= tok_g2esc
354 && nowtok
!= tok_g3esc
&& nowtok
!= tok_repertoiremap
355 && nowtok
!= tok_include
)
357 lr_error (cmfile
, _("syntax error in prolog: %s"),
358 _("invalid definition"));
360 lr_ignore_rest (cmfile
, 0);
364 /* We know that we need an argument. */
365 arg
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
369 case tok_code_set_name
:
370 case tok_repertoiremap
:
371 if (arg
->tok
!= tok_ident
&& arg
->tok
!= tok_string
)
374 lr_error (cmfile
, _("syntax error in prolog: %s"),
377 lr_ignore_rest (cmfile
, 0);
381 if (nowtok
== tok_code_set_name
)
382 result
->code_set_name
= obstack_copy0 (&result
->mem_pool
,
383 arg
->val
.str
.startmb
,
386 result
->repertoiremap
= obstack_copy0 (&result
->mem_pool
,
387 arg
->val
.str
.startmb
,
390 lr_ignore_rest (cmfile
, 1);
395 if (arg
->tok
!= tok_number
)
399 && ((nowtok
== tok_mb_cur_max
400 && result
->mb_cur_max
!= 0)
401 || (nowtok
== tok_mb_cur_max
402 && result
->mb_cur_max
!= 0)))
403 lr_error (cmfile
, _("duplicate definition of <%s>"),
404 nowtok
== tok_mb_cur_min
405 ? "mb_cur_min" : "mb_cur_max");
407 if (arg
->val
.num
< 1)
410 _("value for <%s> must be 1 or greater"),
411 nowtok
== tok_mb_cur_min
412 ? "mb_cur_min" : "mb_cur_max");
414 lr_ignore_rest (cmfile
, 0);
417 if ((nowtok
== tok_mb_cur_max
&& result
->mb_cur_min
!= 0
418 && (int) arg
->val
.num
< result
->mb_cur_min
)
419 || (nowtok
== tok_mb_cur_min
&& result
->mb_cur_max
!= 0
420 && (int) arg
->val
.num
> result
->mb_cur_max
))
422 lr_error (cmfile
, _("\
423 value of <%s> must be greater or equal than the value of <%s>"),
424 "mb_cur_max", "mb_cur_min");
426 lr_ignore_rest (cmfile
, 0);
430 if (nowtok
== tok_mb_cur_max
)
431 result
->mb_cur_max
= arg
->val
.num
;
433 result
->mb_cur_min
= arg
->val
.num
;
435 lr_ignore_rest (cmfile
, 1);
438 case tok_escape_char
:
439 case tok_comment_char
:
440 if (arg
->tok
!= tok_ident
)
443 if (arg
->val
.str
.lenmb
!= 1)
445 lr_error (cmfile
, _("\
446 argument to <%s> must be a single character"),
447 nowtok
== tok_escape_char
? "escape_char"
450 lr_ignore_rest (cmfile
, 0);
454 if (nowtok
== tok_escape_char
)
455 cmfile
->escape_char
= *arg
->val
.str
.startmb
;
457 cmfile
->comment_char
= *arg
->val
.str
.startmb
;
459 lr_ignore_rest (cmfile
, 1);
467 lr_ignore_rest (cmfile
, 0); /* XXX */
471 lr_error (cmfile
, _("\
472 character sets with locking states are not supported"));
477 assert (! "Should not happen");
482 /* We have seen `CHARMAP' and now are in the body. Each line
483 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
484 if (nowtok
== tok_eol
)
485 /* Ignore empty lines. */
488 if (nowtok
== tok_end
)
490 expected_tok
= tok_charmap
;
491 expected_str
= "CHARMAP";
496 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
498 lr_error (cmfile
, _("syntax error in %s definition: %s"),
499 "CHARMAP", _("no symbolic name given"));
501 lr_ignore_rest (cmfile
, 0);
505 /* If the previous line was not completely correct free the
507 if (from_name
!= NULL
)
508 obstack_free (&result
->mem_pool
, from_name
);
510 if (nowtok
== tok_bsymbol
)
511 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
512 now
->val
.str
.startmb
,
516 obstack_printf (&result
->mem_pool
, "U%08X",
517 cmfile
->token
.val
.ucs4
);
518 obstack_1grow (&result
->mem_pool
, '\0');
519 from_name
= (char *) obstack_finish (&result
->mem_pool
);
527 /* We have two possibilities: We can see an ellipsis or an
529 if (nowtok
== tok_ellipsis3
|| nowtok
== tok_ellipsis4
530 || nowtok
== tok_ellipsis2
|| nowtok
== tok_ellipsis4_2
531 || nowtok
== tok_ellipsis2_2
)
534 if (nowtok
== tok_ellipsis4_2
)
537 nowtok
= tok_ellipsis4
;
539 else if (nowtok
== tok_ellipsis2_2
)
542 nowtok
= tok_ellipsis2
;
550 if (nowtok
!= tok_charcode
)
552 lr_error (cmfile
, _("syntax error in %s definition: %s"),
553 "CHARMAP", _("invalid encoding given"));
555 lr_ignore_rest (cmfile
, 0);
561 if (now
->val
.charcode
.nbytes
< result
->mb_cur_min
)
562 lr_error (cmfile
, _("too few bytes in character encoding"));
563 else if (now
->val
.charcode
.nbytes
> result
->mb_cur_max
)
564 lr_error (cmfile
, _("too many bytes in character encoding"));
566 charmap_new_char (cmfile
, result
, now
->val
.charcode
.nbytes
,
567 now
->val
.charcode
.bytes
, from_name
, to_name
,
568 ellipsis
!= tok_ellipsis2
, step
);
570 /* Ignore trailing comment silently. */
571 lr_ignore_rest (cmfile
, 0);
582 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
584 lr_error (cmfile
, _("syntax error in %s definition: %s"),
586 _("no symbolic name given for end of range"));
588 lr_ignore_rest (cmfile
, 0);
592 /* Copy the to-name in a safe place. */
593 if (nowtok
== tok_bsymbol
)
594 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
595 cmfile
->token
.val
.str
.startmb
,
596 cmfile
->token
.val
.str
.lenmb
);
599 obstack_printf (&result
->mem_pool
, "U%08X",
600 cmfile
->token
.val
.ucs4
);
601 obstack_1grow (&result
->mem_pool
, '\0');
602 to_name
= (char *) obstack_finish (&result
->mem_pool
);
609 if (nowtok
!= expected_tok
)
610 lr_error (cmfile
, _("\
611 `%1$s' definition does not end with `END %1$s'"), expected_str
);
613 lr_ignore_rest (cmfile
, nowtok
== expected_tok
);
618 /* Waiting for WIDTH... */
619 if (nowtok
== tok_eol
)
620 /* Ignore empty lines. */
623 if (nowtok
== tok_width_default
)
629 if (nowtok
== tok_width
)
631 lr_ignore_rest (cmfile
, 1);
636 if (nowtok
== tok_width_variable
)
638 lr_ignore_rest (cmfile
, 1);
643 lr_error (cmfile
, _("\
644 only WIDTH definitions are allowed to follow the CHARMAP definition"));
646 lr_ignore_rest (cmfile
, 0);
650 if (nowtok
!= tok_number
)
651 lr_error (cmfile
, _("value for %s must be an integer"),
654 result
->width_default
= now
->val
.num
;
656 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
662 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
664 if (nowtok
== tok_eol
)
665 /* ignore empty lines. */
668 if (nowtok
== tok_end
)
670 expected_tok
= tok_width
;
671 expected_str
= "WIDTH";
676 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
678 lr_error (cmfile
, _("syntax error in %s definition: %s"),
679 "WIDTH", _("no symbolic name given"));
681 lr_ignore_rest (cmfile
, 0);
685 if (from_name
!= NULL
)
686 obstack_free (&result
->mem_pool
, from_name
);
688 if (nowtok
== tok_bsymbol
)
689 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
690 now
->val
.str
.startmb
,
694 obstack_printf (&result
->mem_pool
, "U%08X",
695 cmfile
->token
.val
.ucs4
);
696 obstack_1grow (&result
->mem_pool
, '\0');
697 from_name
= (char *) obstack_finish (&result
->mem_pool
);
706 if (nowtok
== tok_ellipsis3
)
713 if (nowtok
!= tok_number
)
714 lr_error (cmfile
, _("value for %s must be an integer"),
718 /* Store width for chars. */
719 new_width (cmfile
, result
, from_name
, to_name
, now
->val
.num
);
725 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
731 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
733 lr_error (cmfile
, _("syntax error in %s definition: %s"),
734 "WIDTH", _("no symbolic name given for end of range"));
736 lr_ignore_rest (cmfile
, 0);
742 if (nowtok
== tok_bsymbol
)
743 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
744 now
->val
.str
.startmb
,
748 obstack_printf (&result
->mem_pool
, "U%08X",
749 cmfile
->token
.val
.ucs4
);
750 obstack_1grow (&result
->mem_pool
, '\0');
751 to_name
= (char *) obstack_finish (&result
->mem_pool
);
758 /* We now expect `END WIDTH_VARIABLE' or lines of the format
759 "%s\n" or "%s...%s\n". */
760 if (nowtok
== tok_eol
)
761 /* ignore empty lines. */
764 if (nowtok
== tok_end
)
766 expected_tok
= tok_width_variable
;
767 expected_str
= "WIDTH_VARIABLE";
772 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
774 lr_error (cmfile
, _("syntax error in %s definition: %s"),
775 "WIDTH_VARIABLE", _("no symbolic name given"));
777 lr_ignore_rest (cmfile
, 0);
782 if (from_name
!= NULL
)
783 obstack_free (&result
->mem_pool
, from_name
);
785 if (nowtok
== tok_bsymbol
)
786 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
787 now
->val
.str
.startmb
,
791 obstack_printf (&result
->mem_pool
, "U%08X",
792 cmfile
->token
.val
.ucs4
);
793 obstack_1grow (&result
->mem_pool
, '\0');
794 from_name
= (char *) obstack_finish (&result
->mem_pool
);
802 if (nowtok
== tok_ellipsis3
)
813 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
815 lr_error (cmfile
, _("syntax error in %s definition: %s"),
817 _("no symbolic name given for end of range"));
818 lr_ignore_rest (cmfile
, 0);
822 if (nowtok
== tok_bsymbol
)
823 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
824 now
->val
.str
.startmb
,
828 obstack_printf (&result
->mem_pool
, "U%08X",
829 cmfile
->token
.val
.ucs4
);
830 obstack_1grow (&result
->mem_pool
, '\0');
831 to_name
= (char *) obstack_finish (&result
->mem_pool
);
834 /* XXX Enter value into table. */
836 lr_ignore_rest (cmfile
, 1);
842 WITH_CUR_LOCALE (error (5, 0, _("%s: error in state machine"),
849 if (state
!= 91 && !be_quiet
)
850 WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"),
860 new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
861 const char *from
, const char *to
, unsigned long int width
)
863 struct charseq
*from_val
;
864 struct charseq
*to_val
;
866 from_val
= charmap_find_value (result
, from
, strlen (from
));
867 if (from_val
== NULL
)
869 lr_error (cmfile
, _("unknown character `%s'"), from
);
877 to_val
= charmap_find_value (result
, to
, strlen (to
));
880 lr_error (cmfile
, _("unknown character `%s'"), to
);
884 /* Make sure the number of bytes for the end points of the range
886 if (from_val
->nbytes
!= to_val
->nbytes
)
888 lr_error (cmfile
, _("\
889 number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"),
890 from_val
->nbytes
, to_val
->nbytes
);
895 if (result
->nwidth_rules
>= result
->nwidth_rules_max
)
897 size_t new_size
= result
->nwidth_rules
+ 32;
898 struct width_rule
*new_rules
=
899 (struct width_rule
*) obstack_alloc (&result
->mem_pool
,
901 * sizeof (struct width_rule
)));
903 memcpy (new_rules
, result
->width_rules
,
904 result
->nwidth_rules_max
* sizeof (struct width_rule
));
906 result
->width_rules
= new_rules
;
907 result
->nwidth_rules_max
= new_size
;
910 result
->width_rules
[result
->nwidth_rules
].from
= from_val
;
911 result
->width_rules
[result
->nwidth_rules
].to
= to_val
;
912 result
->width_rules
[result
->nwidth_rules
].width
= (unsigned int) width
;
913 ++result
->nwidth_rules
;
918 charmap_find_value (const struct charmap_t
*cm
, const char *name
, size_t len
)
922 return (find_entry ((hash_table
*) &cm
->char_table
, name
, len
, &result
)
923 < 0 ? NULL
: (struct charseq
*) result
);
928 charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
929 size_t nbytes
, unsigned char *bytes
,
930 const char *from
, const char *to
,
931 int decimal_ellipsis
, int step
)
933 hash_table
*ht
= &cm
->char_table
;
934 hash_table
*bt
= &cm
->byte_table
;
935 struct obstack
*ob
= &cm
->mem_pool
;
939 int prefix_len
, len1
, len2
;
940 unsigned int from_nr
, to_nr
, cnt
;
941 struct charseq
*newp
;
943 len1
= strlen (from
);
947 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
948 newp
->nbytes
= nbytes
;
949 memcpy (newp
->bytes
, bytes
, nbytes
);
952 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
953 if ((from
[0] == 'U' || from
[0] == 'P') && (len1
== 5 || len1
== 9))
955 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
956 xxxx and xxxxxxxx are hexadecimal numbers. In this case
957 we use the value of xxxx or xxxxxxxx as the UCS4 value of
958 this character and we don't have to consult the repertoire
961 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
962 and xxxxxxxx also give the code point in UCS4 but this must
963 be in the private, i.e., unassigned, area. This should be
964 used for characters which do not (yet) have an equivalent
965 in ISO 10646 and Unicode. */
969 newp
->ucs4
= strtoul (from
+ 1, &endp
, 16);
970 if (endp
- from
!= len1
971 || (newp
->ucs4
== ~((uint32_t) 0) && errno
== ERANGE
)
972 || newp
->ucs4
>= 0x80000000)
973 /* This wasn't successful. Signal this name cannot be a
974 correct UCS value. */
975 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
978 insert_entry (ht
, from
, len1
, newp
);
979 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
980 /* Please note that it isn't a bug if a symbol is defined more
981 than once. All later definitions are simply discarded. */
985 /* We have a range: the two names must have equal prefixes
986 and an equal number of digits, where the second number is greater
987 than or equal to the first. */
993 lr_error (lr
, _("invalid names for character range"));
997 cp
= &from
[len1
- 1];
998 if (decimal_ellipsis
)
999 while (isdigit (*cp
) && cp
>= from
)
1002 while (isxdigit (*cp
) && cp
>= from
)
1004 if (!isdigit (*cp
) && !isupper (*cp
))
1006 hexadecimal range format should use only capital characters"));
1010 prefix_len
= (cp
- from
) + 1;
1012 if (cp
== &from
[len1
- 1] || strncmp (from
, to
, prefix_len
) != 0)
1016 from_nr
= strtoul (&from
[prefix_len
], &from_end
, decimal_ellipsis
? 10 : 16);
1017 if (*from_end
!= '\0' || (from_nr
== UINT_MAX
&& errno
== ERANGE
)
1018 || ((to_nr
= strtoul (&to
[prefix_len
], &to_end
,
1019 decimal_ellipsis
? 10 : 16)) == UINT_MAX
1023 lr_error (lr
, _("<%s> and <%s> are illegal names for range"), from
, to
);
1027 if (from_nr
> to_nr
)
1029 lr_error (lr
, _("upper limit in range is not higher then lower limit"));
1033 for (cnt
= from_nr
; cnt
<= to_nr
; cnt
+= step
)
1036 obstack_printf (ob
, decimal_ellipsis
? "%.*s%0*d" : "%.*s%0*X",
1037 prefix_len
, from
, len1
- prefix_len
, cnt
);
1038 obstack_1grow (ob
, '\0');
1039 name_end
= obstack_finish (ob
);
1041 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
1042 newp
->nbytes
= nbytes
;
1043 memcpy (newp
->bytes
, bytes
, nbytes
);
1044 newp
->name
= name_end
;
1046 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1047 if ((name_end
[0] == 'U' || name_end
[0] == 'P')
1048 && (len1
== 5 || len1
== 9))
1050 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
1051 xxxx and xxxxxxxx are hexadecimal numbers. In this case
1052 we use the value of xxxx or xxxxxxxx as the UCS4 value of
1053 this character and we don't have to consult the repertoire
1056 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
1057 and xxxxxxxx also give the code point in UCS4 but this must
1058 be in the private, i.e., unassigned, area. This should be
1059 used for characters which do not (yet) have an equivalent
1060 in ISO 10646 and Unicode. */
1064 newp
->ucs4
= strtoul (name_end
+ 1, &endp
, 16);
1065 if (endp
- name_end
!= len1
1066 || (newp
->ucs4
== ~((uint32_t) 0) && errno
== ERANGE
)
1067 || newp
->ucs4
>= 0x80000000)
1068 /* This wasn't successful. Signal this name cannot be a
1069 correct UCS value. */
1070 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1073 insert_entry (ht
, name_end
, len1
, newp
);
1074 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
1075 /* Please note we don't examine the return value since it is not an
1076 error if we have two definitions for a symbol. */
1078 /* Increment the value in the byte sequence. */
1079 if (++bytes
[nbytes
- 1] == '\0')
1087 _("resulting bytes for range not representable."));
1090 while (++bytes
[b
--] == 0);
1097 charmap_find_symbol (const struct charmap_t
*cm
, const char *bytes
,
1102 return (find_entry ((hash_table
*) &cm
->byte_table
, bytes
, nbytes
, &result
)
1103 < 0 ? NULL
: (struct charseq
*) result
);