1 /* Copyright (C) 1996-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
31 #include "localedef.h"
32 #include "linereader.h"
34 #include "charmap-dir.h"
39 /* Define the lookup function. */
40 #include "charmap-kw.h"
43 /* Prototypes for local functions. */
/* Parse the charmap description read through CMFILE into a newly
   allocated struct charmap_t.  VERBOSE is handed on to lr_token for
   every token that is read; BE_QUIET is supplied by charmap_read
   together with it.  */
static struct charmap_t *parse_charmap (struct linereader *cmfile,
                                        int verbose, int be_quiet);

/* Look up the characters named FROM and TO in RESULT and append a
   width rule covering them with the given WIDTH.  Problems are
   reported through CMFILE with lr_error.  */
static void new_width (struct linereader *cmfile, struct charmap_t *result,
                       const char *from, const char *to,
                       unsigned long int width);

/* Enter the character named FROM, encoded by the NBYTES bytes at BYTES,
   into the name table and the byte table of CM.  For a range FROM..TO
   one entry is generated per STEP; the generated names are numbered in
   decimal when DECIMAL_ELLIPSIS is nonzero and in hexadecimal otherwise.
   BYTES is incremented in place while a range is expanded.  Errors are
   reported through LR.  */
static void charmap_new_char (struct linereader *lr, struct charmap_t *cm,
                              size_t nbytes, unsigned char *bytes,
                              const char *from, const char *to,
                              int decimal_ellipsis, int step);
/* Set to true by charmap_read after it has reported that the character
   map in use is not ASCII compatible.  */
bool enc_not_ascii_compatible;
58 #ifdef NEED_NULL_POINTER
59 static const char *null_pointer
;
62 static struct linereader
*
63 cmlr_open (const char *directory
, const char *name
, kw_hash_fct_t hf
)
67 fp
= charmap_open (directory
, name
);
72 size_t dlen
= strlen (directory
);
73 int add_slash
= (dlen
== 0 || directory
[dlen
- 1] != '/');
74 size_t nlen
= strlen (name
);
78 pathname
= alloca (dlen
+ add_slash
+ nlen
+ 1);
79 p
= stpcpy (pathname
, directory
);
84 return lr_create (fp
, pathname
, hf
);
89 charmap_read (const char *filename
, int verbose
, int error_not_found
,
90 int be_quiet
, int use_default
)
92 struct charmap_t
*result
= NULL
;
96 struct linereader
*cmfile
;
98 /* First try the name as found in the parameter. */
99 cmfile
= lr_open (filename
, charmap_hash
);
/* Not successful.  So start looking through the directories
   in the I18NPATH if this is a simple name.  */
104 if (strchr (filename
, '/') == NULL
)
106 char *i18npath
= getenv ("I18NPATH");
107 if (i18npath
!= NULL
&& *i18npath
!= '\0')
109 const size_t pathlen
= strlen (i18npath
);
110 char i18npathbuf
[pathlen
+ 1];
111 char path
[pathlen
+ sizeof ("/charmaps")];
113 i18npath
= memcpy (i18npathbuf
, i18npath
, pathlen
+ 1);
115 while (cmfile
== NULL
116 && (next
= strsep (&i18npath
, ":")) != NULL
)
118 stpcpy (stpcpy (path
, next
), "/charmaps");
119 cmfile
= cmlr_open (path
, filename
, charmap_hash
);
122 /* Try without the "/charmaps" part. */
123 cmfile
= cmlr_open (next
, filename
, charmap_hash
);
128 /* Try the default directory. */
129 cmfile
= cmlr_open (CHARMAP_PATH
, filename
, charmap_hash
);
134 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
136 if (result
== NULL
&& error_not_found
)
137 record_error (0, errno
,
138 _("character map file `%s' not found"),
142 if (result
== NULL
&& filename
!= NULL
&& strchr (filename
, '/') == NULL
)
/* OK, one more try.  We also accept the names given to the
   character sets in the files.  Sometimes they differ from the
   file name.  */
149 dir
= charmap_opendir (CHARMAP_PATH
);
154 while ((dirent
= charmap_readdir (dir
)) != NULL
)
160 aliases
= charmap_aliases (CHARMAP_PATH
, dirent
);
162 for (p
= aliases
; *p
; p
++)
163 if (strcasecmp (*p
, filename
) == 0)
168 charmap_free_aliases (aliases
);
172 struct linereader
*cmfile
;
174 cmfile
= cmlr_open (CHARMAP_PATH
, dirent
, charmap_hash
);
176 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
182 charmap_closedir (dir
);
186 if (result
== NULL
&& DEFAULT_CHARMAP
!= NULL
)
188 struct linereader
*cmfile
;
190 cmfile
= cmlr_open (CHARMAP_PATH
, DEFAULT_CHARMAP
, charmap_hash
);
192 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
195 record_error (4, errno
,
196 _("default character map file `%s' not found"),
200 if (result
!= NULL
&& result
->code_set_name
== NULL
)
201 /* The input file does not specify a code set name. This
202 shouldn't happen but we should cope with it. */
203 result
->code_set_name
= basename (filename
);
/* Test of ASCII compatibility of locale encoding.

   Verify that the encoding to be used in a locale is ASCII compatible,
   at least for the graphic characters, excluding the control characters,
   '$' and '@'.  This constraint comes from an ISO C 99 restriction.

   ISO C 99 section 7.17.(2) (about wchar_t):
     the null character shall have the code value zero and each member of
     the basic character set shall have a code value equal to its value
     when used as the lone character in an integer character constant.
   ISO C 99 section 5.2.1.(3):
     Both the basic source and basic execution character sets shall have
     the following members: the 26 uppercase letters of the Latin alphabet
       A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
     the 26 lowercase letters of the Latin alphabet
       a b c d e f g h i j k l m n o p q r s t u v w x y z
     the 10 decimal digits
       0 1 2 3 4 5 6 7 8 9
     the following 29 graphic characters
       ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
     the space character, and control characters representing horizontal
     tab, vertical tab, and form feed.

   Therefore, for all members of the "basic character set", the 'char' code
   must have the same value as the 'wchar_t' code, which in glibc is the
   same as the Unicode code, which for all of the enumerated characters
   is identical to the ASCII code.  */
232 if (result
!= NULL
&& use_default
)
234 static const char basic_charset
[] =
236 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
237 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
238 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
239 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
240 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
241 '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
242 '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
243 '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
246 const char *p
= basic_charset
;
250 struct charseq
*seq
= charmap_find_symbol (result
, p
, 1);
252 if (seq
== NULL
|| seq
->ucs4
!= (uint32_t) *p
)
255 while (*p
++ != '\0');
260 character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
261 result
->code_set_name
);
262 enc_not_ascii_compatible
= true;
270 static struct charmap_t
*
271 parse_charmap (struct linereader
*cmfile
, int verbose
, int be_quiet
)
273 struct charmap_t
*result
;
275 enum token_t expected_tok
= tok_error
;
276 const char *expected_str
= NULL
;
277 char *from_name
= NULL
;
278 char *to_name
= NULL
;
279 enum token_t ellipsis
= 0;
282 /* We don't want symbolic names in string to be translated. */
283 cmfile
->translate_strings
= 0;
285 /* Allocate room for result. */
286 result
= (struct charmap_t
*) xmalloc (sizeof (struct charmap_t
));
287 memset (result
, '\0', sizeof (struct charmap_t
));
288 /* The default DEFAULT_WIDTH is 1. */
289 result
->width_default
= 1;
291 #define obstack_chunk_alloc malloc
292 #define obstack_chunk_free free
293 obstack_init (&result
->mem_pool
);
295 if (init_hash (&result
->char_table
, 256)
296 || init_hash (&result
->byte_table
, 256))
/* We use a state machine to describe the charmap description file
   format.  */
308 struct token
*now
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
309 enum token_t nowtok
= now
->tok
;
312 if (nowtok
== tok_eof
)
/* The beginning.  We expect the special declarations, EOL or
   `CHARMAP'.  */
320 if (nowtok
== tok_eol
)
321 /* Ignore empty lines. */
324 if (nowtok
== tok_charmap
)
/* We have to set up the real work.  Fill in some
   default values.  */
331 if (result
->mb_cur_max
== 0)
332 result
->mb_cur_max
= 1;
333 if (result
->mb_cur_min
== 0)
334 result
->mb_cur_min
= result
->mb_cur_max
;
335 if (result
->mb_cur_min
> result
->mb_cur_max
)
337 record_error (0, 0, _("\
338 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
341 result
->mb_cur_min
= result
->mb_cur_max
;
344 lr_ignore_rest (cmfile
, 1);
350 if (nowtok
!= tok_code_set_name
&& nowtok
!= tok_mb_cur_max
351 && nowtok
!= tok_mb_cur_min
&& nowtok
!= tok_escape_char
352 && nowtok
!= tok_comment_char
&& nowtok
!= tok_g0esc
353 && nowtok
!= tok_g1esc
&& nowtok
!= tok_g2esc
354 && nowtok
!= tok_g3esc
&& nowtok
!= tok_repertoiremap
355 && nowtok
!= tok_include
)
357 lr_error (cmfile
, _("syntax error in prolog: %s"),
358 _("invalid definition"));
360 lr_ignore_rest (cmfile
, 0);
364 /* We know that we need an argument. */
365 arg
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
369 case tok_code_set_name
:
370 case tok_repertoiremap
:
371 if (arg
->tok
!= tok_ident
&& arg
->tok
!= tok_string
)
374 lr_error (cmfile
, _("syntax error in prolog: %s"),
377 lr_ignore_rest (cmfile
, 0);
381 if (nowtok
== tok_code_set_name
)
382 result
->code_set_name
= obstack_copy0 (&result
->mem_pool
,
383 arg
->val
.str
.startmb
,
386 result
->repertoiremap
= obstack_copy0 (&result
->mem_pool
,
387 arg
->val
.str
.startmb
,
390 lr_ignore_rest (cmfile
, 1);
395 if (arg
->tok
!= tok_number
)
398 if ((nowtok
== tok_mb_cur_max
399 && result
->mb_cur_max
!= 0)
400 || (nowtok
== tok_mb_cur_max
401 && result
->mb_cur_max
!= 0))
402 lr_error (cmfile
, _("duplicate definition of <%s>"),
403 nowtok
== tok_mb_cur_min
404 ? "mb_cur_min" : "mb_cur_max");
406 if (arg
->val
.num
< 1)
409 _("value for <%s> must be 1 or greater"),
410 nowtok
== tok_mb_cur_min
411 ? "mb_cur_min" : "mb_cur_max");
413 lr_ignore_rest (cmfile
, 0);
416 if ((nowtok
== tok_mb_cur_max
&& result
->mb_cur_min
!= 0
417 && (int) arg
->val
.num
< result
->mb_cur_min
)
418 || (nowtok
== tok_mb_cur_min
&& result
->mb_cur_max
!= 0
419 && (int) arg
->val
.num
> result
->mb_cur_max
))
421 lr_error (cmfile
, _("\
422 value of <%s> must be greater or equal than the value of <%s>"),
423 "mb_cur_max", "mb_cur_min");
425 lr_ignore_rest (cmfile
, 0);
429 if (nowtok
== tok_mb_cur_max
)
430 result
->mb_cur_max
= arg
->val
.num
;
432 result
->mb_cur_min
= arg
->val
.num
;
434 lr_ignore_rest (cmfile
, 1);
437 case tok_escape_char
:
438 case tok_comment_char
:
439 if (arg
->tok
!= tok_ident
)
442 if (arg
->val
.str
.lenmb
!= 1)
444 lr_error (cmfile
, _("\
445 argument to <%s> must be a single character"),
446 nowtok
== tok_escape_char
? "escape_char"
449 lr_ignore_rest (cmfile
, 0);
453 if (nowtok
== tok_escape_char
)
454 cmfile
->escape_char
= *arg
->val
.str
.startmb
;
456 cmfile
->comment_char
= *arg
->val
.str
.startmb
;
458 lr_ignore_rest (cmfile
, 1);
466 lr_ignore_rest (cmfile
, 0); /* XXX */
470 lr_error (cmfile
, _("\
471 character sets with locking states are not supported"));
476 assert (! "Should not happen");
481 /* We have seen `CHARMAP' and now are in the body. Each line
482 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
483 if (nowtok
== tok_eol
)
484 /* Ignore empty lines. */
487 if (nowtok
== tok_end
)
489 expected_tok
= tok_charmap
;
490 expected_str
= "CHARMAP";
495 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
497 lr_error (cmfile
, _("syntax error in %s definition: %s"),
498 "CHARMAP", _("no symbolic name given"));
500 lr_ignore_rest (cmfile
, 0);
/* If the previous line was not completely correct free the
   used memory.  */
506 if (from_name
!= NULL
)
507 obstack_free (&result
->mem_pool
, from_name
);
509 if (nowtok
== tok_bsymbol
)
510 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
511 now
->val
.str
.startmb
,
515 obstack_printf (&result
->mem_pool
, "U%08X",
516 cmfile
->token
.val
.ucs4
);
517 obstack_1grow (&result
->mem_pool
, '\0');
518 from_name
= (char *) obstack_finish (&result
->mem_pool
);
/* We have two possibilities: We can see an ellipsis or an
   encoding value.  */
528 if (nowtok
== tok_ellipsis3
|| nowtok
== tok_ellipsis4
529 || nowtok
== tok_ellipsis2
|| nowtok
== tok_ellipsis4_2
530 || nowtok
== tok_ellipsis2_2
)
533 if (nowtok
== tok_ellipsis4_2
)
536 nowtok
= tok_ellipsis4
;
538 else if (nowtok
== tok_ellipsis2_2
)
541 nowtok
= tok_ellipsis2
;
549 if (nowtok
!= tok_charcode
)
551 lr_error (cmfile
, _("syntax error in %s definition: %s"),
552 "CHARMAP", _("invalid encoding given"));
554 lr_ignore_rest (cmfile
, 0);
560 if (now
->val
.charcode
.nbytes
< result
->mb_cur_min
)
561 lr_error (cmfile
, _("too few bytes in character encoding"));
562 else if (now
->val
.charcode
.nbytes
> result
->mb_cur_max
)
563 lr_error (cmfile
, _("too many bytes in character encoding"));
565 charmap_new_char (cmfile
, result
, now
->val
.charcode
.nbytes
,
566 now
->val
.charcode
.bytes
, from_name
, to_name
,
567 ellipsis
!= tok_ellipsis2
, step
);
569 /* Ignore trailing comment silently. */
570 lr_ignore_rest (cmfile
, 0);
581 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
583 lr_error (cmfile
, _("syntax error in %s definition: %s"),
585 _("no symbolic name given for end of range"));
587 lr_ignore_rest (cmfile
, 0);
591 /* Copy the to-name in a safe place. */
592 if (nowtok
== tok_bsymbol
)
593 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
594 cmfile
->token
.val
.str
.startmb
,
595 cmfile
->token
.val
.str
.lenmb
);
598 obstack_printf (&result
->mem_pool
, "U%08X",
599 cmfile
->token
.val
.ucs4
);
600 obstack_1grow (&result
->mem_pool
, '\0');
601 to_name
= (char *) obstack_finish (&result
->mem_pool
);
608 if (nowtok
!= expected_tok
)
609 lr_error (cmfile
, _("\
610 %1$s: definition does not end with `END %1$s'"), expected_str
);
612 lr_ignore_rest (cmfile
, nowtok
== expected_tok
);
617 /* Waiting for WIDTH... */
618 if (nowtok
== tok_eol
)
619 /* Ignore empty lines. */
622 if (nowtok
== tok_width_default
)
628 if (nowtok
== tok_width
)
630 lr_ignore_rest (cmfile
, 1);
635 if (nowtok
== tok_width_variable
)
637 lr_ignore_rest (cmfile
, 1);
642 lr_error (cmfile
, _("\
643 only WIDTH definitions are allowed to follow the CHARMAP definition"));
645 lr_ignore_rest (cmfile
, 0);
649 if (nowtok
!= tok_number
)
650 lr_error (cmfile
, _("value for %s must be an integer"),
653 result
->width_default
= now
->val
.num
;
655 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
/* We now expect `END WIDTH' or lines of the format "%s %d\n" or
   "%s...%s %d\n".  */
663 if (nowtok
== tok_eol
)
664 /* ignore empty lines. */
667 if (nowtok
== tok_end
)
669 expected_tok
= tok_width
;
670 expected_str
= "WIDTH";
675 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
677 lr_error (cmfile
, _("syntax error in %s definition: %s"),
678 "WIDTH", _("no symbolic name given"));
680 lr_ignore_rest (cmfile
, 0);
684 if (from_name
!= NULL
)
685 obstack_free (&result
->mem_pool
, from_name
);
687 if (nowtok
== tok_bsymbol
)
688 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
689 now
->val
.str
.startmb
,
693 obstack_printf (&result
->mem_pool
, "U%08X",
694 cmfile
->token
.val
.ucs4
);
695 obstack_1grow (&result
->mem_pool
, '\0');
696 from_name
= (char *) obstack_finish (&result
->mem_pool
);
705 if (nowtok
== tok_ellipsis3
)
712 if (nowtok
!= tok_number
)
713 lr_error (cmfile
, _("value for %s must be an integer"),
717 /* Store width for chars. */
718 new_width (cmfile
, result
, from_name
, to_name
, now
->val
.num
);
724 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
730 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
732 lr_error (cmfile
, _("syntax error in %s definition: %s"),
733 "WIDTH", _("no symbolic name given for end of range"));
735 lr_ignore_rest (cmfile
, 0);
741 if (nowtok
== tok_bsymbol
)
742 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
743 now
->val
.str
.startmb
,
747 obstack_printf (&result
->mem_pool
, "U%08X",
748 cmfile
->token
.val
.ucs4
);
749 obstack_1grow (&result
->mem_pool
, '\0');
750 to_name
= (char *) obstack_finish (&result
->mem_pool
);
757 /* We now expect `END WIDTH_VARIABLE' or lines of the format
758 "%s\n" or "%s...%s\n". */
759 if (nowtok
== tok_eol
)
760 /* ignore empty lines. */
763 if (nowtok
== tok_end
)
765 expected_tok
= tok_width_variable
;
766 expected_str
= "WIDTH_VARIABLE";
771 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
773 lr_error (cmfile
, _("syntax error in %s definition: %s"),
774 "WIDTH_VARIABLE", _("no symbolic name given"));
776 lr_ignore_rest (cmfile
, 0);
781 if (from_name
!= NULL
)
782 obstack_free (&result
->mem_pool
, from_name
);
784 if (nowtok
== tok_bsymbol
)
785 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
786 now
->val
.str
.startmb
,
790 obstack_printf (&result
->mem_pool
, "U%08X",
791 cmfile
->token
.val
.ucs4
);
792 obstack_1grow (&result
->mem_pool
, '\0');
793 from_name
= (char *) obstack_finish (&result
->mem_pool
);
801 if (nowtok
== tok_ellipsis3
)
812 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
814 lr_error (cmfile
, _("syntax error in %s definition: %s"),
816 _("no symbolic name given for end of range"));
817 lr_ignore_rest (cmfile
, 0);
821 if (nowtok
== tok_bsymbol
)
822 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
823 now
->val
.str
.startmb
,
827 obstack_printf (&result
->mem_pool
, "U%08X",
828 cmfile
->token
.val
.ucs4
);
829 obstack_1grow (&result
->mem_pool
, '\0');
830 to_name
= (char *) obstack_finish (&result
->mem_pool
);
833 /* XXX Enter value into table. */
835 lr_ignore_rest (cmfile
, 1);
841 record_error (5, 0, _("%s: error in state machine"),
849 record_error (0, 0, _("%s: premature end of file"),
859 new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
860 const char *from
, const char *to
, unsigned long int width
)
862 struct charseq
*from_val
;
863 struct charseq
*to_val
;
865 from_val
= charmap_find_value (result
, from
, strlen (from
));
866 if (from_val
== NULL
)
868 lr_error (cmfile
, _("unknown character `%s'"), from
);
876 to_val
= charmap_find_value (result
, to
, strlen (to
));
879 lr_error (cmfile
, _("unknown character `%s'"), to
);
/* Make sure the number of bytes for the end points of the range
   is the same.  */
885 if (from_val
->nbytes
!= to_val
->nbytes
)
887 lr_error (cmfile
, _("\
888 number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"),
889 from_val
->nbytes
, to_val
->nbytes
);
894 if (result
->nwidth_rules
>= result
->nwidth_rules_max
)
896 size_t new_size
= result
->nwidth_rules
+ 32;
897 struct width_rule
*new_rules
=
898 (struct width_rule
*) obstack_alloc (&result
->mem_pool
,
900 * sizeof (struct width_rule
)));
902 memcpy (new_rules
, result
->width_rules
,
903 result
->nwidth_rules_max
* sizeof (struct width_rule
));
905 result
->width_rules
= new_rules
;
906 result
->nwidth_rules_max
= new_size
;
909 result
->width_rules
[result
->nwidth_rules
].from
= from_val
;
910 result
->width_rules
[result
->nwidth_rules
].to
= to_val
;
911 result
->width_rules
[result
->nwidth_rules
].width
= (unsigned int) width
;
912 ++result
->nwidth_rules
;
917 charmap_find_value (const struct charmap_t
*cm
, const char *name
, size_t len
)
921 return (find_entry ((hash_table
*) &cm
->char_table
, name
, len
, &result
)
922 < 0 ? NULL
: (struct charseq
*) result
);
927 charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
928 size_t nbytes
, unsigned char *bytes
,
929 const char *from
, const char *to
,
930 int decimal_ellipsis
, int step
)
932 hash_table
*ht
= &cm
->char_table
;
933 hash_table
*bt
= &cm
->byte_table
;
934 struct obstack
*ob
= &cm
->mem_pool
;
938 int prefix_len
, len1
, len2
;
939 unsigned int from_nr
, to_nr
, cnt
;
940 struct charseq
*newp
;
942 len1
= strlen (from
);
946 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
947 newp
->nbytes
= nbytes
;
948 memcpy (newp
->bytes
, bytes
, nbytes
);
951 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
952 if ((from
[0] == 'U' || from
[0] == 'P') && (len1
== 5 || len1
== 9))
/* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
   xxxx and xxxxxxxx are hexadecimal numbers.  In this case
   we use the value of xxxx or xxxxxxxx as the UCS4 value of
   this character and we don't have to consult the repertoire
   map.

   If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
   and xxxxxxxx also give the code point in UCS4 but this must
   be in the private, i.e., unassigned, area.  This should be
   used for characters which do not (yet) have an equivalent
   in ISO 10646 and Unicode.  */
968 newp
->ucs4
= strtoul (from
+ 1, &endp
, 16);
969 if (endp
- from
!= len1
970 || (newp
->ucs4
== ~((uint32_t) 0) && errno
== ERANGE
)
971 || newp
->ucs4
>= 0x80000000)
972 /* This wasn't successful. Signal this name cannot be a
973 correct UCS value. */
974 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
977 insert_entry (ht
, from
, len1
, newp
);
978 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
979 /* Please note that it isn't a bug if a symbol is defined more
980 than once. All later definitions are simply discarded. */
/* We have a range: the two names must have equal prefixes and an
   equal number of digits, and the second number must be greater
   than or equal to the first.  */
992 lr_error (lr
, _("invalid names for character range"));
996 cp
= &from
[len1
- 1];
997 if (decimal_ellipsis
)
998 while (isdigit (*cp
) && cp
>= from
)
1001 while (isxdigit (*cp
) && cp
>= from
)
1003 if (!isdigit (*cp
) && !isupper (*cp
))
1005 hexadecimal range format should use only capital characters"));
1009 prefix_len
= (cp
- from
) + 1;
1011 if (cp
== &from
[len1
- 1] || strncmp (from
, to
, prefix_len
) != 0)
1015 from_nr
= strtoul (&from
[prefix_len
], &from_end
, decimal_ellipsis
? 10 : 16);
1016 if (*from_end
!= '\0' || (from_nr
== UINT_MAX
&& errno
== ERANGE
)
1017 || ((to_nr
= strtoul (&to
[prefix_len
], &to_end
,
1018 decimal_ellipsis
? 10 : 16)) == UINT_MAX
1022 lr_error (lr
, _("<%s> and <%s> are invalid names for range"), from
, to
);
1026 if (from_nr
> to_nr
)
1028 lr_error (lr
, _("upper limit in range is smaller than lower limit"));
1032 for (cnt
= from_nr
; cnt
<= to_nr
; cnt
+= step
)
1035 obstack_printf (ob
, decimal_ellipsis
? "%.*s%0*d" : "%.*s%0*X",
1036 prefix_len
, from
, len1
- prefix_len
, cnt
);
1037 obstack_1grow (ob
, '\0');
1038 name_end
= obstack_finish (ob
);
1040 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
1041 newp
->nbytes
= nbytes
;
1042 memcpy (newp
->bytes
, bytes
, nbytes
);
1043 newp
->name
= name_end
;
1045 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1046 if ((name_end
[0] == 'U' || name_end
[0] == 'P')
1047 && (len1
== 5 || len1
== 9))
/* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
   xxxx and xxxxxxxx are hexadecimal numbers.  In this case
   we use the value of xxxx or xxxxxxxx as the UCS4 value of
   this character and we don't have to consult the repertoire
   map.

   If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
   and xxxxxxxx also give the code point in UCS4 but this must
   be in the private, i.e., unassigned, area.  This should be
   used for characters which do not (yet) have an equivalent
   in ISO 10646 and Unicode.  */
1063 newp
->ucs4
= strtoul (name_end
+ 1, &endp
, 16);
1064 if (endp
- name_end
!= len1
1065 || (newp
->ucs4
== ~((uint32_t) 0) && errno
== ERANGE
)
1066 || newp
->ucs4
>= 0x80000000)
1067 /* This wasn't successful. Signal this name cannot be a
1068 correct UCS value. */
1069 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1072 insert_entry (ht
, name_end
, len1
, newp
);
1073 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
1074 /* Please note we don't examine the return value since it is no error
1075 if we have two definitions for a symbol. */
1077 /* Increment the value in the byte sequence. */
1078 if (++bytes
[nbytes
- 1] == '\0')
1086 _("resulting bytes for range not representable."));
1089 while (++bytes
[b
--] == 0);
1096 charmap_find_symbol (const struct charmap_t
*cm
, const char *bytes
,
1101 return (find_entry ((hash_table
*) &cm
->byte_table
, bytes
, nbytes
, &result
)
1102 < 0 ? NULL
: (struct charseq
*) result
);