1 /* Copyright (C) 1996, 1998-2004,2005, 2006 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
31 #include "localedef.h"
32 #include "linereader.h"
34 #include "charmap-dir.h"
39 /* Define the lookup function. */
40 #include "charmap-kw.h"
43 /* Prototypes for local functions. */
44 static struct charmap_t
*parse_charmap (struct linereader
*cmfile
,
45 int verbose
, int be_quiet
);
46 static void new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
47 const char *from
, const char *to
,
48 unsigned long int width
);
49 static void charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
50 size_t nbytes
, unsigned char *bytes
,
51 const char *from
, const char *to
,
52 int decimal_ellipsis
, int step
);
55 bool enc_not_ascii_compatible
;
58 #ifdef NEED_NULL_POINTER
59 static const char *null_pointer
;
62 static struct linereader
*
63 cmlr_open (const char *directory
, const char *name
, kw_hash_fct_t hf
)
67 fp
= charmap_open (directory
, name
);
72 size_t dlen
= strlen (directory
);
73 int add_slash
= (dlen
== 0 || directory
[dlen
- 1] != '/');
74 size_t nlen
= strlen (name
);
78 pathname
= alloca (dlen
+ add_slash
+ nlen
+ 1);
79 p
= stpcpy (pathname
, directory
);
84 return lr_create (fp
, pathname
, hf
);
89 charmap_read (const char *filename
, int verbose
, int error_not_found
,
90 int be_quiet
, int use_default
)
92 struct charmap_t
*result
= NULL
;
96 struct linereader
*cmfile
;
98 /* First try the name as found in the parameter. */
99 cmfile
= lr_open (filename
, charmap_hash
);
/* Not successful.  So start looking through the directories
   in the I18NPATH if this is a simple name.  */
104 if (strchr (filename
, '/') == NULL
)
106 char *i18npath
= getenv ("I18NPATH");
107 if (i18npath
!= NULL
&& *i18npath
!= '\0')
109 const size_t pathlen
= strlen (i18npath
);
110 char i18npathbuf
[pathlen
+ 1];
111 char path
[pathlen
+ sizeof ("/charmaps")];
113 i18npath
= memcpy (i18npathbuf
, i18npath
, pathlen
+ 1);
115 while (cmfile
== NULL
116 && (next
= strsep (&i18npath
, ":")) != NULL
)
118 stpcpy (stpcpy (path
, next
), "/charmaps");
119 cmfile
= cmlr_open (path
, filename
, charmap_hash
);
122 /* Try without the "/charmaps" part. */
123 cmfile
= cmlr_open (next
, filename
, charmap_hash
);
128 /* Try the default directory. */
129 cmfile
= cmlr_open (CHARMAP_PATH
, filename
, charmap_hash
);
134 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
136 if (result
== NULL
&& error_not_found
)
137 WITH_CUR_LOCALE (error (0, errno
, _("\
138 character map file `%s' not found"), filename
));
141 if (result
== NULL
&& filename
!= NULL
&& strchr (filename
, '/') == NULL
)
143 /* OK, one more try. We also accept the names given to the
144 character sets in the files. Sometimes they differ from the
148 dir
= charmap_opendir (CHARMAP_PATH
);
153 while ((dirent
= charmap_readdir (dir
)) != NULL
)
159 aliases
= charmap_aliases (CHARMAP_PATH
, dirent
);
161 for (p
= aliases
; *p
; p
++)
162 if (strcasecmp (*p
, filename
) == 0)
167 charmap_free_aliases (aliases
);
171 struct linereader
*cmfile
;
173 cmfile
= cmlr_open (CHARMAP_PATH
, dirent
, charmap_hash
);
175 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
181 charmap_closedir (dir
);
185 if (result
== NULL
&& DEFAULT_CHARMAP
!= NULL
)
187 struct linereader
*cmfile
;
189 cmfile
= cmlr_open (CHARMAP_PATH
, DEFAULT_CHARMAP
, charmap_hash
);
191 result
= parse_charmap (cmfile
, verbose
, be_quiet
);
194 WITH_CUR_LOCALE (error (4, errno
, _("\
195 default character map file `%s' not found"), DEFAULT_CHARMAP
));
198 if (result
!= NULL
&& result
->code_set_name
== NULL
)
199 /* The input file does not specify a code set name. This
200 shouldn't happen but we should cope with it. */
201 result
->code_set_name
= basename (filename
);
203 /* Test of ASCII compatibility of locale encoding.
205 Verify that the encoding to be used in a locale is ASCII compatible,
206 at least for the graphic characters, excluding the control characters,
207 '$' and '@'. This constraint comes from an ISO C 99 restriction.
209 ISO C 99 section 7.17.(2) (about wchar_t):
210 the null character shall have the code value zero and each member of
211 the basic character set shall have a code value equal to its value
212 when used as the lone character in an integer character constant.
213 ISO C 99 section 5.2.1.(3):
214 Both the basic source and basic execution character sets shall have
215 the following members: the 26 uppercase letters of the Latin alphabet
216 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
217 the 26 lowercase letters of the Latin alphabet
218 a b c d e f g h i j k l m n o p q r s t u v w x y z
219 the 10 decimal digits
221 the following 29 graphic characters
222 ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
223 the space character, and control characters representing horizontal
224 tab, vertical tab, and form feed.
226 Therefore, for all members of the "basic character set", the 'char' code
227 must have the same value as the 'wchar_t' code, which in glibc is the
228 same as the Unicode code, which for all of the enumerated characters
229 is identical to the ASCII code. */
230 if (result
!= NULL
&& use_default
)
232 static const char basic_charset
[] =
234 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
235 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
236 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
237 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
238 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
239 '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
240 '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^',
241 '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0'
244 const char *p
= basic_charset
;
248 struct charseq
*seq
= charmap_find_symbol (result
, p
, 1);
250 if (seq
== NULL
|| seq
->ucs4
!= (uint32_t) *p
)
253 while (*p
++ != '\0');
257 WITH_CUR_LOCALE (fprintf (stderr
, _("\
258 character map `%s' is not ASCII compatible, locale not ISO C compliant\n"),
259 result
->code_set_name
));
260 enc_not_ascii_compatible
= true;
268 static struct charmap_t
*
269 parse_charmap (struct linereader
*cmfile
, int verbose
, int be_quiet
)
271 struct charmap_t
*result
;
273 enum token_t expected_tok
= tok_error
;
274 const char *expected_str
= NULL
;
275 char *from_name
= NULL
;
276 char *to_name
= NULL
;
277 enum token_t ellipsis
= 0;
280 /* We don't want symbolic names in string to be translated. */
281 cmfile
->translate_strings
= 0;
283 /* Allocate room for result. */
284 result
= (struct charmap_t
*) xmalloc (sizeof (struct charmap_t
));
285 memset (result
, '\0', sizeof (struct charmap_t
));
286 /* The default DEFAULT_WIDTH is 1. */
287 result
->width_default
= 1;
289 #define obstack_chunk_alloc malloc
290 #define obstack_chunk_free free
291 obstack_init (&result
->mem_pool
);
293 if (init_hash (&result
->char_table
, 256)
294 || init_hash (&result
->byte_table
, 256))
300 /* We use a state machine to describe the charmap description file
306 struct token
*now
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
307 enum token_t nowtok
= now
->tok
;
310 if (nowtok
== tok_eof
)
316 /* The beginning. We expect the special declarations, EOL or
318 if (nowtok
== tok_eol
)
319 /* Ignore empty lines. */
322 if (nowtok
== tok_charmap
)
327 /* We have to set up the real work. Fill in some
329 if (result
->mb_cur_max
== 0)
330 result
->mb_cur_max
= 1;
331 if (result
->mb_cur_min
== 0)
332 result
->mb_cur_min
= result
->mb_cur_max
;
333 if (result
->mb_cur_min
> result
->mb_cur_max
)
336 WITH_CUR_LOCALE (error (0, 0, _("\
337 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
340 result
->mb_cur_min
= result
->mb_cur_max
;
343 lr_ignore_rest (cmfile
, 1);
349 if (nowtok
!= tok_code_set_name
&& nowtok
!= tok_mb_cur_max
350 && nowtok
!= tok_mb_cur_min
&& nowtok
!= tok_escape_char
351 && nowtok
!= tok_comment_char
&& nowtok
!= tok_g0esc
352 && nowtok
!= tok_g1esc
&& nowtok
!= tok_g2esc
353 && nowtok
!= tok_g3esc
&& nowtok
!= tok_repertoiremap
354 && nowtok
!= tok_include
)
356 lr_error (cmfile
, _("syntax error in prolog: %s"),
357 _("invalid definition"));
359 lr_ignore_rest (cmfile
, 0);
363 /* We know that we need an argument. */
364 arg
= lr_token (cmfile
, NULL
, NULL
, NULL
, verbose
);
368 case tok_code_set_name
:
369 case tok_repertoiremap
:
370 if (arg
->tok
!= tok_ident
&& arg
->tok
!= tok_string
)
373 lr_error (cmfile
, _("syntax error in prolog: %s"),
376 lr_ignore_rest (cmfile
, 0);
380 if (nowtok
== tok_code_set_name
)
381 result
->code_set_name
= obstack_copy0 (&result
->mem_pool
,
382 arg
->val
.str
.startmb
,
385 result
->repertoiremap
= obstack_copy0 (&result
->mem_pool
,
386 arg
->val
.str
.startmb
,
389 lr_ignore_rest (cmfile
, 1);
394 if (arg
->tok
!= tok_number
)
398 && ((nowtok
== tok_mb_cur_max
399 && result
->mb_cur_max
!= 0)
400 || (nowtok
== tok_mb_cur_max
401 && result
->mb_cur_max
!= 0)))
402 lr_error (cmfile
, _("duplicate definition of <%s>"),
403 nowtok
== tok_mb_cur_min
404 ? "mb_cur_min" : "mb_cur_max");
406 if (arg
->val
.num
< 1)
409 _("value for <%s> must be 1 or greater"),
410 nowtok
== tok_mb_cur_min
411 ? "mb_cur_min" : "mb_cur_max");
413 lr_ignore_rest (cmfile
, 0);
416 if ((nowtok
== tok_mb_cur_max
&& result
->mb_cur_min
!= 0
417 && (int) arg
->val
.num
< result
->mb_cur_min
)
418 || (nowtok
== tok_mb_cur_min
&& result
->mb_cur_max
!= 0
419 && (int) arg
->val
.num
> result
->mb_cur_max
))
421 lr_error (cmfile
, _("\
422 value of <%s> must be greater or equal than the value of <%s>"),
423 "mb_cur_max", "mb_cur_min");
425 lr_ignore_rest (cmfile
, 0);
429 if (nowtok
== tok_mb_cur_max
)
430 result
->mb_cur_max
= arg
->val
.num
;
432 result
->mb_cur_min
= arg
->val
.num
;
434 lr_ignore_rest (cmfile
, 1);
437 case tok_escape_char
:
438 case tok_comment_char
:
439 if (arg
->tok
!= tok_ident
)
442 if (arg
->val
.str
.lenmb
!= 1)
444 lr_error (cmfile
, _("\
445 argument to <%s> must be a single character"),
446 nowtok
== tok_escape_char
? "escape_char"
449 lr_ignore_rest (cmfile
, 0);
453 if (nowtok
== tok_escape_char
)
454 cmfile
->escape_char
= *arg
->val
.str
.startmb
;
456 cmfile
->comment_char
= *arg
->val
.str
.startmb
;
458 lr_ignore_rest (cmfile
, 1);
466 lr_ignore_rest (cmfile
, 0); /* XXX */
470 lr_error (cmfile
, _("\
471 character sets with locking states are not supported"));
476 assert (! "Should not happen");
481 /* We have seen `CHARMAP' and now are in the body. Each line
482 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
483 if (nowtok
== tok_eol
)
484 /* Ignore empty lines. */
487 if (nowtok
== tok_end
)
489 expected_tok
= tok_charmap
;
490 expected_str
= "CHARMAP";
495 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
497 lr_error (cmfile
, _("syntax error in %s definition: %s"),
498 "CHARMAP", _("no symbolic name given"));
500 lr_ignore_rest (cmfile
, 0);
504 /* If the previous line was not completely correct free the
506 if (from_name
!= NULL
)
507 obstack_free (&result
->mem_pool
, from_name
);
509 if (nowtok
== tok_bsymbol
)
510 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
511 now
->val
.str
.startmb
,
515 obstack_printf (&result
->mem_pool
, "U%08X",
516 cmfile
->token
.val
.ucs4
);
517 obstack_1grow (&result
->mem_pool
, '\0');
518 from_name
= (char *) obstack_finish (&result
->mem_pool
);
526 /* We have two possibilities: We can see an ellipsis or an
528 if (nowtok
== tok_ellipsis3
|| nowtok
== tok_ellipsis4
529 || nowtok
== tok_ellipsis2
|| nowtok
== tok_ellipsis4_2
530 || nowtok
== tok_ellipsis2_2
)
533 if (nowtok
== tok_ellipsis4_2
)
536 nowtok
= tok_ellipsis4
;
538 else if (nowtok
== tok_ellipsis2_2
)
541 nowtok
= tok_ellipsis2
;
549 if (nowtok
!= tok_charcode
)
551 lr_error (cmfile
, _("syntax error in %s definition: %s"),
552 "CHARMAP", _("invalid encoding given"));
554 lr_ignore_rest (cmfile
, 0);
560 if (now
->val
.charcode
.nbytes
< result
->mb_cur_min
)
561 lr_error (cmfile
, _("too few bytes in character encoding"));
562 else if (now
->val
.charcode
.nbytes
> result
->mb_cur_max
)
563 lr_error (cmfile
, _("too many bytes in character encoding"));
565 charmap_new_char (cmfile
, result
, now
->val
.charcode
.nbytes
,
566 now
->val
.charcode
.bytes
, from_name
, to_name
,
567 ellipsis
!= tok_ellipsis2
, step
);
569 /* Ignore trailing comment silently. */
570 lr_ignore_rest (cmfile
, 0);
581 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
583 lr_error (cmfile
, _("syntax error in %s definition: %s"),
585 _("no symbolic name given for end of range"));
587 lr_ignore_rest (cmfile
, 0);
591 /* Copy the to-name in a safe place. */
592 if (nowtok
== tok_bsymbol
)
593 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
594 cmfile
->token
.val
.str
.startmb
,
595 cmfile
->token
.val
.str
.lenmb
);
598 obstack_printf (&result
->mem_pool
, "U%08X",
599 cmfile
->token
.val
.ucs4
);
600 obstack_1grow (&result
->mem_pool
, '\0');
601 to_name
= (char *) obstack_finish (&result
->mem_pool
);
608 if (nowtok
!= expected_tok
)
609 lr_error (cmfile
, _("\
610 %1$s: definition does not end with `END %1$s'"), expected_str
);
612 lr_ignore_rest (cmfile
, nowtok
== expected_tok
);
617 /* Waiting for WIDTH... */
618 if (nowtok
== tok_eol
)
619 /* Ignore empty lines. */
622 if (nowtok
== tok_width_default
)
628 if (nowtok
== tok_width
)
630 lr_ignore_rest (cmfile
, 1);
635 if (nowtok
== tok_width_variable
)
637 lr_ignore_rest (cmfile
, 1);
642 lr_error (cmfile
, _("\
643 only WIDTH definitions are allowed to follow the CHARMAP definition"));
645 lr_ignore_rest (cmfile
, 0);
649 if (nowtok
!= tok_number
)
650 lr_error (cmfile
, _("value for %s must be an integer"),
653 result
->width_default
= now
->val
.num
;
655 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
661 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
663 if (nowtok
== tok_eol
)
664 /* ignore empty lines. */
667 if (nowtok
== tok_end
)
669 expected_tok
= tok_width
;
670 expected_str
= "WIDTH";
675 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
677 lr_error (cmfile
, _("syntax error in %s definition: %s"),
678 "WIDTH", _("no symbolic name given"));
680 lr_ignore_rest (cmfile
, 0);
684 if (from_name
!= NULL
)
685 obstack_free (&result
->mem_pool
, from_name
);
687 if (nowtok
== tok_bsymbol
)
688 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
689 now
->val
.str
.startmb
,
693 obstack_printf (&result
->mem_pool
, "U%08X",
694 cmfile
->token
.val
.ucs4
);
695 obstack_1grow (&result
->mem_pool
, '\0');
696 from_name
= (char *) obstack_finish (&result
->mem_pool
);
705 if (nowtok
== tok_ellipsis3
)
712 if (nowtok
!= tok_number
)
713 lr_error (cmfile
, _("value for %s must be an integer"),
717 /* Store width for chars. */
718 new_width (cmfile
, result
, from_name
, to_name
, now
->val
.num
);
724 lr_ignore_rest (cmfile
, nowtok
== tok_number
);
730 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
732 lr_error (cmfile
, _("syntax error in %s definition: %s"),
733 "WIDTH", _("no symbolic name given for end of range"));
735 lr_ignore_rest (cmfile
, 0);
741 if (nowtok
== tok_bsymbol
)
742 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
743 now
->val
.str
.startmb
,
747 obstack_printf (&result
->mem_pool
, "U%08X",
748 cmfile
->token
.val
.ucs4
);
749 obstack_1grow (&result
->mem_pool
, '\0');
750 to_name
= (char *) obstack_finish (&result
->mem_pool
);
757 /* We now expect `END WIDTH_VARIABLE' or lines of the format
758 "%s\n" or "%s...%s\n". */
759 if (nowtok
== tok_eol
)
760 /* ignore empty lines. */
763 if (nowtok
== tok_end
)
765 expected_tok
= tok_width_variable
;
766 expected_str
= "WIDTH_VARIABLE";
771 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
773 lr_error (cmfile
, _("syntax error in %s definition: %s"),
774 "WIDTH_VARIABLE", _("no symbolic name given"));
776 lr_ignore_rest (cmfile
, 0);
781 if (from_name
!= NULL
)
782 obstack_free (&result
->mem_pool
, from_name
);
784 if (nowtok
== tok_bsymbol
)
785 from_name
= (char *) obstack_copy0 (&result
->mem_pool
,
786 now
->val
.str
.startmb
,
790 obstack_printf (&result
->mem_pool
, "U%08X",
791 cmfile
->token
.val
.ucs4
);
792 obstack_1grow (&result
->mem_pool
, '\0');
793 from_name
= (char *) obstack_finish (&result
->mem_pool
);
801 if (nowtok
== tok_ellipsis3
)
812 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_ucs4
)
814 lr_error (cmfile
, _("syntax error in %s definition: %s"),
816 _("no symbolic name given for end of range"));
817 lr_ignore_rest (cmfile
, 0);
821 if (nowtok
== tok_bsymbol
)
822 to_name
= (char *) obstack_copy0 (&result
->mem_pool
,
823 now
->val
.str
.startmb
,
827 obstack_printf (&result
->mem_pool
, "U%08X",
828 cmfile
->token
.val
.ucs4
);
829 obstack_1grow (&result
->mem_pool
, '\0');
830 to_name
= (char *) obstack_finish (&result
->mem_pool
);
833 /* XXX Enter value into table. */
835 lr_ignore_rest (cmfile
, 1);
841 WITH_CUR_LOCALE (error (5, 0, _("%s: error in state machine"),
848 if (state
!= 91 && !be_quiet
)
849 WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"),
859 new_width (struct linereader
*cmfile
, struct charmap_t
*result
,
860 const char *from
, const char *to
, unsigned long int width
)
862 struct charseq
*from_val
;
863 struct charseq
*to_val
;
865 from_val
= charmap_find_value (result
, from
, strlen (from
));
866 if (from_val
== NULL
)
868 lr_error (cmfile
, _("unknown character `%s'"), from
);
876 to_val
= charmap_find_value (result
, to
, strlen (to
));
879 lr_error (cmfile
, _("unknown character `%s'"), to
);
883 /* Make sure the number of bytes for the end points of the range
885 if (from_val
->nbytes
!= to_val
->nbytes
)
887 lr_error (cmfile
, _("\
888 number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"),
889 from_val
->nbytes
, to_val
->nbytes
);
894 if (result
->nwidth_rules
>= result
->nwidth_rules_max
)
896 size_t new_size
= result
->nwidth_rules
+ 32;
897 struct width_rule
*new_rules
=
898 (struct width_rule
*) obstack_alloc (&result
->mem_pool
,
900 * sizeof (struct width_rule
)));
902 memcpy (new_rules
, result
->width_rules
,
903 result
->nwidth_rules_max
* sizeof (struct width_rule
));
905 result
->width_rules
= new_rules
;
906 result
->nwidth_rules_max
= new_size
;
909 result
->width_rules
[result
->nwidth_rules
].from
= from_val
;
910 result
->width_rules
[result
->nwidth_rules
].to
= to_val
;
911 result
->width_rules
[result
->nwidth_rules
].width
= (unsigned int) width
;
912 ++result
->nwidth_rules
;
917 charmap_find_value (const struct charmap_t
*cm
, const char *name
, size_t len
)
921 return (find_entry ((hash_table
*) &cm
->char_table
, name
, len
, &result
)
922 < 0 ? NULL
: (struct charseq
*) result
);
927 charmap_new_char (struct linereader
*lr
, struct charmap_t
*cm
,
928 size_t nbytes
, unsigned char *bytes
,
929 const char *from
, const char *to
,
930 int decimal_ellipsis
, int step
)
932 hash_table
*ht
= &cm
->char_table
;
933 hash_table
*bt
= &cm
->byte_table
;
934 struct obstack
*ob
= &cm
->mem_pool
;
938 int prefix_len
, len1
, len2
;
939 unsigned int from_nr
, to_nr
, cnt
;
940 struct charseq
*newp
;
942 len1
= strlen (from
);
946 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
947 newp
->nbytes
= nbytes
;
948 memcpy (newp
->bytes
, bytes
, nbytes
);
951 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
952 if ((from
[0] == 'U' || from
[0] == 'P') && (len1
== 5 || len1
== 9))
954 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
955 xxxx and xxxxxxxx are hexadecimal numbers. In this case
956 we use the value of xxxx or xxxxxxxx as the UCS4 value of
957 this character and we don't have to consult the repertoire
960 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
961 and xxxxxxxx also give the code point in UCS4 but this must
962 be in the private, i.e., unassigned, area. This should be
963 used for characters which do not (yet) have an equivalent
964 in ISO 10646 and Unicode. */
968 newp
->ucs4
= strtoul (from
+ 1, &endp
, 16);
969 if (endp
- from
!= len1
970 || (newp
->ucs4
== ~((uint32_t) 0) && errno
== ERANGE
)
971 || newp
->ucs4
>= 0x80000000)
972 /* This wasn't successful. Signal this name cannot be a
973 correct UCS value. */
974 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
977 insert_entry (ht
, from
, len1
, newp
);
978 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
979 /* Please note that it isn't a bug if a symbol is defined more
980 than once. All later definitions are simply discarded. */
/* We have a range: the two names must have equal prefixes and an
   equal number of digits, and the second number must be greater
   than or equal to the first.  */
992 lr_error (lr
, _("invalid names for character range"));
996 cp
= &from
[len1
- 1];
997 if (decimal_ellipsis
)
998 while (isdigit (*cp
) && cp
>= from
)
1001 while (isxdigit (*cp
) && cp
>= from
)
1003 if (!isdigit (*cp
) && !isupper (*cp
))
1005 hexadecimal range format should use only capital characters"));
1009 prefix_len
= (cp
- from
) + 1;
1011 if (cp
== &from
[len1
- 1] || strncmp (from
, to
, prefix_len
) != 0)
1015 from_nr
= strtoul (&from
[prefix_len
], &from_end
, decimal_ellipsis
? 10 : 16);
1016 if (*from_end
!= '\0' || (from_nr
== UINT_MAX
&& errno
== ERANGE
)
1017 || ((to_nr
= strtoul (&to
[prefix_len
], &to_end
,
1018 decimal_ellipsis
? 10 : 16)) == UINT_MAX
1022 lr_error (lr
, _("<%s> and <%s> are invalid names for range"), from
, to
);
1026 if (from_nr
> to_nr
)
1028 lr_error (lr
, _("upper limit in range is smaller than lower limit"));
1032 for (cnt
= from_nr
; cnt
<= to_nr
; cnt
+= step
)
1035 obstack_printf (ob
, decimal_ellipsis
? "%.*s%0*d" : "%.*s%0*X",
1036 prefix_len
, from
, len1
- prefix_len
, cnt
);
1037 obstack_1grow (ob
, '\0');
1038 name_end
= obstack_finish (ob
);
1040 newp
= (struct charseq
*) obstack_alloc (ob
, sizeof (*newp
) + nbytes
);
1041 newp
->nbytes
= nbytes
;
1042 memcpy (newp
->bytes
, bytes
, nbytes
);
1043 newp
->name
= name_end
;
1045 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1046 if ((name_end
[0] == 'U' || name_end
[0] == 'P')
1047 && (len1
== 5 || len1
== 9))
1049 /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where
1050 xxxx and xxxxxxxx are hexadecimal numbers. In this case
1051 we use the value of xxxx or xxxxxxxx as the UCS4 value of
1052 this character and we don't have to consult the repertoire
1055 If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx
1056 and xxxxxxxx also give the code point in UCS4 but this must
1057 be in the private, i.e., unassigned, area. This should be
1058 used for characters which do not (yet) have an equivalent
1059 in ISO 10646 and Unicode. */
1063 newp
->ucs4
= strtoul (name_end
+ 1, &endp
, 16);
1064 if (endp
- name_end
!= len1
1065 || (newp
->ucs4
== ~((uint32_t) 0) && errno
== ERANGE
)
1066 || newp
->ucs4
>= 0x80000000)
1067 /* This wasn't successful. Signal this name cannot be a
1068 correct UCS value. */
1069 newp
->ucs4
= UNINITIALIZED_CHAR_VALUE
;
1072 insert_entry (ht
, name_end
, len1
, newp
);
1073 insert_entry (bt
, newp
->bytes
, nbytes
, newp
);
1074 /* Please note we don't examine the return value since it is no error
1075 if we have two definitions for a symbol. */
1077 /* Increment the value in the byte sequence. */
1078 if (++bytes
[nbytes
- 1] == '\0')
1086 _("resulting bytes for range not representable."));
1089 while (++bytes
[b
--] == 0);
1096 charmap_find_symbol (const struct charmap_t
*cm
, const char *bytes
,
1101 return (find_entry ((hash_table
*) &cm
->byte_table
, bytes
, nbytes
, &result
)
1102 < 0 ? NULL
: (struct charseq
*) result
);