Update copyright notices with scripts/update-copyrights
[glibc.git] / locale / programs / repertoire.c
blob28e4bcc15fcfd4e33975cf1fd50378389250edf2
/* Copyright (C) 1998-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
22 #include <errno.h>
23 #include <error.h>
24 #include <limits.h>
25 #include <obstack.h>
26 #include <search.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <stdint.h>
32 #include "localedef.h"
33 #include "linereader.h"
34 #include "charmap.h"
35 #include "repertoire.h"
36 #include "simple-hash.h"
39 /* Simple keyword hashing for the repertoiremap. */
40 static const struct keyword_t *repertoiremap_hash (const char *str,
41 unsigned int len);
42 static void repertoire_new_char (struct linereader *lr, hash_table *ht,
43 hash_table *rt, struct obstack *ob,
44 uint32_t value, const char *from,
45 const char *to, int decimal_ellipsis);
46 static int repertoire_compare (const void *p1, const void *p2);
48 /* Already known repertoire maps. */
49 static void *known;
51 /* List of repertoire maps which are not available and which have been
52 reported to not be. */
53 static void *unavailable;
56 struct repertoire_t *
57 repertoire_read (const char *filename)
59 struct linereader *repfile;
60 struct repertoire_t *result;
61 struct repertoire_t **resultp;
62 struct repertoire_t search;
63 int state;
64 char *from_name = NULL;
65 char *to_name = NULL;
66 enum token_t ellipsis = tok_none;
68 search.name = filename;
69 resultp = tfind (&search, &known, &repertoire_compare);
70 if (resultp != NULL)
71 return *resultp;
73 /* Determine path. */
74 repfile = lr_open (filename, repertoiremap_hash);
75 if (repfile == NULL)
77 if (strchr (filename, '/') == NULL)
79 char *i18npath = getenv ("I18NPATH");
80 if (i18npath != NULL && *i18npath != '\0')
82 const size_t pathlen = strlen (i18npath);
83 char i18npathbuf[pathlen + 1];
84 char path[strlen (filename) + 1 + pathlen
85 + sizeof ("/repertoiremaps/") - 1];
86 char *next;
87 i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1);
89 while (repfile == NULL
90 && (next = strsep (&i18npath, ":")) != NULL)
92 stpcpy (stpcpy (stpcpy (path, next), "/repertoiremaps/"),
93 filename);
95 repfile = lr_open (path, repertoiremap_hash);
97 if (repfile == NULL)
99 stpcpy (stpcpy (path, next), filename);
101 repfile = lr_open (path, repertoiremap_hash);
106 if (repfile == NULL)
108 /* Look in the systems charmap directory. */
109 char *buf = xmalloc (strlen (filename) + 1
110 + sizeof (REPERTOIREMAP_PATH));
112 stpcpy (stpcpy (stpcpy (buf, REPERTOIREMAP_PATH), "/"),
113 filename);
114 repfile = lr_open (buf, repertoiremap_hash);
116 free (buf);
120 if (repfile == NULL)
121 return NULL;
124 /* We don't want symbolic names in string to be translated. */
125 repfile->translate_strings = 0;
127 /* Allocate room for result. */
128 result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t));
129 memset (result, '\0', sizeof (struct repertoire_t));
131 result->name = xstrdup (filename);
133 #define obstack_chunk_alloc malloc
134 #define obstack_chunk_free free
135 obstack_init (&result->mem_pool);
137 if (init_hash (&result->char_table, 256)
138 || init_hash (&result->reverse_table, 256)
139 || init_hash (&result->seq_table, 256))
141 free (result);
142 return NULL;
145 /* We use a state machine to describe the charmap description file
146 format. */
147 state = 1;
148 while (1)
150 /* What's on? */
151 struct token *now = lr_token (repfile, NULL, NULL, NULL, verbose);
152 enum token_t nowtok = now->tok;
153 struct token *arg;
155 if (nowtok == tok_eof)
156 break;
158 switch (state)
160 case 1:
161 /* We haven't yet read any character definition. This is where
162 we accept escape_char and comment_char definitions. */
163 if (nowtok == tok_eol)
164 /* Ignore empty lines. */
165 continue;
167 if (nowtok == tok_escape_char || nowtok == tok_comment_char)
169 /* We know that we need an argument. */
170 arg = lr_token (repfile, NULL, NULL, NULL, verbose);
172 if (arg->tok != tok_ident)
174 lr_error (repfile, _("syntax error in prolog: %s"),
175 _("bad argument"));
177 lr_ignore_rest (repfile, 0);
178 continue;
181 if (arg->val.str.lenmb != 1)
183 lr_error (repfile, _("\
184 argument to <%s> must be a single character"),
185 nowtok == tok_escape_char ? "escape_char"
186 : "comment_char");
188 lr_ignore_rest (repfile, 0);
189 continue;
192 if (nowtok == tok_escape_char)
193 repfile->escape_char = *arg->val.str.startmb;
194 else
195 repfile->comment_char = *arg->val.str.startmb;
197 lr_ignore_rest (repfile, 1);
198 continue;
201 if (nowtok == tok_charids)
203 lr_ignore_rest (repfile, 1);
205 state = 2;
206 continue;
209 /* Otherwise we start reading the character definitions. */
210 state = 2;
211 /* FALLTHROUGH */
213 case 2:
214 /* We are now are in the body. Each line
215 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
216 if (nowtok == tok_eol)
217 /* Ignore empty lines. */
218 continue;
220 if (nowtok == tok_end)
222 state = 90;
223 continue;
226 if (nowtok != tok_bsymbol)
228 lr_error (repfile,
229 _("syntax error in repertoire map definition: %s"),
230 _("no symbolic name given"));
232 lr_ignore_rest (repfile, 0);
233 continue;
236 /* If the previous line was not completely correct free the
237 used memory. */
238 if (from_name != NULL)
239 obstack_free (&result->mem_pool, from_name);
241 from_name = (char *) obstack_copy0 (&result->mem_pool,
242 now->val.str.startmb,
243 now->val.str.lenmb);
244 to_name = NULL;
246 state = 3;
247 continue;
249 case 3:
250 /* We have two possibilities: We can see an ellipsis or an
251 encoding value. */
252 if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4
253 || nowtok == tok_ellipsis2)
255 ellipsis = nowtok;
256 state = 4;
257 continue;
259 /* FALLTHROUGH */
261 case 5:
262 /* We expect a value of the form <Uxxxx> or <Uxxxxxxxx> where
263 the xxx mean a hexadecimal value. */
264 state = 2;
266 errno = 0;
267 if (nowtok != tok_ucs4)
269 lr_error (repfile,
270 _("syntax error in repertoire map definition: %s"),
271 _("no <Uxxxx> or <Uxxxxxxxx> value given"));
273 lr_ignore_rest (repfile, 0);
274 continue;
277 /* We've found a new valid definition. */
278 repertoire_new_char (repfile, &result->char_table,
279 &result->reverse_table, &result->mem_pool,
280 now->val.ucs4, from_name, to_name,
281 ellipsis != tok_ellipsis2);
283 /* Ignore the rest of the line. */
284 lr_ignore_rest (repfile, 0);
286 from_name = NULL;
287 to_name = NULL;
289 continue;
291 case 4:
292 if (nowtok != tok_bsymbol)
294 lr_error (repfile,
295 _("syntax error in repertoire map definition: %s"),
296 _("no symbolic name given for end of range"));
298 lr_ignore_rest (repfile, 0);
299 state = 2;
300 continue;
303 /* Copy the to-name in a safe place. */
304 to_name = (char *) obstack_copy0 (&result->mem_pool,
305 repfile->token.val.str.startmb,
306 repfile->token.val.str.lenmb);
308 state = 5;
309 continue;
311 case 90:
312 if (nowtok != tok_charids)
313 lr_error (repfile, _("\
314 %1$s: definition does not end with `END %1$s'"), "CHARIDS");
316 lr_ignore_rest (repfile, nowtok == tok_charids);
317 break;
320 break;
323 if (state != 2 && state != 90 && !be_quiet)
324 WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"),
325 repfile->fname));
327 lr_close (repfile);
329 if (tsearch (result, &known, &repertoire_compare) == NULL)
330 /* Something went wrong. */
331 WITH_CUR_LOCALE (error (0, errno, _("cannot save new repertoire map")));
333 return result;
337 void
338 repertoire_complain (const char *name)
340 if (tfind (name, &unavailable, (__compar_fn_t) strcmp) == NULL)
342 WITH_CUR_LOCALE (error (0, errno, _("\
343 repertoire map file `%s' not found"), name));
345 /* Remember that we reported this map. */
346 tsearch (name, &unavailable, (__compar_fn_t) strcmp);
351 static int
352 repertoire_compare (const void *p1, const void *p2)
354 struct repertoire_t *r1 = (struct repertoire_t *) p1;
355 struct repertoire_t *r2 = (struct repertoire_t *) p2;
357 return strcmp (r1->name, r2->name);
361 static const struct keyword_t *
362 repertoiremap_hash (const char *str, unsigned int len)
364 static const struct keyword_t wordlist[] =
366 {"escape_char", tok_escape_char, 0},
367 {"comment_char", tok_comment_char, 0},
368 {"CHARIDS", tok_charids, 0},
369 {"END", tok_end, 0},
372 if (len == 11 && memcmp (wordlist[0].name, str, 11) == 0)
373 return &wordlist[0];
374 if (len == 12 && memcmp (wordlist[1].name, str, 12) == 0)
375 return &wordlist[1];
376 if (len == 7 && memcmp (wordlist[2].name, str, 7) == 0)
377 return &wordlist[2];
378 if (len == 3 && memcmp (wordlist[3].name, str, 3) == 0)
379 return &wordlist[3];
381 return NULL;
385 static void
386 repertoire_new_char (struct linereader *lr, hash_table *ht, hash_table *rt,
387 struct obstack *ob, uint32_t value, const char *from,
388 const char *to, int decimal_ellipsis)
390 char *from_end;
391 char *to_end;
392 const char *cp;
393 char *buf = NULL;
394 int prefix_len, len1, len2;
395 unsigned long int from_nr, to_nr, cnt;
397 if (to == NULL)
399 insert_entry (ht, from, strlen (from),
400 (void *) (unsigned long int) value);
401 /* Please note that it isn't a bug if a symbol is defined more
402 than once. All later definitions are simply discarded. */
404 insert_entry (rt, obstack_copy (ob, &value, sizeof (value)),
405 sizeof (value), (void *) from);
407 return;
410 /* We have a range: the names must have names with equal prefixes
411 and an equal number of digits, where the second number is greater
412 or equal than the first. */
413 len1 = strlen (from);
414 len2 = strlen (to);
416 if (len1 != len2)
418 invalid_range:
419 lr_error (lr, _("invalid names for character range"));
420 return;
423 cp = &from[len1 - 1];
424 if (decimal_ellipsis)
425 while (isdigit (*cp) && cp >= from)
426 --cp;
427 else
428 while (isxdigit (*cp) && cp >= from)
430 if (!isdigit (*cp) && !isupper (*cp))
431 lr_error (lr, _("\
432 hexadecimal range format should use only capital characters"));
433 --cp;
436 prefix_len = (cp - from) + 1;
438 if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
439 goto invalid_range;
441 errno = 0;
442 from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16);
443 if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE)
444 || ((to_nr = strtoul (&to[prefix_len], &to_end,
445 decimal_ellipsis ? 10 : 16)) == ULONG_MAX
446 && errno == ERANGE)
447 || *to_end != '\0')
449 lr_error (lr, _("<%s> and <%s> are invalid names for range"),
450 from, to);
451 return;
454 if (from_nr > to_nr)
456 lr_error (lr, _("upper limit in range is smaller than lower limit"));
457 return;
460 for (cnt = from_nr; cnt <= to_nr; ++cnt)
462 uint32_t this_value = value + (cnt - from_nr);
464 obstack_printf (ob, decimal_ellipsis ? "%.*s%0*ld" : "%.*s%0*lX",
465 prefix_len, from, len1 - prefix_len, cnt);
466 obstack_1grow (ob, '\0');
468 insert_entry (ht, buf, len1,
469 (void *) (unsigned long int) this_value);
470 /* Please note we don't examine the return value since it is no error
471 if we have two definitions for a symbol. */
473 insert_entry (rt, obstack_copy (ob, &this_value, sizeof (this_value)),
474 sizeof (this_value), (void *) from);
479 uint32_t
480 repertoire_find_value (const struct repertoire_t *rep, const char *name,
481 size_t len)
483 void *result;
485 if (rep == NULL)
486 return ILLEGAL_CHAR_VALUE;
488 if (find_entry ((hash_table *) &rep->char_table, name, len, &result) < 0)
489 return ILLEGAL_CHAR_VALUE;
491 return (uint32_t) ((unsigned long int) result);
495 const char *
496 repertoire_find_symbol (const struct repertoire_t *rep, uint32_t ucs)
498 void *result;
500 if (rep == NULL)
501 return NULL;
503 if (find_entry ((hash_table *) &rep->reverse_table, &ucs, sizeof (ucs),
504 &result) < 0)
505 return NULL;
507 return (const char *) result;
511 struct charseq *
512 repertoire_find_seq (const struct repertoire_t *rep, uint32_t ucs)
514 void *result;
516 if (rep == NULL)
517 return NULL;
519 if (find_entry ((hash_table *) &rep->seq_table, &ucs, sizeof (ucs),
520 &result) < 0)
521 return NULL;
523 return (struct charseq *) result;