Update.
[glibc.git] / locale / programs / charmap.c
blob416615a1b9813810d70ad553679f2eaa9336bd63
1 /* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <ctype.h>
25 #include <dirent.h>
26 #include <errno.h>
27 #include <libintl.h>
28 #include <obstack.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
33 #include "error.h"
34 #include "linereader.h"
35 #include "charset.h"
38 /* Uncomment following line for production version. */
39 /* define NDEBUG 1 */
40 #include <assert.h>
43 /* Define the lookup function. */
44 #include "charmap-kw.h"
47 void *xmalloc (size_t __n);
49 /* Prototypes for local functions. */
50 static struct charset_t *parse_charmap (const char *filename);
51 static void new_width (struct linereader *cmfile, struct charset_t *result,
52 const char *from, const char *to,
53 unsigned long int width);
56 struct charset_t *
57 charmap_read (const char *filename)
59 const char *pathnfile;
60 struct charset_t *result = NULL;
62 if (filename != NULL)
64 if (euidaccess (filename, R_OK) >= 0)
65 pathnfile = filename;
66 else
68 char *cp = xmalloc (strlen (filename) + sizeof CHARMAP_PATH + 1);
69 stpcpy (stpcpy (stpcpy (cp, CHARMAP_PATH), "/"), filename);
71 pathnfile = (const char *) cp;
74 result = parse_charmap (pathnfile);
76 if (result == NULL && !be_quiet)
77 error (0, errno, _("character map file `%s' not found"), filename);
80 if (result == NULL)
82 /* OK, one more try. We also accept the names given to the
83 character sets in the files. Sometimes they differ from the
84 file name. */
85 DIR *dir;
86 struct dirent *dirent;
88 dir = opendir (CHARMAP_PATH);
89 if (dir == NULL)
91 while ((dirent = readdir (dir)) != NULL)
92 if (strcmp (dirent->d_name, ".") != 0
93 && strcmp (dirent->d_name, "..") != 0)
95 char buf[sizeof (CHARMAP_PATH)
96 + strlen (dirent->d_name) + 1];
97 FILE *fp;
98 #ifdef _DIRENT_HAVE_D_TYPE
99 if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_REG)
100 continue;
101 #endif
102 stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"),
103 dirent->d_name);
105 fp = fopen (buf, "r");
106 if (fp != NULL)
108 char *name = NULL;
110 while (!feof (fp))
112 char junk[BUFSIZ];
114 if (fscanf (fp, " <code_set_name> %as", &name) == 1)
115 break;
117 while (fgets (junk, sizeof junk, fp) != NULL
118 && strchr (junk, '\n') == NULL)
119 continue;
122 fclose (fp);
124 if (name != NULL)
126 if (strcmp (name, filename) == 0)
128 result = parse_charmap (buf);
130 free (buf);
132 if (result)
133 return result;
135 break;
138 free (name);
143 closedir (dir);
147 if (result == NULL)
149 pathnfile = CHARMAP_PATH "/" DEFAULT_CHARMAP;
151 result = parse_charmap (pathnfile);
153 if (result == NULL)
154 error (4, errno, _("default character map file `%s' not found"),
155 DEFAULT_CHARMAP);
158 return result;
162 static struct charset_t *
163 parse_charmap (const char *filename)
165 struct linereader *cmfile;
166 struct charset_t *result;
167 int state;
168 enum token_t expected_tok = tok_error;
169 const char *expected_str = NULL;
170 char *from_name = NULL;
171 char *to_name = NULL;
173 /* Determine path. */
174 cmfile = lr_open (filename, charmap_hash);
175 if (cmfile == NULL)
177 if (strchr (filename, '/') == NULL)
179 /* Look in the systems charmap directory. */
180 char *buf = xmalloc (strlen (filename) + 1 + sizeof (CHARMAP_PATH));
182 stpcpy (stpcpy (stpcpy (buf, CHARMAP_PATH), "/"), filename);
183 cmfile = lr_open (buf, charmap_hash);
185 if (cmfile == NULL)
186 free (buf);
189 if (cmfile == NULL)
190 return NULL;
193 /* Allocate room for result. */
194 result = (struct charset_t *) xmalloc (sizeof (struct charset_t));
195 memset (result, '\0', sizeof (struct charset_t));
196 /* The default DEFAULT_WIDTH is 1. */
197 result->width_default = 1;
199 #define obstack_chunk_alloc malloc
200 #define obstack_chunk_free free
201 obstack_init (&result->mem_pool);
203 if (init_hash (&result->char_table, 256))
205 free (result);
206 return NULL;
209 /* We use a state machine to describe the charmap description file
210 format. */
211 state = 1;
212 while (1)
214 /* What's on? */
215 struct token *now = lr_token (cmfile, NULL);
216 enum token_t nowtok = now->tok;
217 struct token *arg;
219 if (nowtok == tok_eof)
220 break;
222 switch (state)
224 case 1:
225 /* The beginning. We expect the special declarations, EOL or
226 `CHARMAP'. */
227 if (nowtok == tok_eol)
228 /* Ignore empty lines. */
229 continue;
231 if (nowtok == tok_charmap)
233 from_name = NULL;
234 to_name = NULL;
236 /* We have to set up the real work. Fill in some
237 default values. */
238 if (result->mb_cur_max == 0)
239 result->mb_cur_max = 1;
240 if (result->mb_cur_min == 0)
241 result->mb_cur_min = result->mb_cur_max;
242 if (result->mb_cur_min > result->mb_cur_max && !be_quiet)
244 error (0, 0, _("\
245 %s: <mb_cur_max> must be greater than <mb_cur_min>\n"),
246 cmfile->fname);
248 result->mb_cur_min = result->mb_cur_max;
251 lr_ignore_rest (cmfile, 1);
253 state = 2;
254 continue;
257 if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max
258 && nowtok != tok_mb_cur_min && nowtok != tok_escape_char
259 && nowtok != tok_comment_char && nowtok != tok_g0esc
260 && nowtok != tok_g1esc && nowtok != tok_g2esc
261 && nowtok != tok_g3esc)
263 lr_error (cmfile, _("syntax error in prolog: %s"),
264 _("illegal definition"));
266 lr_ignore_rest (cmfile, 0);
267 continue;
270 /* We know that we need an argument. */
271 arg = lr_token (cmfile, NULL);
273 switch (nowtok)
275 case tok_code_set_name:
276 if (arg->tok != tok_ident)
278 badarg:
279 lr_error (cmfile, _("syntax error in prolog: %s"),
280 _("bad argument"));
282 lr_ignore_rest (cmfile, 0);
283 continue;
286 result->code_set_name = obstack_copy0 (&result->mem_pool,
287 arg->val.str.start,
288 arg->val.str.len);
290 lr_ignore_rest (cmfile, 1);
291 continue;
293 case tok_mb_cur_max:
294 case tok_mb_cur_min:
295 if (arg->tok != tok_number)
296 goto badarg;
298 if (arg->val.num < 1 || arg->val.num > 4)
300 lr_error (cmfile,
301 _("value for <%s> must lie between 1 and 4"),
302 nowtok == tok_mb_cur_min ? "mb_cur_min"
303 : "mb_cur_max");
305 lr_ignore_rest (cmfile, 0);
306 continue;
308 if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0
309 && (int) arg->val.num < result->mb_cur_min)
310 || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0
311 && (int) arg->val.num > result->mb_cur_max))
313 lr_error (cmfile, _("\
314 value of <mb_cur_max> must be greater than the value of <mb_cur_min>"));
316 lr_ignore_rest (cmfile, 0);
317 continue;
320 if (nowtok == tok_mb_cur_max)
321 result->mb_cur_max = arg->val.num;
322 else
323 result->mb_cur_min = arg->val.num;
325 lr_ignore_rest (cmfile, 1);
326 continue;
328 case tok_escape_char:
329 case tok_comment_char:
330 if (arg->tok != tok_ident)
331 goto badarg;
333 if (arg->val.str.len != 1)
335 lr_error (cmfile, _("\
336 argument to <%s> must be a single character"),
337 nowtok == tok_escape_char ? "escape_char"
338 : "comment_char");
340 lr_ignore_rest (cmfile, 0);
341 continue;
344 if (nowtok == tok_escape_char)
345 cmfile->escape_char = *arg->val.str.start;
346 else
347 cmfile->comment_char = *arg->val.str.start;
349 lr_ignore_rest (cmfile, 1);
350 continue;
352 case tok_g0esc:
353 case tok_g1esc:
354 case tok_g2esc:
355 case tok_g3esc:
356 lr_ignore_rest (cmfile, 0); /* XXX */
357 continue;
359 default:
360 /* Cannot happen. */
361 assert (! "Should not happen");
363 break;
365 case 2:
366 /* We have seen `CHARMAP' and now are in the body. Each line
367 must have the format "%s %s %s\n" or "%s...%s %s %s\n". */
368 if (nowtok == tok_eol)
369 /* Ignore empty lines. */
370 continue;
372 if (nowtok == tok_end)
374 expected_tok = tok_charmap;
375 expected_str = "CHARMAP";
376 state = 90;
377 continue;
380 if (nowtok != tok_bsymbol)
382 lr_error (cmfile, _("syntax error in %s definition: %s"),
383 "CHARMAP", _("no symbolic name given"));
385 lr_ignore_rest (cmfile, 0);
386 continue;
389 /* If the previous line was not completely correct free the
390 used memory. */
391 if (from_name != NULL)
392 obstack_free (&result->mem_pool, from_name);
394 from_name = (char *) obstack_copy0 (&result->mem_pool,
395 now->val.str.start,
396 now->val.str.len);
397 to_name = NULL;
399 state = 3;
400 continue;
402 case 3:
403 /* We have two possibilities: We can see an ellipsis or an
404 encoding value. */
405 if (nowtok == tok_ellipsis)
407 state = 4;
408 continue;
410 /* FALLTHROUGH */
412 case 5:
413 if (nowtok != tok_charcode && nowtok != tok_ucs2
414 && nowtok != tok_ucs4)
416 lr_error (cmfile, _("syntax error in %s definition: %s"),
417 "CHARMAP", _("illegal encoding given"));
419 lr_ignore_rest (cmfile, 0);
421 state = 2;
422 continue;
425 if (nowtok == tok_charcode)
426 /* Write char value in table. */
427 charset_new_char (cmfile, result, now->val.charcode.nbytes,
428 now->val.charcode.val, from_name, to_name);
429 else
430 /* Determine ISO 10646 value and write into table. */
431 charset_new_unicode (cmfile, result, now->val.charcode.nbytes,
432 now->val.charcode.val, from_name, to_name);
434 /* Ignore trailing comment silently. */
435 lr_ignore_rest (cmfile, 0);
437 from_name = NULL;
438 to_name = NULL;
440 state = 2;
441 continue;
443 case 4:
444 if (nowtok != tok_bsymbol)
446 lr_error (cmfile, _("syntax error in %s definition: %s"),
447 "CHARMAP",
448 _("no symbolic name given for end of range"));
450 lr_ignore_rest (cmfile, 0);
451 continue;
454 /* If the previous line was not completely correct free the
455 used memory. */
456 to_name = (char *) obstack_copy0 (&result->mem_pool,
457 cmfile->token.val.str.start,
458 cmfile->token.val.str.len);
460 state = 3;
461 continue;
463 case 90:
464 if (nowtok != expected_tok)
465 lr_error (cmfile, _("\
466 `%1$s' definition does not end with `END %1$s'"), expected_str);
468 lr_ignore_rest (cmfile, nowtok == expected_tok);
469 state = 91;
470 continue;
472 case 91:
473 /* Waiting for WIDTH... */
474 if (nowtok == tok_eol)
475 /* Ignore empty lines. */
476 continue;
478 if (nowtok == tok_width_default)
480 state = 92;
481 continue;
484 if (nowtok == tok_width)
486 lr_ignore_rest (cmfile, 1);
487 state = 93;
488 continue;
491 if (nowtok == tok_width_variable)
493 lr_ignore_rest (cmfile, 1);
494 state = 98;
495 continue;
498 lr_error (cmfile, _("\
499 only WIDTH definitions are allowed to follow the CHARMAP definition"));
501 lr_ignore_rest (cmfile, 0);
502 continue;
504 case 92:
505 if (nowtok != tok_number)
506 lr_error (cmfile, _("value for %s must be an integer"),
507 "WIDTH_DEFAULT");
508 else
509 result->width_default = now->val.num;
511 lr_ignore_rest (cmfile, nowtok == tok_number);
513 state = 91;
514 continue;
516 case 93:
517 /* We now expect `END WIDTH' or lines of the format "%s %d\n" or
518 "%s...%s %d\n". */
519 if (nowtok == tok_eol)
520 /* ignore empty lines. */
521 continue;
523 if (nowtok == tok_end)
525 expected_tok = tok_width;
526 expected_str = "WIDTH";
527 state = 90;
528 continue;
531 if (nowtok != tok_bsymbol)
533 lr_error (cmfile, _("syntax error in %s definition: %s"),
534 "WIDTH", _("no symbolic name given"));
536 lr_ignore_rest (cmfile, 0);
537 continue;
540 if (from_name != NULL)
541 obstack_free (&result->mem_pool, from_name);
543 from_name = (char *) obstack_copy0 (&result->mem_pool,
544 now->val.str.start,
545 now->val.str.len);
546 to_name = NULL;
548 state = 94;
549 continue;
551 case 94:
552 if (nowtok == tok_ellipsis)
554 state = 95;
555 continue;
558 case 96:
559 if (nowtok != tok_number)
560 lr_error (cmfile, _("value for %s must be an integer"),
561 "WIDTH");
562 else
564 /* Store width for chars. */
565 new_width (cmfile, result, from_name, to_name, now->val.num);
567 from_name = NULL;
568 to_name = NULL;
571 lr_ignore_rest (cmfile, nowtok == tok_number);
573 state = 93;
574 continue;
576 case 95:
577 if (nowtok != tok_bsymbol)
579 lr_error (cmfile, _("syntax error in %s definition: %s"),
580 "WIDTH", _("no symbolic name given for end of range"));
582 lr_ignore_rest (cmfile, 0);
584 state = 93;
585 continue;
588 to_name = (char *) obstack_copy0 (&result->mem_pool,
589 now->val.str.start,
590 now->val.str.len);
592 state = 96;
593 continue;
595 case 98:
596 /* We now expect `END WIDTH_VARIABLE' or lines of the format
597 "%s\n" or "%s...%s\n". */
598 if (nowtok == tok_eol)
599 /* ignore empty lines. */
600 continue;
602 if (nowtok == tok_end)
604 expected_tok = tok_width_variable;
605 expected_str = "WIDTH_VARIABLE";
606 state = 90;
607 continue;
610 if (nowtok != tok_bsymbol)
612 lr_error (cmfile, _("syntax error in %s definition: %s"),
613 "WIDTH_VARIABLE", _("no symbolic name given"));
615 lr_ignore_rest (cmfile, 0);
617 continue;
620 if (from_name != NULL)
621 obstack_free (&result->mem_pool, from_name);
623 from_name = (char *) obstack_copy0 (&result->mem_pool,
624 now->val.str.start,
625 now->val.str.len);
626 to_name = NULL;
628 state = 99;
629 continue;
631 case 99:
632 if (nowtok == tok_ellipsis)
633 state = 100;
635 /* Store info. */
636 from_name = NULL;
638 /* Warn */
639 state = 98;
640 continue;
642 case 100:
643 if (nowtok != tok_bsymbol)
644 lr_error (cmfile, _("syntax error in %s definition: %s"),
645 "WIDTH_VARIABLE",
646 _("no symbolic name given for end of range"));
647 else
649 to_name = (char *) obstack_copy0 (&result->mem_pool,
650 now->val.str.start,
651 now->val.str.len);
652 /* XXX Enter value into table. */
655 lr_ignore_rest (cmfile, nowtok == tok_bsymbol);
657 state = 98;
658 continue;
660 default:
661 error (5, 0, _("%s: error in state machine"), __FILE__);
662 /* NOTREACHED */
664 break;
667 if (state != 91 && !be_quiet)
668 error (0, 0, _("%s: premature end of file"), cmfile->fname);
670 lr_close (cmfile);
672 return result;
676 static void
677 new_width (struct linereader *cmfile, struct charset_t *result,
678 const char *from, const char *to, unsigned long int width)
680 unsigned int from_val, to_val;
682 from_val = charset_find_value (result, from, strlen (from));
683 if ((wchar_t) from_val == ILLEGAL_CHAR_VALUE)
685 lr_error (cmfile, _("unknown character `%s'"), from);
686 return;
689 if (to == NULL)
690 to_val = from_val;
691 else
693 to_val = charset_find_value (result, to, strlen (to));
694 if ((wchar_t) to_val == ILLEGAL_CHAR_VALUE)
696 lr_error (cmfile, _("unknown character `%s'"), to);
697 return;
701 if (result->nwidth_rules >= result->nwidth_rules_max)
703 size_t new_size = result->nwidth_rules + 32;
704 struct width_rule *new_rules =
705 (struct width_rule *) obstack_alloc (&result->mem_pool,
706 (new_size
707 * sizeof (struct width_rule)));
709 memcpy (new_rules, result->width_rules,
710 result->nwidth_rules_max * sizeof (struct width_rule));
712 result->width_rules = new_rules;
713 result->nwidth_rules_max = new_size;
716 result->width_rules[result->nwidth_rules].from = from_val;
717 result->width_rules[result->nwidth_rules].to = to_val;
718 result->width_rules[result->nwidth_rules].width = (unsigned int) width;
719 ++result->nwidth_rules;