Thu Nov 2 19:24:37 1995 Roland McGrath <roland@churchy.gnu.ai.mit.edu>
[glibc.git] / locale / charmap.c
blobad1075e5bc4f1d7f8d17d624da4b3442fd03378e
1 /* Copyright (C) 1995 Free Software Foundation, Inc.
3 The GNU C Library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Library General Public License as
5 published by the Free Software Foundation; either version 2 of the
6 License, or (at your option) any later version.
8 The GNU C Library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Library General Public License for more details.
13 You should have received a copy of the GNU Library General Public
14 License along with the GNU C Library; see the file COPYING.LIB. If
15 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
16 Cambridge, MA 02139, USA. */
18 #include <ctype.h>
19 #include <errno.h>
20 #include <libintl.h>
21 #include <limits.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
27 #include "localedef.h"
28 #include "hash.h"
30 /* Charmap database filled in by charmap_read: the parameters taken
   from the prolog of the charmap file plus the hash table of symbolic
   character names. */
31 struct charmap charmap_data;
33 /* Number of the line most recently read from the charmap file. It is
   reset by charmap_read and incremented by read_prolog and read_body;
   the diagnostics print it. */
34 static unsigned int line_no;
36 /* Prototypes for local functions: read_prolog consumes everything up
   to the `CHARMAP' line, read_body consumes the entries after it and
   returns the largest character value it saw. */
37 static void read_prolog (FILE *infile);
38 static unsigned long read_body (FILE *infile);
41 /* Read complete table of symbolic names for character set from file. If
42 this file does not exist or is not readable a default file is tried.
43 If this also is not readable no character map is defined. */
44 void
45 charmap_read (const char *filename)
47 unsigned long max_char;
48 long path_max = pathconf (".", _PC_PATH_MAX);
49 char buf[path_max];
50 FILE *infile = NULL;
52 /* Initialize charmap data. */
53 charmap_data.codeset_name = NULL;
54 charmap_data.mb_cur_max = -1;
55 charmap_data.mb_cur_min = -1;
56 charmap_data.escape_char = '\\';
57 charmap_data.comment_char = '#';
59 if (filename != NULL)
61 strcpy (buf, filename);
62 infile = fopen (filename, "r");
63 if (infile == NULL && filename[0] != '/')
65 snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, filename);
66 infile = fopen (buf, "r");
69 if (infile == NULL)
71 if (filename != NULL)
72 error (0, errno, gettext ("input file `%s' not found"), filename);
74 snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, DEFAULT_CHARMAP);
75 infile = fopen (buf, "r");
77 if (infile == NULL)
78 error (4, errno, gettext ("input file `%s' not found"), filename);
81 charmap_data.filename = buf;
82 init_hash (&charmap_data.table, 500);
83 line_no = 0;
85 /* Read the prolog of the charmap file. */
86 read_prolog (infile);
88 /* Last works on the charmap tables global data. */
89 if (charmap_data.mb_cur_max == -1)
90 charmap_data.mb_cur_max = 1;
91 if (charmap_data.mb_cur_min == -1)
92 charmap_data.mb_cur_min = charmap_data.mb_cur_max;
94 if ((size_t) charmap_data.mb_cur_max > sizeof (long))
96 error (2, 0, gettext ("program limitation: for now only upto %Zu "
97 "bytes per character are allowed"), sizeof (long));
100 /* Now process all entries. */
101 max_char = read_body (infile);
103 /* We don't need the file anymore. */
104 fclose (infile);
107 /* Determine the optimal table size when using the simple modulo hashing
108 function. */
109 if (max_char >= 256)
111 int size;
112 /* Current best values, initialized to some never reached high value. */
113 int best_count = 10000;
114 int best_size = 10000;
115 int best_product = best_count * best_size;
117 /* Give warning. */
118 error (-1, 0, gettext ("computing character table size: this may take "
119 "a while"));
121 for (size = 256; size <= best_product; ++size)
123 /* Array with slot counters. */
124 int cnt[size];
125 /* Current character. */
126 int ch;
127 /* Maximal number of characters in any slot. */
128 int maxcnt = 0;
129 /* Product of current size and maximal count. */
130 int product = 0;
131 /* Iteration pointer through hashing table. */
132 char *ptr = NULL;
134 /* Initializes counters to zero. */
135 memset(cnt, 0, size * sizeof (int));
137 /* Iterate through whole hashing table. */
138 while (product < best_product
139 && iterate_table (&charmap_data.table, (void **) &ptr,
140 (void **) &ch))
142 /* Increment slot counter. */
143 ++cnt[ch % size];
144 /* Test for current maximum. */
145 if (cnt[ch % size] > maxcnt)
147 maxcnt = cnt[ch % size];
148 product = maxcnt * size;
152 if (product < best_product)
154 best_count = maxcnt;
155 best_size = size;
156 best_product = best_count * best_size;
160 charmap_data.hash_size = best_size;
161 charmap_data.hash_layers = best_count;
163 else
165 charmap_data.hash_size = 256;
166 charmap_data.hash_layers = 1;
/* Report a syntax error for the current line (`line_no') of the charmap
   file and abandon that line by jumping to the label `end_of_loop'.
   Every function using this macro must therefore provide an
   `end_of_loop' label at the end of its line-reading loop. */
171 #define SYNTAX_ERROR \
172 do { error (0, 0, gettext ("%s:%u: syntax error in charmap file"), \
173 charmap_data.filename, line_no); \
174 goto end_of_loop; } while (0)
176 /* Read the prolog of the charmap file until the line containing `CHARMAP'.
177 All possible entries are processed. */
178 static void
179 read_prolog (FILE *infile)
181 size_t bufsize = sysconf (_SC_LINE_MAX);
182 char buf[bufsize];
184 while (1)
186 char *cp = buf;
187 char len;
189 /* Read the next line. */
190 fgets (buf, bufsize, infile);
191 len = strlen (buf);
193 /* On EOF simply return. */
194 if (len == 0 || buf[len - 1] != '\n')
195 error (4, 0, gettext ("%s: unexpected end of file in charmap"),
196 charmap_data.filename);
198 /* This is the next line. */
199 ++line_no;
201 /* Comments and empty lines are ignored. */
202 if (len == 1 || buf[0] == charmap_data.comment_char)
203 continue;
205 buf[len - 1] = '\0';
207 /* Throw away leading white spaces. This is not defined in POSIX.2
208 so don't do it if conformance is requested. */
209 if (!posix_conformance)
210 while (isspace (*cp))
211 ++cp;
213 /* If `CHARMAP' is read the prolog is over. */
214 if (strncmp (cp, "CHARMAP", 7) == 0
215 && (!posix_conformance || cp[7] == '\0'))
216 return;
218 /* Now it can be only one of special symbols defining the charmap
219 parameters. All are beginning with '<'. */
220 if (*cp != '<')
221 SYNTAX_ERROR;
223 ++cp;
224 if (strncmp (cp, "code_set_name>", 14) == 0)
226 char *startp;
228 #define cp_to_arg(no,pred) \
229 cp += no; \
230 while (isspace (*cp)) \
231 ++cp; \
232 if (*cp == '\0' || !pred (*cp)) \
233 SYNTAX_ERROR;
235 cp_to_arg (14,isgraph)
237 if (charmap_data.codeset_name != NULL)
239 error (0, 0, gettext ("%s:%u: duplicate code set name "
240 "specification"),
241 charmap_data.filename, line_no);
242 free (charmap_data.codeset_name);
245 startp = cp;
246 while (*cp != '\0' && isgraph (*cp) && !isspace (*cp))
247 ++cp;
249 charmap_data.codeset_name = (char *) xmalloc (cp - startp + 1);
250 strncpy (startp, startp, cp - startp);
252 else if (strncmp (cp, "mb_cur_max>", 11) == 0)
254 int new_val;
255 cp_to_arg (11,isdigit)
257 if (charmap_data.mb_cur_max != -1)
258 error (0, 0,
259 gettext ("%s:%u: duplicate definition of mb_cur_max"),
260 charmap_data.filename, line_no);
262 new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
263 if (new_val < 1)
264 error (0, 0, gettext ("%s:%u: illegal value for mb_cur_max: %d"),
265 charmap_data.filename, line_no, new_val);
266 else
267 charmap_data.mb_cur_max = new_val;
269 else if (strncmp (cp, "mb_cur_min>", 11) == 0)
271 int new_val;
272 cp_to_arg (11,isdigit)
274 if (charmap_data.mb_cur_max != -1)
275 error (0, 0,
276 gettext ("%s:%u: duplicate definition of mb_cur_min"),
277 charmap_data.filename, line_no);
279 new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
280 if (new_val < 1)
281 error (0, 0, gettext ("%s:%u: illegal value for mb_cur_min: %d"),
282 charmap_data.filename, line_no, new_val);
283 else
284 charmap_data.mb_cur_min = new_val;
286 else if (strncmp (cp, "escape_char>", 12) == 0)
288 cp_to_arg (12, isgraph)
289 charmap_data.escape_char = *cp;
291 else if (strncmp (cp, "comment_char>", 13) == 0)
293 cp_to_arg (13, isgraph)
294 charmap_data.comment_char = *cp;
296 else
297 SYNTAX_ERROR;
298 end_of_loop:
301 #undef cp_to_arg
304 static unsigned long
305 read_body (FILE *infile)
307 unsigned long max_char = 0;
308 size_t bufsize = sysconf (_SC_LINE_MAX);
309 char buf[bufsize];
310 char name_str[bufsize / 2];
311 char code_str[bufsize / 2];
313 while (1)
315 char *cp = buf;
316 size_t len;
318 /* Read the next line. */
319 fgets (buf, bufsize, infile);
320 len = strlen (buf);
322 /* On EOF simply return. */
323 if (len == 0)
324 error (0, 0, gettext ("%s: `END CHARMAP' is missing"),
325 charmap_data.filename);
327 /* This is the next line. */
328 ++line_no;
330 if (len == bufsize - 1)
332 error (0, 0, gettext ("%s:%u: line too long; use `getconf "
333 "LINE_MAX' to get the current maximum line"
334 "length"), charmap_data.filename, line_no);
337 fgets (buf, bufsize, infile);
338 len = strlen (buf);
340 while (len == bufsize - 1);
341 continue;
344 /* Comments and empty lines are ignored. */
345 if (len == 1 || buf[0] == charmap_data.comment_char)
346 continue;
348 buf[len - 1] = '\0';
350 /* Throw away leading white spaces. This is not defined in POSIX.2
351 so don't do it if conformance is requested. */
352 if (!posix_conformance)
353 while (isspace (*cp))
354 ++cp;
356 if (*cp == '<')
358 char *end1p, *end2p, *start2p;
359 size_t cnt = 0;
360 unsigned long char_value = 0;
362 if (sscanf (cp + 1, "%s %s", name_str, code_str) != 2)
363 SYNTAX_ERROR;
365 end1p = cp = name_str;
366 while (*cp != '\0' && *cp != '>')
368 if (*cp == charmap_data.escape_char)
369 if (*++cp == '\0')
370 SYNTAX_ERROR;
371 *end1p++ = *cp++;
373 if (*cp == '\0')
374 /* No final '>'. Make error condition. */
375 end1p = name_str;
376 else
377 ++cp;
379 *end1p = '\0';
381 if (*cp == '.' && *++cp == '.' && *++cp == '.' && *++cp == '<')
383 /* This might be the alternate form. */
384 start2p = end2p = ++cp;
385 while (*cp != '\0' && *cp != '>')
387 if (*cp == charmap_data.escape_char)
388 if (*++cp == '\0')
389 SYNTAX_ERROR;
390 *end2p = *cp++;
392 if (*cp == '\0')
393 /* NO final '>'. Make error condition. */
394 end2p = start2p;
395 else
396 ++cp;
398 else
399 start2p = end2p = NULL;
402 if (end1p == name_str || (start2p != NULL && start2p != end2p)
403 || *cp != '\0'
404 || *code_str != charmap_data.escape_char)
405 SYNTAX_ERROR;
407 cp = code_str;
410 char *begin;
411 long val;
413 switch (*++cp)
415 case 'd':
416 val = strtol ((begin = cp + 1), &cp, 10);
417 break;
418 case 'x':
419 val = strtol ((begin = cp + 1), &cp, 16);
420 break;
421 default:
422 val = strtol ((begin = cp), &cp, 8);
423 break;
425 if (begin == cp)
426 SYNTAX_ERROR;
428 if (posix_conformance && cp - begin < 2)
429 error (0, 0, gettext ("%s:%u: byte constant has less than "
430 "two digits"),
431 charmap_data.filename, line_no);
433 if (val < 0 || val > 255)
435 error (0, 0, gettext ("%s:%u: character encoding must be "
436 "given in 8-bit bytes"),
437 charmap_data.filename, line_no);
438 goto end_of_loop;
441 if (cnt < (size_t) charmap_data.mb_cur_max)
443 if (cnt < sizeof (long)) /* FIXME */
444 char_value = (char_value << 8) | val;
446 else
448 error (0, 0, gettext ("%s:%u: number of bytes in character "
449 "definition exceeds `mb_cur_max'"),
450 charmap_data.filename, line_no);
451 break;
453 ++cnt;
455 while (*cp == charmap_data.escape_char);
457 /* Ignore the rest of the line (comment). */
458 if (end2p == NULL)
460 if (insert_entry (&charmap_data.table, name_str,
461 end1p - name_str, (void *) char_value))
462 error (0, 0, gettext ("%s:%u: duplicate entry"),
463 charmap_data.filename, line_no);
465 max_char = MAX (max_char, char_value);
467 else
469 char *en1, *en2, *start1p;
470 long n1, n2, n;
472 start1p = name_str;
474 while (*start1p == *start2p && !isdigit (*start1p)
475 && start1p < end1p)
476 ++start1p, ++start2p;
478 n1 = strtol (start1p, &en1, 10);
479 n2 = strtol (start2p, &en2, 10);
481 if (en1 - start1p != en2 - start2p || en1 != end1p
482 || en2 != end2p)
483 SYNTAX_ERROR;
485 if (n1 > n2)
486 error (0, 0, gettext ("%s:%u: starting character is bigger "
487 "than last"),
488 charmap_data.filename, line_no);
490 n = n1;
491 while (n <= n2)
493 snprintf(start1p, en1 - start1p, "%0*d", en1 - start1p, n);
495 if (insert_entry (&charmap_data.table, name_str,
496 en1 - name_str,
497 (void *) (char_value + n - n1)))
498 error (0, 0, gettext ("%s:%u: duplicate entry"),
499 charmap_data.filename, line_no);
501 max_char = MAX (max_char, char_value + n - n1);
502 ++n;
506 else
508 if (strncmp (cp, "END CHARMAP", 11) == 0)
509 return max_char;
511 SYNTAX_ERROR;
513 end_of_loop:
516 return max_char;
520 * Local Variables:
521 * mode:c
522 * c-basic-offset:2
523 * End: