1 /* Copyright (C) 1995 Free Software Foundation, Inc.
3 The GNU C Library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Library General Public License as
5 published by the Free Software Foundation; either version 2 of the
6 License, or (at your option) any later version.
8 The GNU C Library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Library General Public License for more details.
13 You should have received a copy of the GNU Library General Public
14 License along with the GNU C Library; see the file COPYING.LIB. If
15 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
16 Cambridge, MA 02139, USA. */
27 #include "localedef.h"
30 /* Data structure for representing charmap database. */
31 struct charmap charmap_data
;
/* Number of the charmap file line being processed; printed in
   diagnostics.  */
static unsigned int line_no;
/* Forward declarations of the file-local parsing helpers.  */
static void read_prolog (FILE *infile);
static unsigned long read_body (FILE *infile);
41 /* Read complete table of symbolic names for character set from file. If
42 this file does not exist or is not readable a default file is tried.
43 If this also is not readable no character map is defined. */
45 charmap_read (const char *filename
)
47 unsigned long max_char
;
48 long path_max
= pathconf (".", _PC_PATH_MAX
);
52 /* Initialize charmap data. */
53 charmap_data
.codeset_name
= NULL
;
54 charmap_data
.mb_cur_max
= -1;
55 charmap_data
.mb_cur_min
= -1;
56 charmap_data
.escape_char
= '\\';
57 charmap_data
.comment_char
= '#';
61 strcpy (buf
, filename
);
62 infile
= fopen (filename
, "r");
63 if (infile
== NULL
&& filename
[0] != '/')
65 snprintf (buf
, path_max
, "%s/%s", CHARMAP_PATH
, filename
);
66 infile
= fopen (buf
, "r");
72 error (0, errno
, gettext ("input file `%s' not found"), filename
);
74 snprintf (buf
, path_max
, "%s/%s", CHARMAP_PATH
, DEFAULT_CHARMAP
);
75 infile
= fopen (buf
, "r");
78 error (4, errno
, gettext ("input file `%s' not found"), filename
);
81 charmap_data
.filename
= buf
;
82 init_hash (&charmap_data
.table
, 500);
85 /* Read the prolog of the charmap file. */
/* Final work on the charmap table's global data: supply defaults for
   values that are still unset.  */
89 if (charmap_data
.mb_cur_max
== -1)
90 charmap_data
.mb_cur_max
= 1;
91 if (charmap_data
.mb_cur_min
== -1)
92 charmap_data
.mb_cur_min
= charmap_data
.mb_cur_max
;
94 if ((size_t) charmap_data
.mb_cur_max
> sizeof (long))
96 error (2, 0, gettext ("program limitation: for now only upto %Zu "
97 "bytes per character are allowed"), sizeof (long));
100 /* Now process all entries. */
101 max_char
= read_body (infile
);
103 /* We don't need the file anymore. */
/* Determine the optimal table size when using the simple modulo hashing
   function.  */
112 /* Current best values, initialized to some never reached high value. */
113 int best_count
= 10000;
114 int best_size
= 10000;
115 int best_product
= best_count
* best_size
;
118 error (-1, 0, gettext ("computing character table size: this may take "
121 for (size
= 256; size
<= best_product
; ++size
)
123 /* Array with slot counters. */
125 /* Current character. */
127 /* Maximal number of characters in any slot. */
129 /* Product of current size and maximal count. */
131 /* Iteration pointer through hashing table. */
134 /* Initializes counters to zero. */
135 memset(cnt
, 0, size
* sizeof (int));
137 /* Iterate through whole hashing table. */
138 while (product
< best_product
139 && iterate_table (&charmap_data
.table
, (void **) &ptr
,
142 /* Increment slot counter. */
144 /* Test for current maximum. */
145 if (cnt
[ch
% size
] > maxcnt
)
147 maxcnt
= cnt
[ch
% size
];
148 product
= maxcnt
* size
;
152 if (product
< best_product
)
156 best_product
= best_count
* best_size
;
160 charmap_data
.hash_size
= best_size
;
161 charmap_data
.hash_layers
= best_count
;
165 charmap_data
.hash_size
= 256;
166 charmap_data
.hash_layers
= 1;
/* Report a syntax error at the current position of the charmap file and
   abandon the line being parsed.  Control is transferred to the label
   `end_of_loop', which the function using this macro has to provide.  */
#define SYNTAX_ERROR                                                    \
  do                                                                    \
    {                                                                   \
      error (0, 0, gettext ("%s:%u: syntax error in charmap file"),     \
             charmap_data.filename, line_no);                           \
      goto end_of_loop;                                                 \
    }                                                                   \
  while (0)
176 /* Read the prolog of the charmap file until the line containing `CHARMAP'.
177 All possible entries are processed. */
179 read_prolog (FILE *infile
)
181 size_t bufsize
= sysconf (_SC_LINE_MAX
);
189 /* Read the next line. */
190 fgets (buf
, bufsize
, infile
);
/* An empty read or a line without final newline means the file ended
   inside the prolog; this is reported with error status 4.  */
194 if (len
== 0 || buf
[len
- 1] != '\n')
195 error (4, 0, gettext ("%s: unexpected end of file in charmap"),
196 charmap_data
.filename
);
198 /* This is the next line. */
201 /* Comments and empty lines are ignored. */
202 if (len
== 1 || buf
[0] == charmap_data
.comment_char
)
207 /* Throw away leading white spaces. This is not defined in POSIX.2
208 so don't do it if conformance is requested. */
209 if (!posix_conformance
)
210 while (isspace (*cp
))
213 /* If `CHARMAP' is read the prolog is over. */
214 if (strncmp (cp
, "CHARMAP", 7) == 0
215 && (!posix_conformance
|| cp
[7] == '\0'))
/* Anything else must be one of the special symbols defining the charmap
   parameters, all of which begin with '<'.  */
224 if (strncmp (cp
, "code_set_name>", 14) == 0)
228 #define cp_to_arg(no,pred) \
230 while (isspace (*cp)) \
232 if (*cp == '\0' || !pred (*cp)) \
235 cp_to_arg (14,isgraph
)
237 if (charmap_data
.codeset_name
!= NULL
)
239 error (0, 0, gettext ("%s:%u: duplicate code set name "
241 charmap_data
.filename
, line_no
);
242 free (charmap_data
.codeset_name
);
246 while (*cp
!= '\0' && isgraph (*cp
) && !isspace (*cp
))
249 charmap_data
.codeset_name
= (char *) xmalloc (cp
- startp
+ 1);
250 strncpy (startp
, startp
, cp
- startp
);
252 else if (strncmp (cp
, "mb_cur_max>", 11) == 0)
255 cp_to_arg (11,isdigit
)
257 if (charmap_data
.mb_cur_max
!= -1)
259 gettext ("%s:%u: duplicate definition of mb_cur_max"),
260 charmap_data
.filename
, line_no
);
262 new_val
= (int) strtol (cp
, &cp
, posix_conformance
? 10 : 0);
264 error (0, 0, gettext ("%s:%u: illegal value for mb_cur_max: %d"),
265 charmap_data
.filename
, line_no
, new_val
);
267 charmap_data
.mb_cur_max
= new_val
;
269 else if (strncmp (cp
, "mb_cur_min>", 11) == 0)
272 cp_to_arg (11,isdigit
)
274 if (charmap_data
.mb_cur_max
!= -1)
276 gettext ("%s:%u: duplicate definition of mb_cur_min"),
277 charmap_data
.filename
, line_no
);
279 new_val
= (int) strtol (cp
, &cp
, posix_conformance
? 10 : 0);
281 error (0, 0, gettext ("%s:%u: illegal value for mb_cur_min: %d"),
282 charmap_data
.filename
, line_no
, new_val
);
284 charmap_data
.mb_cur_min
= new_val
;
286 else if (strncmp (cp
, "escape_char>", 12) == 0)
288 cp_to_arg (12, isgraph
)
289 charmap_data
.escape_char
= *cp
;
291 else if (strncmp (cp
, "comment_char>", 13) == 0)
293 cp_to_arg (13, isgraph
)
294 charmap_data
.comment_char
= *cp
;
305 read_body (FILE *infile
)
307 unsigned long max_char
= 0;
308 size_t bufsize
= sysconf (_SC_LINE_MAX
);
310 char name_str
[bufsize
/ 2];
311 char code_str
[bufsize
/ 2];
318 /* Read the next line. */
319 fgets (buf
, bufsize
, infile
);
322 /* On EOF simply return. */
324 error (0, 0, gettext ("%s: `END CHARMAP' is missing"),
325 charmap_data
.filename
);
327 /* This is the next line. */
330 if (len
== bufsize
- 1)
332 error (0, 0, gettext ("%s:%u: line too long; use `getconf "
333 "LINE_MAX' to get the current maximum line"
334 "length"), charmap_data
.filename
, line_no
);
337 fgets (buf
, bufsize
, infile
);
340 while (len
== bufsize
- 1);
344 /* Comments and empty lines are ignored. */
345 if (len
== 1 || buf
[0] == charmap_data
.comment_char
)
350 /* Throw away leading white spaces. This is not defined in POSIX.2
351 so don't do it if conformance is requested. */
352 if (!posix_conformance
)
353 while (isspace (*cp
))
358 char *end1p
, *end2p
, *start2p
;
360 unsigned long char_value
= 0;
362 if (sscanf (cp
+ 1, "%s %s", name_str
, code_str
) != 2)
365 end1p
= cp
= name_str
;
366 while (*cp
!= '\0' && *cp
!= '>')
368 if (*cp
== charmap_data
.escape_char
)
374 /* No final '>'. Make error condition. */
381 if (*cp
== '.' && *++cp
== '.' && *++cp
== '.' && *++cp
== '<')
383 /* This might be the alternate form. */
384 start2p
= end2p
= ++cp
;
385 while (*cp
!= '\0' && *cp
!= '>')
387 if (*cp
== charmap_data
.escape_char
)
393 /* NO final '>'. Make error condition. */
399 start2p
= end2p
= NULL
;
402 if (end1p
== name_str
|| (start2p
!= NULL
&& start2p
!= end2p
)
404 || *code_str
!= charmap_data
.escape_char
)
416 val
= strtol ((begin
= cp
+ 1), &cp
, 10);
419 val
= strtol ((begin
= cp
+ 1), &cp
, 16);
422 val
= strtol ((begin
= cp
), &cp
, 8);
428 if (posix_conformance
&& cp
- begin
< 2)
429 error (0, 0, gettext ("%s:%u: byte constant has less than "
431 charmap_data
.filename
, line_no
);
433 if (val
< 0 || val
> 255)
435 error (0, 0, gettext ("%s:%u: character encoding must be "
436 "given in 8-bit bytes"),
437 charmap_data
.filename
, line_no
);
441 if (cnt
< (size_t) charmap_data
.mb_cur_max
)
443 if (cnt
< sizeof (long)) /* FIXME */
444 char_value
= (char_value
<< 8) | val
;
448 error (0, 0, gettext ("%s:%u: number of bytes in character "
449 "definition exceeds `mb_cur_max'"),
450 charmap_data
.filename
, line_no
);
455 while (*cp
== charmap_data
.escape_char
);
457 /* Ignore the rest of the line (comment). */
460 if (insert_entry (&charmap_data
.table
, name_str
,
461 end1p
- name_str
, (void *) char_value
))
462 error (0, 0, gettext ("%s:%u: duplicate entry"),
463 charmap_data
.filename
, line_no
);
465 max_char
= MAX (max_char
, char_value
);
469 char *en1
, *en2
, *start1p
;
474 while (*start1p
== *start2p
&& !isdigit (*start1p
)
476 ++start1p
, ++start2p
;
478 n1
= strtol (start1p
, &en1
, 10);
479 n2
= strtol (start2p
, &en2
, 10);
481 if (en1
- start1p
!= en2
- start2p
|| en1
!= end1p
486 error (0, 0, gettext ("%s:%u: starting character is bigger "
488 charmap_data
.filename
, line_no
);
493 snprintf(start1p
, en1
- start1p
, "%0*d", en1
- start1p
, n
);
495 if (insert_entry (&charmap_data
.table
, name_str
,
497 (void *) (char_value
+ n
- n1
)))
498 error (0, 0, gettext ("%s:%u: duplicate entry"),
499 charmap_data
.filename
, line_no
);
501 max_char
= MAX (max_char
, char_value
+ n
- n1
);
508 if (strncmp (cp
, "END CHARMAP", 11) == 0)