Update.
[glibc.git] / iconv / gconv_conf.c
blobdce913da744cc67a45d77b202321ac97dbd79b8b
1 /* Handle configuration data.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <search.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 #include <sys/param.h>
31 #include <gconv_int.h>
34 /* This is the default path where we look for module lists. */
35 static const char default_gconv_path[] = GCONV_PATH;
37 /* Name of the file containing the module information in the directories
38 along the path. */
39 static const char gconv_conf_filename[] = "gconv-modules";
41 /* Filename extension for the modules. */
42 #ifndef MODULE_EXT
43 # define MODULE_EXT ".so"
44 #endif
45 static const char gconv_module_ext[] = MODULE_EXT;
47 /* We have a few builtin transformations. */
48 static struct gconv_module builtin_modules[] =
50 #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \
51 Fct, Init, End, MinF, MaxF, MinT, MaxT) \
52 { \
53 from_pattern: From, \
54 from_constpfx: ConstPfx, \
55 from_constpfx_len: ConstLen, \
56 from_regex: NULL, \
57 to_string: To, \
58 cost_hi: Cost, \
59 cost_lo: INT_MAX, \
60 module_name: Name \
62 #define BUILTIN_ALIAS(From, To)
64 #include "gconv_builtin.h"
67 #undef BUILTIN_TRANSFORMATION
68 #undef BUILTIN_ALIAS
70 static const char *builtin_aliases[] =
72 #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \
73 Fct, Init, End, MinF, MaxF, MinT, MaxT)
74 #define BUILTIN_ALIAS(From, To) From " " To,
76 #include "gconv_builtin.h"
/* When USE_IN_LIBIO is defined, calls to __getdelim in this file are
   redirected to _IO_getdelim.  */
#if defined USE_IN_LIBIO
# include <libio/libioP.h>
# define __getdelim(lineptr, size, delim, stream) \
  _IO_getdelim (lineptr, size, delim, stream)
#endif
85 /* Test whether there is already a matching module known. */
86 static int
87 internal_function
88 detect_conflict (const char *alias, size_t alias_len)
90 struct gconv_module *node = __gconv_modules_db;
92 while (node != NULL)
94 int cmpres = strncmp (alias, node->from_constpfx,
95 MIN (alias_len, node->from_constpfx_len));
97 if (cmpres == 0)
99 struct gconv_module *runp;
101 if (alias_len < node->from_constpfx_len)
102 /* Cannot possibly match. */
103 return 0;
105 /* This means the prefix and the alias are identical. If
106 there is now a simple extry or a regular expression
107 matching this name we have found a conflict. If there is
108 no conflict with the elements in the `same' list there
109 cannot be a conflict. */
110 runp = node;
113 if (runp->from_pattern == NULL)
115 /* This is a simple entry and therefore we have a
116 conflict if the strings are really the same. */
117 if (alias_len == node->from_constpfx_len)
118 return 1;
120 else
122 /* Compile the regular expression if necessary. */
123 if (runp->from_regex == NULL)
125 if (__regcomp (&runp->from_regex_mem,
126 runp->from_pattern,
127 REG_EXTENDED | REG_ICASE) != 0)
128 /* Something is wrong. Remember this. */
129 runp->from_regex = (regex_t *) -1L;
130 else
131 runp->from_regex = &runp->from_regex_mem;
134 if (runp->from_regex != (regex_t *) -1L)
136 regmatch_t match[1];
138 /* Try to match the regular expression. */
139 if (__regexec (runp->from_regex, alias, 1, match, 0) == 0
140 && match[0].rm_so == 0
141 && alias[match[0].rm_eo] == '\0')
142 /* They match, therefore it is a conflict. */
143 return 1;
147 runp = runp->same;
149 while (runp != NULL);
151 if (alias_len == node->from_constpfx_len)
152 return 0;
154 node = node->matching;
156 else if (cmpres < 0)
157 node = node->left;
158 else
159 node = node->right;
162 return node != NULL;
166 /* Add new alias. */
167 static inline void
168 add_alias (char *rp, void *modules)
170 /* We now expect two more string. The strings are normalized
171 (converted to UPPER case) and strored in the alias database. */
172 struct gconv_alias *new_alias;
173 char *from, *to, *wp;
175 while (isspace (*rp))
176 ++rp;
177 from = wp = rp;
178 while (*rp != '\0' && !isspace (*rp))
179 *wp++ = toupper (*rp++);
180 if (*rp == '\0')
181 /* There is no `to' string on the line. Ignore it. */
182 return;
183 *wp++ = '\0';
184 to = ++rp;
185 while (isspace (*rp))
186 ++rp;
187 while (*rp != '\0' && !isspace (*rp))
188 *wp++ = toupper (*rp++);
189 if (to == wp)
190 /* No `to' string, ignore the line. */
191 return;
192 *wp++ = '\0';
194 /* Test whether this alias conflicts with any available module. */
195 if (detect_conflict (from, to - from - 1))
196 /* It does conflict, don't add the alias. */
197 return;
199 new_alias = (struct gconv_alias *)
200 malloc (sizeof (struct gconv_alias) + (wp - from));
201 if (new_alias != NULL)
203 void **inserted;
205 new_alias->fromname = memcpy ((char *) new_alias
206 + sizeof (struct gconv_alias),
207 from, wp - from);
208 new_alias->toname = new_alias->fromname + (to - from);
210 inserted = (void **) __tsearch (new_alias, &__gconv_alias_db,
211 __gconv_alias_compare);
212 if (inserted == NULL || *inserted != (void **) new_alias)
213 /* Something went wrong, free this entry. */
214 free (new_alias);
219 /* Insert a data structure for a new module in the search tree. */
220 static inline void
221 internal_function
222 insert_module (struct gconv_module *newp)
224 struct gconv_module **rootp = &__gconv_modules_db;
226 while (*rootp != NULL)
228 struct gconv_module *root = *rootp;
229 size_t minlen = MIN (newp->from_constpfx_len, root->from_constpfx_len);
230 int cmpres;
232 cmpres = strncmp (newp->from_constpfx, root->from_constpfx, minlen);
233 if (cmpres == 0)
235 /* This can mean two things: the prefix is entirely the same or
236 it matches only for the minimum length of both strings. */
237 if (newp->from_constpfx_len == root->from_constpfx_len)
239 /* Both prefixes are identical. Insert the string at the
240 end of the `same' list if it is not already there. */
241 const char *from_pattern = (newp->from_pattern
242 ?: newp->from_constpfx);
244 while (strcmp (from_pattern,
245 root->from_pattern ?: root->from_constpfx) != 0
246 || strcmp (newp->to_string, root->to_string) != 0)
248 rootp = &root->same;
249 root = *rootp;
250 if (root == NULL)
251 break;
254 if (root != NULL)
255 /* This is a no new conversion. */
256 return;
258 break;
261 /* The new element either has a prefix which is itself a
262 prefix for the prefix of the current node or vice verse.
263 In the first case we insert the node right here. Otherwise
264 we have to descent further. */
265 if (newp->from_constpfx_len < root->from_constpfx_len)
267 newp->matching = root;
268 break;
271 rootp = &root->matching;
273 else if (cmpres < 0)
274 rootp = &root->left;
275 else
276 rootp = &root->right;
279 /* Plug in the new node here. */
280 *rootp = newp;
284 /* Add new module. */
285 static inline void
286 internal_function
287 add_module (char *rp, const char *directory, size_t dir_len, void **modules,
288 size_t *nmodules, int modcounter)
290 /* We expect now
291 1. `from' name
292 2. `to' name
293 3. filename of the module
294 4. an optional cost value
296 struct gconv_module *new_module;
297 char *from, *to, *module, *wp;
298 size_t const_len;
299 int from_is_regex;
300 int need_ext;
301 int cost_hi;
303 while (isspace (*rp))
304 ++rp;
305 from = rp;
306 from_is_regex = 0;
307 while (*rp != '\0' && !isspace (*rp))
309 if (!isalnum (*rp) && *rp != '-' && *rp != '/' && *rp != '.'
310 && *rp != '_' && *rp != '(' && *rp != ')')
311 from_is_regex = 1;
312 *rp = toupper (*rp);
313 ++rp;
315 if (*rp == '\0')
316 return;
317 *rp++ = '\0';
318 to = wp = rp;
319 while (isspace (*rp))
320 ++rp;
321 while (*rp != '\0' && !isspace (*rp))
322 *wp++ = toupper (*rp++);
323 if (*rp == '\0')
324 return;
325 *wp++ = '\0';
327 ++rp;
328 while (isspace (*rp));
329 module = wp;
330 while (*rp != '\0' && !isspace (*rp))
331 *wp++ = *rp++;
332 if (*rp == '\0')
334 /* There is no cost, use one by default. */
335 *wp++ = '\0';
336 cost_hi = 1;
338 else
340 /* There might be a cost value. */
341 char *endp;
343 *wp++ = '\0';
344 cost_hi = strtol (rp, &endp, 10);
345 if (rp == endp || cost_hi < 1)
346 /* No useful information. */
347 cost_hi = 1;
350 if (module[0] == '\0')
351 /* No module name given. */
352 return;
353 if (module[0] == '/')
354 dir_len = 0;
355 else
356 /* Increment by one for the slash. */
357 ++dir_len;
359 /* See whether we must add the ending. */
360 need_ext = 0;
361 if (wp - module < sizeof (gconv_module_ext)
362 || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext,
363 sizeof (gconv_module_ext)) != 0)
364 /* We must add the module extension. */
365 need_ext = sizeof (gconv_module_ext) - 1;
367 /* We've collected all the information, now create an entry. */
369 if (from_is_regex)
371 const_len = 0;
372 while (isalnum (from[const_len]) || from[const_len] == '-'
373 || from[const_len] == '/' || from[const_len] == '.'
374 || from[const_len] == '_')
375 ++const_len;
377 else
378 const_len = to - from - 1;
380 new_module = (struct gconv_module *) calloc (1,
381 sizeof (struct gconv_module)
382 + (wp - from)
383 + dir_len + need_ext);
384 if (new_module != NULL)
386 char *tmp;
388 new_module->from_constpfx = memcpy ((char *) new_module
389 + sizeof (struct gconv_module),
390 from, to - from);
391 if (from_is_regex)
392 new_module->from_pattern = new_module->from_constpfx;
394 new_module->from_constpfx_len = const_len;
396 new_module->to_string = memcpy ((char *) new_module->from_constpfx
397 + (to - from), to, module - to);
399 new_module->cost_hi = cost_hi;
400 new_module->cost_lo = modcounter;
402 new_module->module_name = (char *) new_module->to_string + (module - to);
404 if (dir_len == 0)
405 tmp = (char *) new_module->module_name;
406 else
408 tmp = __mempcpy ((char *) new_module->module_name,
409 directory, dir_len - 1);
410 *tmp++ = '/';
413 tmp = __mempcpy (tmp, module, wp - module);
415 if (need_ext)
416 memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext));
418 /* See whether we have already an alias with this name defined.
419 We do allow regular expressions matching this any alias since
420 this expression can also match other names and we test for aliases
421 before testing for modules. */
422 if (! from_is_regex)
424 struct gconv_alias fake_alias;
426 fake_alias.fromname = new_module->from_constpfx;
428 if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
429 != NULL)
431 /* This module duplicates an alias. */
432 free (new_module);
433 return;
437 /* Now insert the new module data structure in our search tree. */
438 insert_module (new_module);
443 /* Read the next configuration file. */
444 static void
445 internal_function
446 read_conf_file (const char *filename, const char *directory, size_t dir_len,
447 void **modules, size_t *nmodules)
449 FILE *fp = fopen (filename, "r");
450 char *line = NULL;
451 size_t line_len = 0;
452 int modcounter = 0;
454 /* Don't complain if a file is not present or readable, simply silently
455 ignore it. */
456 if (fp == NULL)
457 return;
459 /* Process the known entries of the file. Comments start with `#' and
460 end with the end of the line. Empty lines are ignored. */
461 while (!feof_unlocked (fp))
463 char *rp, *endp, *word;
464 ssize_t n = __getdelim (&line, &line_len, '\n', fp);
465 if (n < 0)
466 /* An error occurred. */
467 break;
469 rp = line;
470 /* Terminate the line (excluding comments or newline) by an NUL byte
471 to simplify the following code. */
472 endp = strchr (rp, '#');
473 if (endp != NULL)
474 *endp = '\0';
475 else
476 if (rp[n - 1] == '\n')
477 rp[n - 1] = '\0';
479 while (isspace (*rp))
480 ++rp;
482 /* If this is an empty line go on with the next one. */
483 if (rp == endp)
484 continue;
486 word = rp;
487 while (*rp != '\0' && !isspace (*rp))
488 ++rp;
490 if (rp - word == sizeof ("alias") - 1
491 && memcmp (word, "alias", sizeof ("alias") - 1) == 0)
492 add_alias (rp, *modules);
493 else if (rp - word == sizeof ("module") - 1
494 && memcmp (word, "module", sizeof ("module") - 1) == 0)
495 add_module (rp, directory, dir_len, modules, nmodules, modcounter++);
496 /* else */
497 /* Otherwise ignore the line. */
500 if (line != NULL)
501 free (line);
502 fclose (fp);
506 /* Read all configuration files found in the user-specified and the default
507 path. */
508 void
509 __gconv_read_conf (void)
511 const char *user_path = __secure_getenv ("GCONV_PATH");
512 char *gconv_path, *elem;
513 void *modules = NULL;
514 size_t nmodules = 0;
515 int save_errno = errno;
516 size_t cnt;
518 if (user_path == NULL)
519 /* No user-defined path. Make a modifiable copy of the default path. */
520 gconv_path = strdupa (default_gconv_path);
521 else
523 /* Append the default path to the user-defined path. */
524 size_t user_len = strlen (user_path);
526 gconv_path = alloca (user_len + 1 + sizeof (default_gconv_path));
527 __mempcpy (__mempcpy (__mempcpy (gconv_path, user_path, user_len),
528 ":", 1),
529 default_gconv_path, sizeof (default_gconv_path));
532 elem = __strtok_r (gconv_path, ":", &gconv_path);
533 while (elem != NULL)
535 #ifndef MAXPATHLEN
536 /* We define a reasonable limit. */
537 # define MAXPATHLEN 4096
538 #endif
539 char real_elem[MAXPATHLEN];
541 if (__realpath (elem, real_elem) != NULL)
543 size_t elem_len = strlen (real_elem);
544 char *filename;
546 filename = alloca (elem_len + 1 + sizeof (gconv_conf_filename));
547 __mempcpy (__mempcpy (__mempcpy (filename, real_elem, elem_len),
548 "/", 1),
549 gconv_conf_filename, sizeof (gconv_conf_filename));
551 /* Read the next configuration file. */
552 read_conf_file (filename, real_elem, elem_len, &modules, &nmodules);
555 /* Get next element in the path. */
556 elem = __strtok_r (NULL, ":", &gconv_path);
559 /* Add the internal modules. */
560 for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
561 ++cnt)
563 if (builtin_modules[cnt].from_pattern == NULL)
565 struct gconv_alias fake_alias;
567 fake_alias.fromname = builtin_modules[cnt].from_constpfx;
569 if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
570 != NULL)
571 /* It'll conflict so don't add it. */
572 continue;
575 insert_module (&builtin_modules[cnt]);
578 /* Add aliases for builtin conversions. */
579 cnt = sizeof (builtin_aliases) / sizeof (builtin_aliases[0]);
580 while (cnt > 0)
582 char *copy = strdupa (builtin_aliases[--cnt]);
583 add_alias (copy, modules);
586 /* Restore the error number. */
587 __set_errno (save_errno);