Update.
[glibc.git] / iconv / gconv_conf.c
blob24ec14aea8ed6ec2dd21623c52eb25c66978d33b
1 /* Handle configuration data.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <search.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 #include <sys/param.h>
31 #include <gconv_int.h>
34 /* This is the default path where we look for module lists. */
35 static const char default_gconv_path[] = GCONV_PATH;
37 /* Name of the file containing the module information in the directories
38 along the path. */
39 static const char gconv_conf_filename[] = "gconv-modules";
41 /* Filename extension for the modules. */
42 #ifndef MODULE_EXT
43 # define MODULE_EXT ".so"
44 #endif
45 static const char gconv_module_ext[] = MODULE_EXT;
47 /* We have a few builtin transformations. */
48 static struct gconv_module builtin_modules[] =
50 #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \
51 Fct, Init, End, MinF, MaxF, MinT, MaxT) \
52 { \
53 from_pattern: From, \
54 from_constpfx: ConstPfx, \
55 from_constpfx_len: ConstLen, \
56 from_regex: NULL, \
57 to_string: To, \
58 cost_hi: Cost, \
59 cost_lo: INT_MAX, \
60 module_name: Name \
62 #define BUILTIN_ALIAS(From, To)
64 #include "gconv_builtin.h"
67 #undef BUILTIN_TRANSFORMATION
68 #undef BUILTIN_ALIAS
70 static const char *
71 builtin_aliases[] =
73 #define BUILTIN_TRANSFORMATION(From, ConstPfx, ConstLen, To, Cost, Name, \
74 Fct, Init, End, MinF, MaxF, MinT, MaxT)
75 #define BUILTIN_ALIAS(From, To) From " " To,
77 #include "gconv_builtin.h"
/* If USE_IN_LIBIO is defined, map __getdelim onto _IO_getdelim, which
   is made available by <libio/libioP.h>.  */
#if defined USE_IN_LIBIO
# include <libio/libioP.h>
# define __getdelim(line, len, c, fp) _IO_getdelim (line, len, c, fp)
#endif
86 /* Test whether there is already a matching module known. */
87 static int
88 internal_function
89 detect_conflict (const char *alias, size_t alias_len)
91 struct gconv_module *node = __gconv_modules_db;
93 while (node != NULL)
95 int cmpres = strncmp (alias, node->from_constpfx,
96 MIN (alias_len, node->from_constpfx_len));
98 if (cmpres == 0)
100 struct gconv_module *runp;
102 if (alias_len < node->from_constpfx_len)
103 /* Cannot possibly match. */
104 return 0;
106 /* This means the prefix and the alias are identical. If
107 there is now a simple extry or a regular expression
108 matching this name we have found a conflict. If there is
109 no conflict with the elements in the `same' list there
110 cannot be a conflict. */
111 runp = node;
114 if (runp->from_pattern == NULL)
116 /* This is a simple entry and therefore we have a
117 conflict if the strings are really the same. */
118 if (alias_len == node->from_constpfx_len)
119 return 1;
121 else
123 /* Compile the regular expression if necessary. */
124 if (runp->from_regex == NULL)
126 if (__regcomp (&runp->from_regex_mem,
127 runp->from_pattern,
128 REG_EXTENDED | REG_ICASE) != 0)
129 /* Something is wrong. Remember this. */
130 runp->from_regex = (regex_t *) -1L;
131 else
132 runp->from_regex = &runp->from_regex_mem;
135 if (runp->from_regex != (regex_t *) -1L)
137 regmatch_t match[1];
139 /* Try to match the regular expression. */
140 if (__regexec (runp->from_regex, alias, 1, match, 0) == 0
141 && match[0].rm_so == 0
142 && alias[match[0].rm_eo] == '\0')
143 /* They match, therefore it is a conflict. */
144 return 1;
148 runp = runp->same;
150 while (runp != NULL);
152 if (alias_len == node->from_constpfx_len)
153 return 0;
155 node = node->matching;
157 else if (cmpres < 0)
158 node = node->left;
159 else
160 node = node->right;
163 return node != NULL;
167 /* Add new alias. */
168 static inline void
169 add_alias (char *rp, void *modules)
171 /* We now expect two more string. The strings are normalized
172 (converted to UPPER case) and strored in the alias database. */
173 struct gconv_alias *new_alias;
174 char *from, *to, *wp;
176 while (isspace (*rp))
177 ++rp;
178 from = wp = rp;
179 while (*rp != '\0' && !isspace (*rp))
180 ++rp;
181 if (*rp == '\0')
182 /* There is no `to' string on the line. Ignore it. */
183 return;
184 *rp++ = '\0';
185 to = wp = rp;
186 while (isspace (*rp))
187 ++rp;
188 while (*rp != '\0' && !isspace (*rp))
189 *wp++ = *rp++;
190 if (to == wp)
191 /* No `to' string, ignore the line. */
192 return;
193 *wp++ = '\0';
195 /* Test whether this alias conflicts with any available module. */
196 if (detect_conflict (from, to - from - 1))
197 /* It does conflict, don't add the alias. */
198 return;
200 new_alias = (struct gconv_alias *)
201 malloc (sizeof (struct gconv_alias) + (wp - from));
202 if (new_alias != NULL)
204 new_alias->fromname = memcpy ((char *) new_alias
205 + sizeof (struct gconv_alias),
206 from, wp - from);
207 new_alias->toname = new_alias->fromname + (to - from);
209 if (__tsearch (new_alias, &__gconv_alias_db, __gconv_alias_compare)
210 == NULL)
211 /* Something went wrong, free this entry. */
212 free (new_alias);
217 /* Insert a data structure for a new module in the search tree. */
218 static inline void
219 internal_function
220 insert_module (struct gconv_module *newp)
222 struct gconv_module **rootp = &__gconv_modules_db;
224 while (*rootp != NULL)
226 struct gconv_module *root = *rootp;
227 size_t minlen = MIN (newp->from_constpfx_len, root->from_constpfx_len);
228 int cmpres;
230 cmpres = strncmp (newp->from_constpfx, root->from_constpfx, minlen);
231 if (cmpres == 0)
233 /* This can mean two things: the prefix is entirely the same or
234 it matches only for the minimum length of both strings. */
235 if (newp->from_constpfx_len == root->from_constpfx_len)
237 /* Both prefixes are identical. Insert the string at the
238 end of the `same' list if it is not already there. */
239 const char *from_pattern = (newp->from_pattern
240 ?: newp->from_constpfx);
242 while (strcmp (from_pattern,
243 root->from_pattern ?: root->from_constpfx) != 0
244 || strcmp (newp->to_string, root->to_string) != 0)
246 rootp = &root->same;
247 root = *rootp;
248 if (root == NULL)
249 break;
252 if (root != NULL)
253 /* This is a no new conversion. */
254 return;
256 break;
259 /* The new element either has a prefix which is itself a
260 prefix for the prefix of the current node or vice verse.
261 In the first case we insert the node right here. Otherwise
262 we have to descent further. */
263 if (newp->from_constpfx_len < root->from_constpfx_len)
265 newp->matching = root;
266 break;
269 rootp = &root->matching;
271 else if (cmpres < 0)
272 rootp = &root->left;
273 else
274 rootp = &root->right;
277 /* Plug in the new node here. */
278 *rootp = newp;
282 /* Add new module. */
283 static inline void
284 internal_function
285 add_module (char *rp, const char *directory, size_t dir_len, void **modules,
286 size_t *nmodules, int modcounter)
288 /* We expect now
289 1. `from' name
290 2. `to' name
291 3. filename of the module
292 4. an optional cost value
294 struct gconv_module *new_module;
295 char *from, *to, *module, *wp;
296 size_t const_len;
297 int from_is_regex;
298 int need_ext;
299 int cost_hi;
301 while (isspace (*rp))
302 ++rp;
303 from = rp;
304 from_is_regex = 0;
305 while (*rp != '\0' && !isspace (*rp))
307 if (!isalnum (*rp) && *rp != '-' && *rp != '/' && *rp != '.'
308 && *rp != '_' && *rp != '(' && *rp != ')')
309 from_is_regex = 1;
310 ++rp;
312 if (*rp == '\0')
313 return;
314 *rp++ = '\0';
315 to = wp = rp;
316 while (isspace (*rp))
317 ++rp;
318 while (*rp != '\0' && !isspace (*rp))
319 *wp++ = *rp++;
320 if (*rp == '\0')
321 return;
322 *wp++ = '\0';
324 ++rp;
325 while (isspace (*rp));
326 module = wp;
327 while (*rp != '\0' && !isspace (*rp))
328 *wp++ = *rp++;
329 if (*rp == '\0')
331 /* There is no cost, use one by default. */
332 *wp++ = '\0';
333 cost_hi = 1;
335 else
337 /* There might be a cost value. */
338 char *endp;
340 *wp++ = '\0';
341 cost_hi = strtol (rp, &endp, 10);
342 if (rp == endp || cost_hi < 1)
343 /* No useful information. */
344 cost_hi = 1;
347 if (module[0] == '\0')
348 /* No module name given. */
349 return;
350 if (module[0] == '/')
351 dir_len = 0;
352 else
353 /* Increment by one for the slash. */
354 ++dir_len;
356 /* See whether we must add the ending. */
357 need_ext = 0;
358 if (wp - module < sizeof (gconv_module_ext)
359 || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext,
360 sizeof (gconv_module_ext)) != 0)
361 /* We must add the module extension. */
362 need_ext = sizeof (gconv_module_ext) - 1;
364 /* We've collected all the information, now create an entry. */
366 if (from_is_regex)
368 const_len = 0;
369 while (isalnum (from[const_len]) || from[const_len] == '-'
370 || from[const_len] == '/' || from[const_len] == '.'
371 || from[const_len] == '_')
372 ++const_len;
374 else
375 const_len = to - from - 1;
377 new_module = (struct gconv_module *) calloc (1,
378 sizeof (struct gconv_module)
379 + (wp - from)
380 + dir_len + need_ext);
381 if (new_module != NULL)
383 char *tmp;
385 new_module->from_constpfx = memcpy ((char *) new_module
386 + sizeof (struct gconv_module),
387 from, to - from);
388 if (from_is_regex)
389 new_module->from_pattern = new_module->from_constpfx;
391 new_module->from_constpfx_len = const_len;
393 new_module->to_string = memcpy ((char *) new_module->from_constpfx
394 + (to - from), to, module - to);
396 new_module->cost_hi = cost_hi;
397 new_module->cost_lo = modcounter;
399 new_module->module_name = (char *) new_module->to_string + (module - to);
401 if (dir_len == 0)
402 tmp = (char *) new_module->module_name;
403 else
405 tmp = __mempcpy ((char *) new_module->module_name,
406 directory, dir_len - 1);
407 *tmp++ = '/';
410 tmp = __mempcpy (tmp, module, wp - module);
412 if (need_ext)
413 memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext));
415 /* See whether we have already an alias with this name defined.
416 We do allow regular expressions matching this any alias since
417 this expression can also match other names and we test for aliases
418 before testing for modules. */
419 if (! from_is_regex)
421 struct gconv_alias fake_alias;
423 fake_alias.fromname = new_module->from_constpfx;
425 if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
426 != NULL)
428 /* This module duplicates an alias. */
429 free (new_module);
430 return;
434 /* Now insert the new module data structure in our search tree. */
435 insert_module (new_module);
440 /* Read the next configuration file. */
441 static void
442 internal_function
443 read_conf_file (const char *filename, const char *directory, size_t dir_len,
444 void **modules, size_t *nmodules)
446 FILE *fp = fopen (filename, "r");
447 char *line = NULL;
448 size_t line_len = 0;
449 int modcounter = 0;
451 /* Don't complain if a file is not present or readable, simply silently
452 ignore it. */
453 if (fp == NULL)
454 return;
456 /* Process the known entries of the file. Comments start with `#' and
457 end with the end of the line. Empty lines are ignored. */
458 while (!feof_unlocked (fp))
460 char *rp, *endp, *word;
461 ssize_t n = __getdelim (&line, &line_len, '\n', fp);
462 if (n < 0)
463 /* An error occurred. */
464 break;
466 rp = line;
467 /* Terminate the line (excluding comments or newline) by an NUL byte
468 to simplify the following code. */
469 endp = strchr (rp, '#');
470 if (endp != NULL)
471 *endp = '\0';
472 else
473 if (rp[n - 1] == '\n')
474 rp[n - 1] = '\0';
476 while (isspace (*rp))
477 ++rp;
479 /* If this is an empty line go on with the next one. */
480 if (rp == endp)
481 continue;
483 word = rp;
484 while (*rp != '\0' && !isspace (*rp))
485 ++rp;
487 if (rp - word == sizeof ("alias") - 1
488 && memcmp (word, "alias", sizeof ("alias") - 1) == 0)
489 add_alias (rp, *modules);
490 else if (rp - word == sizeof ("module") - 1
491 && memcmp (word, "module", sizeof ("module") - 1) == 0)
492 add_module (rp, directory, dir_len, modules, nmodules, modcounter++);
493 /* else */
494 /* Otherwise ignore the line. */
497 if (line != NULL)
498 free (line);
499 fclose (fp);
503 /* Read all configuration files found in the user-specified and the default
504 path. */
505 void
506 __gconv_read_conf (void)
508 const char *user_path = __secure_getenv ("GCONV_PATH");
509 char *gconv_path, *elem;
510 void *modules = NULL;
511 size_t nmodules = 0;
512 int save_errno = errno;
513 size_t cnt;
515 if (user_path == NULL)
516 /* No user-defined path. Make a modifiable copy of the default path. */
517 gconv_path = strdupa (default_gconv_path);
518 else
520 /* Append the default path to the user-defined path. */
521 size_t user_len = strlen (user_path);
523 gconv_path = alloca (user_len + 1 + sizeof (default_gconv_path));
524 __mempcpy (__mempcpy (__mempcpy (gconv_path, user_path, user_len),
525 ":", 1),
526 default_gconv_path, sizeof (default_gconv_path));
529 elem = __strtok_r (gconv_path, ":", &gconv_path);
530 while (elem != NULL)
532 #ifndef MAXPATHLEN
533 /* We define a reasonable limit. */
534 # define MAXPATHLEN 4096
535 #endif
536 char real_elem[MAXPATHLEN];
538 if (__realpath (elem, real_elem) != NULL)
540 size_t elem_len = strlen (real_elem);
541 char *filename;
543 filename = alloca (elem_len + 1 + sizeof (gconv_conf_filename));
544 __mempcpy (__mempcpy (__mempcpy (filename, real_elem, elem_len),
545 "/", 1),
546 gconv_conf_filename, sizeof (gconv_conf_filename));
548 /* Read the next configuration file. */
549 read_conf_file (filename, real_elem, elem_len, &modules, &nmodules);
552 /* Get next element in the path. */
553 elem = __strtok_r (NULL, ":", &gconv_path);
556 /* Add the internal modules. */
557 for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
558 ++cnt)
560 if (builtin_modules[cnt].from_pattern == NULL)
562 struct gconv_alias fake_alias;
564 fake_alias.fromname = builtin_modules[cnt].from_constpfx;
566 if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
567 != NULL)
568 /* It'll conflict so don't add it. */
569 continue;
572 insert_module (&builtin_modules[cnt]);
575 /* Add aliases for builtin conversions. */
576 cnt = sizeof (builtin_aliases) / sizeof (builtin_aliases[0]);
577 while (cnt > 0)
579 char *copy = strdupa (builtin_aliases[--cnt]);
580 add_alias (copy, modules);
583 /* Restore the error number. */
584 __set_errno (save_errno);