Update.
[glibc.git] / iconv / gconv_db.c
blob2c66249cc5a08b8248043decd65504736a3bdf77
/* Provide access to the collection of available transformation modules.
   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
21 #include <search.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <bits/libc-lock.h>
25 #include <elf/ldsodefs.h>
27 #include <gconv_int.h>
30 /* Simple data structure for alias mapping. We have two names, `from'
31 and `to'. */
32 void *__gconv_alias_db;
34 /* Array with available modules. */
35 size_t __gconv_nmodules;
36 struct gconv_module **__gconv_modules_db;
38 /* We modify global data. */
39 __libc_lock_define_initialized (static, lock)
42 /* Function for searching alias. */
43 int
44 __gconv_alias_compare (const void *p1, const void *p2)
46 struct gconv_alias *s1 = (struct gconv_alias *) p1;
47 struct gconv_alias *s2 = (struct gconv_alias *) p2;
48 return __strcasecmp (s1->fromname, s2->fromname);
/* While searching for a derivation we build a list of intermediate
   steps.  Every element knows the element preceding it in the
   derivation order, so a complete chain can be walked backwards.  */
struct derivation_step
{
  /* Name of the charset reached with this step.  */
  const char *result_set;
  /* Module performing the step; NULL for the starting entries.  */
  struct gconv_module *code;
  /* Predecessor in the derivation; NULL for the starting entries.  */
  struct derivation_step *last;
  /* Next element of the work list.  */
  struct derivation_step *next;
};
/* Create a new `struct derivation_step' with alloca and fill it in.
   Because this is a macro, the memory belongs to the stack frame of the
   function which uses it.  The `next' member starts out as NULL.  */
#define NEW_STEP(result, module, last_mod) \
  ({ struct derivation_step *step_ = alloca (sizeof *step_);		      \
     step_->next = NULL;						      \
     step_->result_set = result;					      \
     step_->code = module;						      \
     step_->last = last_mod;						      \
     step_; })
/* A transformation which is requested more than once should not be
   searched for again.  Therefore every result is remembered in a record
   of this type.  */
struct known_derivation
{
  /* Name of the source charset.  */
  const char *from;
  /* Name of the destination charset.  */
  const char *to;
  /* Steps implementing the transformation; NULL if none was found.  */
  struct gconv_step *steps;
  /* Number of elements in STEPS.  */
  size_t nsteps;
};
82 /* Compare function for database of found derivations. */
83 static int
84 derivation_compare (const void *p1, const void *p2)
86 struct known_derivation *s1 = (struct known_derivation *) p1;
87 struct known_derivation *s2 = (struct known_derivation *) p2;
88 int result;
90 result = strcmp (s1->from, s2->from);
91 if (result == 0)
92 result = strcmp (s1->to, s2->to);
93 return result;
/* Root of the search tree in which the derivations looked up so far
   are cached.  */
static void *known_derivations;
99 /* Look up whether given transformation was already requested before. */
100 static int
101 internal_function
102 derivation_lookup (const char *fromset, const char *toset,
103 struct gconv_step **handle, size_t *nsteps)
105 struct known_derivation key = { fromset, toset, NULL, 0 };
106 struct known_derivation **result;
108 result = __tfind (&key, &known_derivations, derivation_compare);
110 if (result == NULL)
111 return GCONV_NOCONV;
113 *handle = (*result)->steps;
114 *nsteps = (*result)->nsteps;
116 /* Please note that we return GCONV_OK even if the last search for
117 this transformation was unsuccessful. */
118 return GCONV_OK;
121 /* Add new derivation to list of known ones. */
122 static void
123 internal_function
124 add_derivation (const char *fromset, const char *toset,
125 struct gconv_step *handle, size_t nsteps)
127 struct known_derivation *new_deriv;
128 size_t fromset_len = strlen (fromset) + 1;
129 size_t toset_len = strlen (toset) + 1;
131 new_deriv = (struct known_derivation *)
132 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
133 if (new_deriv != NULL)
135 new_deriv->from = memcpy (new_deriv + 1, fromset, fromset_len);
136 new_deriv->to = memcpy ((char *) new_deriv->from + fromset_len,
137 toset, toset_len);
139 new_deriv->steps = handle;
140 new_deriv->nsteps = nsteps;
142 __tsearch (new_deriv, &known_derivations, derivation_compare);
144 /* Please note that we don't complain if the allocation failed. This
145 is not tragically but in case we use the memory debugging facilities
146 not all memory will be freed. */
149 static void
150 internal_function
151 free_derivation (void *p)
153 struct known_derivation *deriv = (struct known_derivation *) p;
154 size_t cnt;
156 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
157 if (deriv->steps[cnt].end_fct)
158 _CALL_DL_FCT (deriv->steps[cnt].end_fct, (&deriv->steps[cnt]));
160 free ((struct gconv_step *) deriv->steps);
161 free (deriv);
165 static int
166 internal_function
167 gen_steps (struct derivation_step *best, const char *toset,
168 const char *fromset, struct gconv_step **handle, size_t *nsteps)
170 size_t step_cnt = 0;
171 struct gconv_step *result;
172 struct derivation_step *current;
173 int status = GCONV_NOMEM;
175 /* First determine number of steps. */
176 for (current = best; current->last != NULL; current = current->last)
177 ++step_cnt;
179 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
180 * step_cnt);
181 if (result != NULL)
183 int failed = 0;
185 *nsteps = step_cnt;
186 current = best;
187 while (step_cnt-- > 0)
189 result[step_cnt].from_name = (step_cnt == 0
190 ? __strdup (fromset)
191 : current->last->result_set);
192 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
193 ? __strdup (current->result_set)
194 : result[step_cnt + 1].from_name);
196 #ifndef STATIC_GCONV
197 if (current->code->module_name[0] == '/')
199 /* Load the module, return handle for it. */
200 struct gconv_loaded_object *shlib_handle =
201 __gconv_find_shlib (current->code->module_name);
203 if (shlib_handle == NULL)
205 failed = 1;
206 break;
209 result[step_cnt].shlib_handle = shlib_handle;
210 result[step_cnt].modname = shlib_handle->name;
211 result[step_cnt].counter = 0;
212 result[step_cnt].fct = shlib_handle->fct;
213 result[step_cnt].init_fct = shlib_handle->init_fct;
214 result[step_cnt].end_fct = shlib_handle->end_fct;
216 else
217 #endif
218 /* It's a builtin transformation. */
219 __gconv_get_builtin_trans (current->code->module_name,
220 &result[step_cnt]);
222 /* Call the init function. */
223 if (result[step_cnt].init_fct != NULL)
224 _CALL_DL_FCT (result[step_cnt].init_fct, (&result[step_cnt]));
226 current = current->last;
229 if (failed != 0)
231 /* Something went wrong while initializing the modules. */
232 while (++step_cnt < *nsteps)
234 if (result[step_cnt].end_fct != NULL)
235 _CALL_DL_FCT (result[step_cnt].end_fct, (&result[step_cnt]));
236 #ifndef STATIC_GCONV
237 __gconv_release_shlib (result[step_cnt].shlib_handle);
238 #endif
240 free (result);
241 *nsteps = 0;
242 status = GCONV_NOCONV;
244 else
246 *handle = result;
247 status = GCONV_OK;
251 return status;
255 /* The main function: find a possible derivation from the `fromset' (either
256 the given name or the alias) to the `toset' (again with alias). */
257 static int
258 internal_function
259 find_derivation (const char *toset, const char *toset_expand,
260 const char *fromset, const char *fromset_expand,
261 struct gconv_step **handle, size_t *nsteps)
263 __libc_lock_define_initialized (static, lock)
264 struct derivation_step *first, *current, **lastp, *best = NULL;
265 int best_cost_hi = 0;
266 int best_cost_lo = 0;
267 int result;
269 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
270 handle, nsteps);
271 if (result == GCONV_OK)
272 return result;
274 __libc_lock_lock (lock);
276 /* There is a small chance that this derivation is meanwhile found. This
277 can happen if in `find_derivation' we look for this derivation, didn't
278 find it but at the same time another thread looked for this derivation. */
279 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
280 handle, nsteps);
281 if (result == GCONV_OK)
282 return result;
284 /* ### TODO
285 For now we use a simple algorithm with quadratic runtime behaviour.
286 The task is to match the `toset' with any of the available rules,
287 starting from FROMSET. */
288 if (fromset_expand != NULL)
290 first = NEW_STEP (fromset_expand, NULL, NULL);
291 first->next = NEW_STEP (fromset, NULL, NULL);
292 lastp = &first->next->next;
294 else
296 first = NEW_STEP (fromset, NULL, NULL);
297 lastp = &first->next;
300 current = first;
301 while (current != NULL)
303 /* Now match all the available module specifications against the
304 current charset name. If any of them matches check whether
305 we already have a derivation for this charset. If yes, use the
306 one with the lower costs. Otherwise add the new charset at the
307 end. */
308 size_t cnt;
310 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
312 const char *result_set = NULL;
314 if (__gconv_modules_db[cnt]->from_pattern == NULL)
316 if (__strcasecmp (current->result_set,
317 __gconv_modules_db[cnt]->from_constpfx) == 0)
319 if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0)
320 result_set = toset_expand ?: toset;
321 else
322 result_set = __gconv_modules_db[cnt]->to_string;
325 else
326 /* We have a regular expression. First see if the prefix
327 matches. */
328 if (__strncasecmp (current->result_set,
329 __gconv_modules_db[cnt]->from_constpfx,
330 __gconv_modules_db[cnt]->from_constpfx_len)
331 == 0)
333 /* First compile the regex if not already done. */
334 if (__gconv_modules_db[cnt]->from_regex == NULL)
336 regex_t *newp = (regex_t *) malloc (sizeof (regex_t));
338 if (__regcomp (newp, __gconv_modules_db[cnt]->from_pattern,
339 REG_EXTENDED | REG_ICASE) != 0)
341 /* Something is wrong. Remember this. */
342 free (newp);
343 __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L;
345 else
346 __gconv_modules_db[cnt]->from_regex = newp;
349 if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L)
351 /* Try to match the from name. */
352 regmatch_t match[4];
354 if (__regexec (__gconv_modules_db[cnt]->from_regex,
355 current->result_set, 4, match, 0) == 0
356 && match[0].rm_so == 0
357 && current->result_set[match[0].rm_eo] == '\0')
359 /* At least the whole <from> string is matched.
360 We must now match sed-like possible
361 subexpressions from the match to the
362 toset expression. */
363 #define ENSURE_LEN(LEN) \
364 if (wp + (LEN) >= constr + len - 1) \
366 char *newp = alloca (len += 128); \
367 memcpy (newp, constr, wp - constr); \
368 wp = newp + (wp - constr); \
369 constr = newp; \
371 size_t len = 128;
372 char *constr = alloca (len);
373 char *wp = constr;
374 const char *cp = __gconv_modules_db[cnt]->to_string;
376 while (*cp != '\0')
378 if (*cp != '\\')
380 ENSURE_LEN (1);
381 *wp++ = *cp++;
383 else if (cp[1] == '\0')
384 /* Backslash at end of string. */
385 break;
386 else
388 ++cp;
389 if (*cp == '\\')
391 *wp++ = *cp++;
392 ENSURE_LEN (1);
394 else if (*cp < '1' || *cp > '3')
395 break;
396 else
398 int idx = *cp - '0';
399 if (match[idx].rm_so == -1)
400 /* No match. */
401 break;
403 ENSURE_LEN (match[idx].rm_eo
404 - match[idx].rm_so);
405 wp = __mempcpy (wp,
406 &current->result_set[match[idx].rm_so],
407 match[idx].rm_eo
408 - match[idx].rm_so);
409 ++cp;
413 if (*cp == '\0' && wp != constr)
415 /* Terminate the constructed string. */
416 *wp = '\0';
417 result_set = constr;
423 if (result_set != NULL)
425 /* We managed to find a derivation. First see whether
426 this is what we are looking for. */
427 if (__strcasecmp (result_set, toset) == 0
428 || (toset_expand != NULL
429 && __strcasecmp (result_set, toset_expand) == 0))
431 /* Determine the costs. If they are lower than the
432 previous solution (or this is the first solution)
433 remember this solution. */
434 int cost_hi = __gconv_modules_db[cnt]->cost_hi;
435 int cost_lo = __gconv_modules_db[cnt]->cost_lo;
436 struct derivation_step *runp = current;
437 while (runp->code != NULL)
439 cost_hi += runp->code->cost_hi;
440 cost_lo += runp->code->cost_lo;
441 runp = runp->last;
443 if (best == NULL || cost_hi < best_cost_hi
444 || (cost_hi == best_cost_hi && cost_lo < best_cost_lo))
446 best = NEW_STEP (result_set, __gconv_modules_db[cnt],
447 current);
448 best_cost_hi = cost_hi;
449 best_cost_lo = cost_lo;
452 else
454 /* Append at the end if there is no entry with this name. */
455 struct derivation_step *runp = first;
457 while (runp != NULL)
459 if (__strcasecmp (result_set, runp->result_set) == 0)
460 break;
461 runp = runp->next;
464 if (runp == NULL)
466 *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt],
467 current);
468 lastp = &(*lastp)->next;
474 /* Go on with the next entry. */
475 current = current->next;
478 if (best != NULL)
479 /* We really found a way to do the transformation. Now build a data
480 structure describing the transformation steps.*/
481 result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset,
482 handle, nsteps);
483 else
485 /* We haven't found a transformation. Clear the result values. */
486 *handle = NULL;
487 *nsteps = 0;
490 /* Add result in any case to list of known derivations. */
491 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
492 *handle, *nsteps);
494 __libc_lock_unlock (lock);
496 return result;
501 internal_function
502 __gconv_find_transform (const char *toset, const char *fromset,
503 struct gconv_step **handle, size_t *nsteps)
505 __libc_once_define (static, once);
506 const char *fromset_expand = NULL;
507 const char *toset_expand = NULL;
508 int result;
510 /* Ensure that the configuration data is read. */
511 __libc_once (once, __gconv_read_conf);
513 /* Acquire the lock. */
514 __libc_lock_lock (lock);
516 /* If we don't have a module database return with an error. */
517 if (__gconv_modules_db == NULL)
518 return GCONV_NOCONV;
520 /* See whether the names are aliases. */
521 if (__gconv_alias_db != NULL)
523 struct gconv_alias key;
524 struct gconv_alias **found;
526 key.fromname = fromset;
527 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
528 fromset_expand = found != NULL ? (*found)->toname : NULL;
530 key.fromname = toset;
531 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
532 toset_expand = found != NULL ? (*found)->toname : NULL;
535 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
536 handle, nsteps);
538 #ifndef STATIC_GCONV
539 /* Increment the user counter. */
540 if (result == GCONV_OK)
542 size_t cnt = *nsteps;
543 struct gconv_step *steps = *handle;
546 if (steps[--cnt].counter++ == 0)
548 steps[cnt].shlib_handle =
549 __gconv_find_shlib (steps[cnt].modname);
550 if (steps[cnt].shlib_handle == NULL)
552 /* Oops, this is the second time we use this module (after
553 unloading) and this time loading failed!? */
554 while (++cnt < *nsteps)
555 __gconv_release_shlib (steps[cnt].shlib_handle);
556 result = GCONV_NOCONV;
557 break;
560 while (cnt > 0);
562 #endif
564 /* Release the lock. */
565 __libc_lock_unlock (lock);
567 /* The following code is necessary since `find_derivation' will return
568 GCONV_OK even when no derivation was found but the same request
569 was processed before. I.e., negative results will also be cached. */
570 return (result == GCONV_OK
571 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
572 : result);
576 /* Release the entries of the modules list. */
578 internal_function
579 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
581 int result = GCONV_OK;
583 #ifndef STATIC_GCONV
584 /* Acquire the lock. */
585 __libc_lock_lock (lock);
587 while (nsteps-- > 0)
588 if (steps[nsteps].shlib_handle != NULL
589 && --steps[nsteps].counter == 0)
591 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
592 if (result != GCONV_OK)
593 break;
594 steps[nsteps].shlib_handle = NULL;
597 /* Release the lock. */
598 __libc_lock_unlock (lock);
599 #endif
601 return result;
605 /* Free all resources if necessary. */
606 static void __attribute__ ((unused))
607 free_mem (void)
609 size_t cnt;
611 if (__gconv_alias_db != NULL)
612 __tdestroy (__gconv_alias_db, free);
614 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
616 if (__gconv_modules_db[cnt]->from_regex != NULL)
617 __regfree ((regex_t *) __gconv_modules_db[cnt]->from_regex);
619 /* Modules which names do not start with a slash are builtin
620 transformations and the memory is not allocated dynamically. */
621 if (__gconv_modules_db[cnt]->module_name[0] == '/')
622 free (__gconv_modules_db[cnt]);
625 if (known_derivations != NULL)
626 __tdestroy (known_derivations, free_derivation);
629 text_set_element (__libc_subfreeres, free_mem);