Update.
[glibc.git] / iconv / gconv_db.c
blob25b06d07e4ad9afd89af8859d2c8a1a5f4a52573
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997,98,99,2000,2001,2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
28 #include <dlfcn.h>
29 #include <gconv_int.h>
30 #include <gconv_charset.h>
33 /* Simple data structure for alias mapping. We have two names, `from'
34 and `to'. */
35 void *__gconv_alias_db;
37 /* Array with available modules. */
38 struct gconv_module *__gconv_modules_db;
40 /* We modify global data. */
41 __libc_lock_define_initialized (static, lock)
44 /* Provide access to module database. */
45 struct gconv_module *
46 __gconv_get_modules_db (void)
48 return __gconv_modules_db;
51 void *
52 __gconv_get_alias_db (void)
54 return __gconv_alias_db;
58 /* Function for searching alias. */
59 int
60 __gconv_alias_compare (const void *p1, const void *p2)
62 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
63 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
64 return strcmp (s1->fromname, s2->fromname);
68 /* To search for a derivation we create a list of intermediate steps.
69 Each element contains a pointer to the element which precedes it
70 in the derivation order. */
71 struct derivation_step
73 const char *result_set;
74 size_t result_set_len;
75 int cost_lo;
76 int cost_hi;
77 struct gconv_module *code;
78 struct derivation_step *last;
79 struct derivation_step *next;
82 #define NEW_STEP(result, hi, lo, module, last_mod) \
83 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
84 newp->result_set = result; \
85 newp->result_set_len = strlen (result); \
86 newp->cost_hi = hi; \
87 newp->cost_lo = lo; \
88 newp->code = module; \
89 newp->last = last_mod; \
90 newp->next = NULL; \
91 newp; })
94 /* If a specific transformation is used more than once we should not need
95 to start looking for it again. Instead cache each successful result. */
96 struct known_derivation
98 const char *from;
99 const char *to;
100 struct __gconv_step *steps;
101 size_t nsteps;
104 /* Compare function for database of found derivations. */
105 static int
106 derivation_compare (const void *p1, const void *p2)
108 const struct known_derivation *s1 = (const struct known_derivation *) p1;
109 const struct known_derivation *s2 = (const struct known_derivation *) p2;
110 int result;
112 result = strcmp (s1->from, s2->from);
113 if (result == 0)
114 result = strcmp (s1->to, s2->to);
115 return result;
118 /* The search tree for known derivations. */
119 static void *known_derivations;
121 /* Look up whether given transformation was already requested before. */
122 static int
123 internal_function
124 derivation_lookup (const char *fromset, const char *toset,
125 struct __gconv_step **handle, size_t *nsteps)
127 struct known_derivation key = { fromset, toset, NULL, 0 };
128 struct known_derivation **result;
130 result = __tfind (&key, &known_derivations, derivation_compare);
132 if (result == NULL)
133 return __GCONV_NOCONV;
135 *handle = (*result)->steps;
136 *nsteps = (*result)->nsteps;
138 /* Please note that we return GCONV_OK even if the last search for
139 this transformation was unsuccessful. */
140 return __GCONV_OK;
143 /* Add new derivation to list of known ones. */
144 static void
145 internal_function
146 add_derivation (const char *fromset, const char *toset,
147 struct __gconv_step *handle, size_t nsteps)
149 struct known_derivation *new_deriv;
150 size_t fromset_len = strlen (fromset) + 1;
151 size_t toset_len = strlen (toset) + 1;
153 new_deriv = (struct known_derivation *)
154 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
155 if (new_deriv != NULL)
157 new_deriv->from = (char *) (new_deriv + 1);
158 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
159 toset, toset_len);
161 new_deriv->steps = handle;
162 new_deriv->nsteps = nsteps;
164 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
165 == NULL)
166 /* There is some kind of memory allocation problem. */
167 free (new_deriv);
169 /* Please note that we don't complain if the allocation failed. This
170 is not tragically but in case we use the memory debugging facilities
171 not all memory will be freed. */
174 static void
175 free_derivation (void *p)
177 struct known_derivation *deriv = (struct known_derivation *) p;
178 size_t cnt;
180 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
181 if (deriv->steps[cnt].__counter > 0
182 && deriv->steps[cnt].__end_fct != NULL)
183 DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt]));
185 /* Free the name strings. */
186 free ((char *) deriv->steps[0].__from_name);
187 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
189 free ((struct __gconv_step *) deriv->steps);
190 free (deriv);
194 /* Decrement the reference count for a single step in a steps array. */
195 void
196 internal_function
197 __gconv_release_step (struct __gconv_step *step)
199 if (--step->__counter == 0)
201 /* Call the destructor. */
202 if (step->__end_fct != NULL)
203 DL_CALL_FCT (step->__end_fct, (step));
205 #ifndef STATIC_GCONV
206 /* Skip builtin modules; they are not reference counted. */
207 if (step->__shlib_handle != NULL)
209 /* Release the loaded module. */
210 __gconv_release_shlib (step->__shlib_handle);
211 step->__shlib_handle = NULL;
213 #endif
217 static int
218 internal_function
219 gen_steps (struct derivation_step *best, const char *toset,
220 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
222 size_t step_cnt = 0;
223 struct __gconv_step *result;
224 struct derivation_step *current;
225 int status = __GCONV_NOMEM;
227 /* First determine number of steps. */
228 for (current = best; current->last != NULL; current = current->last)
229 ++step_cnt;
231 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
232 * step_cnt);
233 if (result != NULL)
235 int failed = 0;
237 status = __GCONV_OK;
238 *nsteps = step_cnt;
239 current = best;
240 while (step_cnt-- > 0)
242 result[step_cnt].__from_name = (step_cnt == 0
243 ? __strdup (fromset)
244 : (char *)current->last->result_set);
245 result[step_cnt].__to_name = (step_cnt + 1 == *nsteps
246 ? __strdup (current->result_set)
247 : result[step_cnt + 1].__from_name);
249 result[step_cnt].__counter = 1;
250 result[step_cnt].__data = NULL;
252 #ifndef STATIC_GCONV
253 if (current->code->module_name[0] == '/')
255 /* Load the module, return handle for it. */
256 struct __gconv_loaded_object *shlib_handle =
257 __gconv_find_shlib (current->code->module_name);
259 if (shlib_handle == NULL)
261 failed = 1;
262 break;
265 result[step_cnt].__shlib_handle = shlib_handle;
266 result[step_cnt].__modname = shlib_handle->name;
267 result[step_cnt].__fct = shlib_handle->fct;
268 result[step_cnt].__init_fct = shlib_handle->init_fct;
269 result[step_cnt].__end_fct = shlib_handle->end_fct;
271 /* Call the init function. */
272 if (result[step_cnt].__init_fct != NULL)
274 status = DL_CALL_FCT (result[step_cnt].__init_fct,
275 (&result[step_cnt]));
277 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
279 failed = 1;
280 /* Make sure we unload this modules. */
281 --step_cnt;
282 result[step_cnt].__end_fct = NULL;
283 break;
287 else
288 #endif
289 /* It's a builtin transformation. */
290 __gconv_get_builtin_trans (current->code->module_name,
291 &result[step_cnt]);
293 current = current->last;
296 if (__builtin_expect (failed, 0) != 0)
298 /* Something went wrong while initializing the modules. */
299 while (++step_cnt < *nsteps)
300 __gconv_release_step (&result[step_cnt]);
301 free (result);
302 *nsteps = 0;
303 *handle = NULL;
304 if (status == __GCONV_OK)
305 status = __GCONV_NOCONV;
307 else
308 *handle = result;
310 else
312 *nsteps = 0;
313 *handle = NULL;
316 return status;
320 #ifndef STATIC_GCONV
321 static int
322 internal_function
323 increment_counter (struct __gconv_step *steps, size_t nsteps)
325 /* Increment the user counter. */
326 size_t cnt = nsteps;
327 int result = __GCONV_OK;
329 while (cnt-- > 0)
331 struct __gconv_step *step = &steps[cnt];
333 if (step->__counter++ == 0)
335 /* Skip builtin modules. */
336 if (step->__modname != NULL)
338 /* Reopen a previously used module. */
339 step->__shlib_handle = __gconv_find_shlib (step->__modname);
340 if (step->__shlib_handle == NULL)
342 /* Oops, this is the second time we use this module
343 (after unloading) and this time loading failed!? */
344 --step->__counter;
345 while (++cnt < nsteps)
346 __gconv_release_step (&steps[cnt]);
347 result = __GCONV_NOCONV;
348 break;
351 /* The function addresses defined by the module may
352 have changed. */
353 step->__fct = step->__shlib_handle->fct;
354 step->__init_fct = step->__shlib_handle->init_fct;
355 step->__end_fct = step->__shlib_handle->end_fct;
358 if (step->__init_fct != NULL)
359 DL_CALL_FCT (step->__init_fct, (step));
362 return result;
364 #endif
367 /* The main function: find a possible derivation from the `fromset' (either
368 the given name or the alias) to the `toset' (again with alias). */
369 static int
370 internal_function
371 find_derivation (const char *toset, const char *toset_expand,
372 const char *fromset, const char *fromset_expand,
373 struct __gconv_step **handle, size_t *nsteps)
375 struct derivation_step *first, *current, **lastp, *solution = NULL;
376 int best_cost_hi = INT_MAX;
377 int best_cost_lo = INT_MAX;
378 int result;
380 /* Look whether an earlier call to `find_derivation' has already
381 computed a possible derivation. If so, return it immediately. */
382 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
383 handle, nsteps);
384 if (result == __GCONV_OK)
386 #ifndef STATIC_GCONV
387 result = increment_counter (*handle, *nsteps);
388 #endif
389 return result;
392 /* The task is to find a sequence of transformations, backed by the
393 existing modules - whether builtin or dynamically loadable -,
394 starting at `fromset' (or `fromset_expand') and ending at `toset'
395 (or `toset_expand'), and with minimal cost.
397 For computer scientists, this is a shortest path search in the
398 graph where the nodes are all possible charsets and the edges are
399 the transformations listed in __gconv_modules_db.
401 For now we use a simple algorithm with quadratic runtime behaviour.
402 A breadth-first search, starting at `fromset' and `fromset_expand'.
403 The list starting at `first' contains all nodes that have been
404 visited up to now, in the order in which they have been visited --
405 excluding the goal nodes `toset' and `toset_expand' which get
406 managed in the list starting at `solution'.
407 `current' walks through the list starting at `first' and looks
408 which nodes are reachable from the current node, adding them to
409 the end of the list [`first' or `solution' respectively] (if
410 they are visited the first time) or updating them in place (if
411 they have have already been visited).
412 In each node of either list, cost_lo and cost_hi contain the
413 minimum cost over any paths found up to now, starting at `fromset'
414 or `fromset_expand', ending at that node. best_cost_lo and
415 best_cost_hi represent the minimum over the elements of the
416 `solution' list. */
418 if (fromset_expand != NULL)
420 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
421 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
422 lastp = &first->next->next;
424 else
426 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
427 lastp = &first->next;
430 for (current = first; current != NULL; current = current->next)
432 /* Now match all the available module specifications against the
433 current charset name. If any of them matches check whether
434 we already have a derivation for this charset. If yes, use the
435 one with the lower costs. Otherwise add the new charset at the
436 end.
438 The module database is organized in a tree form which allows
439 searching for prefixes. So we search for the first entry with a
440 matching prefix and any other matching entry can be found from
441 this place. */
442 struct gconv_module *node;
444 /* Maybe it is not necessary anymore to look for a solution for
445 this entry since the cost is already as high (or higher) as
446 the cost for the best solution so far. */
447 if (current->cost_hi > best_cost_hi
448 || (current->cost_hi == best_cost_hi
449 && current->cost_lo >= best_cost_lo))
450 continue;
452 node = __gconv_modules_db;
453 while (node != NULL)
455 int cmpres = strcmp (current->result_set, node->from_string);
456 if (cmpres == 0)
458 /* Walk through the list of modules with this prefix and
459 try to match the name. */
460 struct gconv_module *runp;
462 /* Check all the modules with this prefix. */
463 runp = node;
466 const char *result_set = (strcmp (runp->to_string, "-") == 0
467 ? (toset_expand ?: toset)
468 : runp->to_string);
469 int cost_hi = runp->cost_hi + current->cost_hi;
470 int cost_lo = runp->cost_lo + current->cost_lo;
471 struct derivation_step *step;
473 /* We managed to find a derivation. First see whether
474 we have reached one of the goal nodes. */
475 if (strcmp (result_set, toset) == 0
476 || (toset_expand != NULL
477 && strcmp (result_set, toset_expand) == 0))
479 /* Append to the `solution' list if there
480 is no entry with this name. */
481 for (step = solution; step != NULL; step = step->next)
482 if (strcmp (result_set, step->result_set) == 0)
483 break;
485 if (step == NULL)
487 step = NEW_STEP (result_set,
488 cost_hi, cost_lo,
489 runp, current);
490 step->next = solution;
491 solution = step;
493 else if (step->cost_hi > cost_hi
494 || (step->cost_hi == cost_hi
495 && step->cost_lo > cost_lo))
497 /* A better path was found for the node,
498 on the `solution' list. */
499 step->code = runp;
500 step->last = current;
501 step->cost_hi = cost_hi;
502 step->cost_lo = cost_lo;
505 /* Update best_cost accordingly. */
506 if (cost_hi < best_cost_hi
507 || (cost_hi == best_cost_hi
508 && cost_lo < best_cost_lo))
510 best_cost_hi = cost_hi;
511 best_cost_lo = cost_lo;
514 else if (cost_hi < best_cost_hi
515 || (cost_hi == best_cost_hi
516 && cost_lo < best_cost_lo))
518 /* Append at the end of the `first' list if there
519 is no entry with this name. */
520 for (step = first; step != NULL; step = step->next)
521 if (strcmp (result_set, step->result_set) == 0)
522 break;
524 if (step == NULL)
526 *lastp = NEW_STEP (result_set,
527 cost_hi, cost_lo,
528 runp, current);
529 lastp = &(*lastp)->next;
531 else if (step->cost_hi > cost_hi
532 || (step->cost_hi == cost_hi
533 && step->cost_lo > cost_lo))
535 /* A better path was found for the node,
536 on the `first' list. */
537 step->code = runp;
538 step->last = current;
540 /* Update the cost for all steps. */
541 for (step = first; step != NULL;
542 step = step->next)
543 /* But don't update the start nodes. */
544 if (step->code != NULL)
546 struct derivation_step *back;
547 int hi, lo;
549 hi = step->code->cost_hi;
550 lo = step->code->cost_lo;
552 for (back = step->last; back->code != NULL;
553 back = back->last)
555 hi += back->code->cost_hi;
556 lo += back->code->cost_lo;
559 step->cost_hi = hi;
560 step->cost_lo = lo;
563 /* Likewise for the nodes on the solution list.
564 Also update best_cost accordingly. */
565 for (step = solution; step != NULL;
566 step = step->next)
568 step->cost_hi = (step->code->cost_hi
569 + step->last->cost_hi);
570 step->cost_lo = (step->code->cost_lo
571 + step->last->cost_lo);
573 if (step->cost_hi < best_cost_hi
574 || (step->cost_hi == best_cost_hi
575 && step->cost_lo < best_cost_lo))
577 best_cost_hi = step->cost_hi;
578 best_cost_lo = step->cost_lo;
584 runp = runp->same;
586 while (runp != NULL);
588 break;
590 else if (cmpres < 0)
591 node = node->left;
592 else
593 node = node->right;
597 if (solution != NULL)
599 /* We really found a way to do the transformation. */
601 /* Choose the best solution. This is easy because we know that
602 the solution list has at most length 2 (one for every possible
603 goal node). */
604 if (solution->next != NULL)
606 struct derivation_step *solution2 = solution->next;
608 if (solution2->cost_hi < solution->cost_hi
609 || (solution2->cost_hi == solution->cost_hi
610 && solution2->cost_lo < solution->cost_lo))
611 solution = solution2;
614 /* Now build a data structure describing the transformation steps. */
615 result = gen_steps (solution, toset_expand ?: toset,
616 fromset_expand ?: fromset, handle, nsteps);
618 else
620 /* We haven't found a transformation. Clear the result values. */
621 *handle = NULL;
622 *nsteps = 0;
625 /* Add result in any case to list of known derivations. */
626 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
627 *handle, *nsteps);
629 return result;
633 /* Control of initialization. */
634 __libc_once_define (static, once);
637 static const char *
638 do_lookup_alias (const char *name)
640 struct gconv_alias key;
641 struct gconv_alias **found;
643 key.fromname = (char *) name;
644 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
645 return found != NULL ? (*found)->toname : NULL;
650 internal_function
651 __gconv_compare_alias (const char *name1, const char *name2)
653 int result;
655 /* Ensure that the configuration data is read. */
656 __libc_once (once, __gconv_read_conf);
658 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
659 result = strcmp (do_lookup_alias (name1) ?: name1,
660 do_lookup_alias (name2) ?: name2);
662 return result;
667 internal_function
668 __gconv_find_transform (const char *toset, const char *fromset,
669 struct __gconv_step **handle, size_t *nsteps,
670 int flags)
672 const char *fromset_expand;
673 const char *toset_expand;
674 int result;
676 /* Ensure that the configuration data is read. */
677 __libc_once (once, __gconv_read_conf);
679 /* Acquire the lock. */
680 __libc_lock_lock (lock);
682 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
683 if (result != __GCONV_NODB)
685 /* We have a cache and could resolve the request, successful or not. */
686 __libc_lock_unlock (lock);
687 return result;
690 /* If we don't have a module database return with an error. */
691 if (__gconv_modules_db == NULL)
693 __libc_lock_unlock (lock);
694 return __GCONV_NOCONV;
697 /* See whether the names are aliases. */
698 fromset_expand = do_lookup_alias (fromset);
699 toset_expand = do_lookup_alias (toset);
701 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
702 /* We are not supposed to create a pseudo transformation (means
703 copying) when the input and output character set are the same. */
704 && (strcmp (toset, fromset) == 0
705 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
706 || (fromset_expand != NULL
707 && (strcmp (toset, fromset_expand) == 0
708 || (toset_expand != NULL
709 && strcmp (toset_expand, fromset_expand) == 0)))))
711 /* Both character sets are the same. */
712 __libc_lock_unlock (lock);
713 return __GCONV_NOCONV;
716 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
717 handle, nsteps);
719 /* Release the lock. */
720 __libc_lock_unlock (lock);
722 /* The following code is necessary since `find_derivation' will return
723 GCONV_OK even when no derivation was found but the same request
724 was processed before. I.e., negative results will also be cached. */
725 return (result == __GCONV_OK
726 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
727 : result);
731 /* Release the entries of the modules list. */
733 internal_function
734 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
736 int result = __GCONV_OK;
737 size_t cnt;
739 /* Acquire the lock. */
740 __libc_lock_lock (lock);
742 #ifndef STATIC_GCONV
743 cnt = nsteps;
744 while (cnt-- > 0)
745 __gconv_release_step (&steps[cnt]);
746 #endif
748 /* If we use the cache we free a bit more since we don't keep any
749 transformation records around, they are cheap enough to
750 recreate. */
751 __gconv_release_cache (steps, nsteps);
753 /* Release the lock. */
754 __libc_lock_unlock (lock);
756 return result;
760 /* Free the modules mentioned. */
761 static void
762 internal_function
763 free_modules_db (struct gconv_module *node)
765 if (node->left != NULL)
766 free_modules_db (node->left);
767 if (node->right != NULL)
768 free_modules_db (node->right);
771 struct gconv_module *act = node;
772 node = node->same;
773 if (act->module_name[0] == '/')
774 free (act);
776 while (node != NULL);
780 /* Free all resources if necessary. */
781 static void __attribute__ ((unused))
782 free_mem (void)
784 if (__gconv_alias_db != NULL)
785 __tdestroy (__gconv_alias_db, free);
787 if (__gconv_modules_db != NULL)
788 free_modules_db (__gconv_modules_db);
790 if (known_derivations != NULL)
791 __tdestroy (known_derivations, free_derivation);
794 text_set_element (__libc_subfreeres, free_mem);