Update.
[glibc.git] / iconv / gconv_db.c
blob8dc6b14d2584c97e151df259e878ab9c01c88fe4
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
27 #include <locale/localeinfo.h>
29 #include <dlfcn.h>
30 #include <gconv_int.h>
33 /* Simple data structure for alias mapping. We have two names, `from'
34 and `to'. */
35 void *__gconv_alias_db;
37 /* Array with available modules. */
38 struct gconv_module *__gconv_modules_db;
40 /* We modify global data. */
41 __libc_lock_define_initialized (, __gconv_lock)
44 /* Provide access to module database. */
45 struct gconv_module *
46 __gconv_get_modules_db (void)
48 return __gconv_modules_db;
51 void *
52 __gconv_get_alias_db (void)
54 return __gconv_alias_db;
58 /* Function for searching alias. */
59 int
60 __gconv_alias_compare (const void *p1, const void *p2)
62 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
63 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
64 return strcmp (s1->fromname, s2->fromname);
68 /* To search for a derivation we create a list of intermediate steps.
69 Each element contains a pointer to the element which precedes it
70 in the derivation order. */
71 struct derivation_step
73 const char *result_set;
74 size_t result_set_len;
75 int cost_lo;
76 int cost_hi;
77 struct gconv_module *code;
78 struct derivation_step *last;
79 struct derivation_step *next;
82 #define NEW_STEP(result, hi, lo, module, last_mod) \
83 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
84 newp->result_set = result; \
85 newp->result_set_len = strlen (result); \
86 newp->cost_hi = hi; \
87 newp->cost_lo = lo; \
88 newp->code = module; \
89 newp->last = last_mod; \
90 newp->next = NULL; \
91 newp; })
94 /* If a specific transformation is used more than once we should not need
95 to start looking for it again. Instead cache each successful result. */
96 struct known_derivation
98 const char *from;
99 const char *to;
100 struct __gconv_step *steps;
101 size_t nsteps;
104 /* Compare function for database of found derivations. */
105 static int
106 derivation_compare (const void *p1, const void *p2)
108 const struct known_derivation *s1 = (const struct known_derivation *) p1;
109 const struct known_derivation *s2 = (const struct known_derivation *) p2;
110 int result;
112 result = strcmp (s1->from, s2->from);
113 if (result == 0)
114 result = strcmp (s1->to, s2->to);
115 return result;
118 /* The search tree for known derivations. */
119 static void *known_derivations;
121 /* Look up whether given transformation was already requested before. */
122 static int
123 internal_function
124 derivation_lookup (const char *fromset, const char *toset,
125 struct __gconv_step **handle, size_t *nsteps)
127 struct known_derivation key = { fromset, toset, NULL, 0 };
128 struct known_derivation **result;
130 result = __tfind (&key, &known_derivations, derivation_compare);
132 if (result == NULL)
133 return __GCONV_NOCONV;
135 *handle = (*result)->steps;
136 *nsteps = (*result)->nsteps;
138 /* Please note that we return GCONV_OK even if the last search for
139 this transformation was unsuccessful. */
140 return __GCONV_OK;
143 /* Add new derivation to list of known ones. */
144 static void
145 internal_function
146 add_derivation (const char *fromset, const char *toset,
147 struct __gconv_step *handle, size_t nsteps)
149 struct known_derivation *new_deriv;
150 size_t fromset_len = strlen (fromset) + 1;
151 size_t toset_len = strlen (toset) + 1;
153 new_deriv = (struct known_derivation *)
154 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
155 if (new_deriv != NULL)
157 new_deriv->from = (char *) (new_deriv + 1);
158 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
159 toset, toset_len);
161 new_deriv->steps = handle;
162 new_deriv->nsteps = nsteps;
164 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
165 == NULL)
166 /* There is some kind of memory allocation problem. */
167 free (new_deriv);
169 /* Please note that we don't complain if the allocation failed. This
170 is not tragically but in case we use the memory debugging facilities
171 not all memory will be freed. */
174 static void __libc_freeres_fn_section
175 free_derivation (void *p)
177 struct known_derivation *deriv = (struct known_derivation *) p;
178 size_t cnt;
180 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
181 if (deriv->steps[cnt].__counter > 0
182 && deriv->steps[cnt].__end_fct != NULL)
183 DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt]));
185 /* Free the name strings. */
186 free ((char *) deriv->steps[0].__from_name);
187 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
189 free ((struct __gconv_step *) deriv->steps);
190 free (deriv);
194 /* Decrement the reference count for a single step in a steps array. */
195 void
196 internal_function
197 __gconv_release_step (struct __gconv_step *step)
199 if (--step->__counter == 0)
201 /* Call the destructor. */
202 if (step->__end_fct != NULL)
203 DL_CALL_FCT (step->__end_fct, (step));
205 #ifndef STATIC_GCONV
206 /* Skip builtin modules; they are not reference counted. */
207 if (step->__shlib_handle != NULL)
209 /* Release the loaded module. */
210 __gconv_release_shlib (step->__shlib_handle);
211 step->__shlib_handle = NULL;
213 #endif
217 static int
218 internal_function
219 gen_steps (struct derivation_step *best, const char *toset,
220 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
222 size_t step_cnt = 0;
223 struct __gconv_step *result;
224 struct derivation_step *current;
225 int status = __GCONV_NOMEM;
227 /* First determine number of steps. */
228 for (current = best; current->last != NULL; current = current->last)
229 ++step_cnt;
231 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
232 * step_cnt);
233 if (result != NULL)
235 int failed = 0;
237 status = __GCONV_OK;
238 *nsteps = step_cnt;
239 current = best;
240 while (step_cnt-- > 0)
242 result[step_cnt].__from_name = (step_cnt == 0
243 ? __strdup (fromset)
244 : (char *)current->last->result_set);
245 result[step_cnt].__to_name = (step_cnt + 1 == *nsteps
246 ? __strdup (current->result_set)
247 : result[step_cnt + 1].__from_name);
249 result[step_cnt].__counter = 1;
250 result[step_cnt].__data = NULL;
252 #ifndef STATIC_GCONV
253 if (current->code->module_name[0] == '/')
255 /* Load the module, return handle for it. */
256 struct __gconv_loaded_object *shlib_handle =
257 __gconv_find_shlib (current->code->module_name);
259 if (shlib_handle == NULL)
261 failed = 1;
262 break;
265 result[step_cnt].__shlib_handle = shlib_handle;
266 result[step_cnt].__modname = shlib_handle->name;
267 result[step_cnt].__fct = shlib_handle->fct;
268 result[step_cnt].__init_fct = shlib_handle->init_fct;
269 result[step_cnt].__end_fct = shlib_handle->end_fct;
271 /* These settings can be overridden by the init function. */
272 result[step_cnt].__btowc_fct = NULL;
274 /* Call the init function. */
275 if (result[step_cnt].__init_fct != NULL)
277 status = DL_CALL_FCT (result[step_cnt].__init_fct,
278 (&result[step_cnt]));
280 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
282 failed = 1;
283 /* Make sure we unload this modules. */
284 --step_cnt;
285 result[step_cnt].__end_fct = NULL;
286 break;
290 else
291 #endif
292 /* It's a builtin transformation. */
293 __gconv_get_builtin_trans (current->code->module_name,
294 &result[step_cnt]);
296 current = current->last;
299 if (__builtin_expect (failed, 0) != 0)
301 /* Something went wrong while initializing the modules. */
302 while (++step_cnt < *nsteps)
303 __gconv_release_step (&result[step_cnt]);
304 free (result);
305 *nsteps = 0;
306 *handle = NULL;
307 if (status == __GCONV_OK)
308 status = __GCONV_NOCONV;
310 else
311 *handle = result;
313 else
315 *nsteps = 0;
316 *handle = NULL;
319 return status;
323 #ifndef STATIC_GCONV
324 static int
325 internal_function
326 increment_counter (struct __gconv_step *steps, size_t nsteps)
328 /* Increment the user counter. */
329 size_t cnt = nsteps;
330 int result = __GCONV_OK;
332 while (cnt-- > 0)
334 struct __gconv_step *step = &steps[cnt];
336 if (step->__counter++ == 0)
338 /* Skip builtin modules. */
339 if (step->__modname != NULL)
341 /* Reopen a previously used module. */
342 step->__shlib_handle = __gconv_find_shlib (step->__modname);
343 if (step->__shlib_handle == NULL)
345 /* Oops, this is the second time we use this module
346 (after unloading) and this time loading failed!? */
347 --step->__counter;
348 while (++cnt < nsteps)
349 __gconv_release_step (&steps[cnt]);
350 result = __GCONV_NOCONV;
351 break;
354 /* The function addresses defined by the module may
355 have changed. */
356 step->__fct = step->__shlib_handle->fct;
357 step->__init_fct = step->__shlib_handle->init_fct;
358 step->__end_fct = step->__shlib_handle->end_fct;
360 /* These settings can be overridden by the init function. */
361 step->__btowc_fct = NULL;
364 /* Call the init function. */
365 if (step->__init_fct != NULL)
366 DL_CALL_FCT (step->__init_fct, (step));
369 return result;
371 #endif
374 /* The main function: find a possible derivation from the `fromset' (either
375 the given name or the alias) to the `toset' (again with alias). */
376 static int
377 internal_function
378 find_derivation (const char *toset, const char *toset_expand,
379 const char *fromset, const char *fromset_expand,
380 struct __gconv_step **handle, size_t *nsteps)
382 struct derivation_step *first, *current, **lastp, *solution = NULL;
383 int best_cost_hi = INT_MAX;
384 int best_cost_lo = INT_MAX;
385 int result;
387 /* Look whether an earlier call to `find_derivation' has already
388 computed a possible derivation. If so, return it immediately. */
389 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
390 handle, nsteps);
391 if (result == __GCONV_OK)
393 #ifndef STATIC_GCONV
394 result = increment_counter (*handle, *nsteps);
395 #endif
396 return result;
399 /* The task is to find a sequence of transformations, backed by the
400 existing modules - whether builtin or dynamically loadable -,
401 starting at `fromset' (or `fromset_expand') and ending at `toset'
402 (or `toset_expand'), and with minimal cost.
404 For computer scientists, this is a shortest path search in the
405 graph where the nodes are all possible charsets and the edges are
406 the transformations listed in __gconv_modules_db.
408 For now we use a simple algorithm with quadratic runtime behaviour.
409 A breadth-first search, starting at `fromset' and `fromset_expand'.
410 The list starting at `first' contains all nodes that have been
411 visited up to now, in the order in which they have been visited --
412 excluding the goal nodes `toset' and `toset_expand' which get
413 managed in the list starting at `solution'.
414 `current' walks through the list starting at `first' and looks
415 which nodes are reachable from the current node, adding them to
416 the end of the list [`first' or `solution' respectively] (if
417 they are visited the first time) or updating them in place (if
418 they have have already been visited).
419 In each node of either list, cost_lo and cost_hi contain the
420 minimum cost over any paths found up to now, starting at `fromset'
421 or `fromset_expand', ending at that node. best_cost_lo and
422 best_cost_hi represent the minimum over the elements of the
423 `solution' list. */
425 if (fromset_expand != NULL)
427 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
428 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
429 lastp = &first->next->next;
431 else
433 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
434 lastp = &first->next;
437 for (current = first; current != NULL; current = current->next)
439 /* Now match all the available module specifications against the
440 current charset name. If any of them matches check whether
441 we already have a derivation for this charset. If yes, use the
442 one with the lower costs. Otherwise add the new charset at the
443 end.
445 The module database is organized in a tree form which allows
446 searching for prefixes. So we search for the first entry with a
447 matching prefix and any other matching entry can be found from
448 this place. */
449 struct gconv_module *node;
451 /* Maybe it is not necessary anymore to look for a solution for
452 this entry since the cost is already as high (or higher) as
453 the cost for the best solution so far. */
454 if (current->cost_hi > best_cost_hi
455 || (current->cost_hi == best_cost_hi
456 && current->cost_lo >= best_cost_lo))
457 continue;
459 node = __gconv_modules_db;
460 while (node != NULL)
462 int cmpres = strcmp (current->result_set, node->from_string);
463 if (cmpres == 0)
465 /* Walk through the list of modules with this prefix and
466 try to match the name. */
467 struct gconv_module *runp;
469 /* Check all the modules with this prefix. */
470 runp = node;
473 const char *result_set = (strcmp (runp->to_string, "-") == 0
474 ? (toset_expand ?: toset)
475 : runp->to_string);
476 int cost_hi = runp->cost_hi + current->cost_hi;
477 int cost_lo = runp->cost_lo + current->cost_lo;
478 struct derivation_step *step;
480 /* We managed to find a derivation. First see whether
481 we have reached one of the goal nodes. */
482 if (strcmp (result_set, toset) == 0
483 || (toset_expand != NULL
484 && strcmp (result_set, toset_expand) == 0))
486 /* Append to the `solution' list if there
487 is no entry with this name. */
488 for (step = solution; step != NULL; step = step->next)
489 if (strcmp (result_set, step->result_set) == 0)
490 break;
492 if (step == NULL)
494 step = NEW_STEP (result_set,
495 cost_hi, cost_lo,
496 runp, current);
497 step->next = solution;
498 solution = step;
500 else if (step->cost_hi > cost_hi
501 || (step->cost_hi == cost_hi
502 && step->cost_lo > cost_lo))
504 /* A better path was found for the node,
505 on the `solution' list. */
506 step->code = runp;
507 step->last = current;
508 step->cost_hi = cost_hi;
509 step->cost_lo = cost_lo;
512 /* Update best_cost accordingly. */
513 if (cost_hi < best_cost_hi
514 || (cost_hi == best_cost_hi
515 && cost_lo < best_cost_lo))
517 best_cost_hi = cost_hi;
518 best_cost_lo = cost_lo;
521 else if (cost_hi < best_cost_hi
522 || (cost_hi == best_cost_hi
523 && cost_lo < best_cost_lo))
525 /* Append at the end of the `first' list if there
526 is no entry with this name. */
527 for (step = first; step != NULL; step = step->next)
528 if (strcmp (result_set, step->result_set) == 0)
529 break;
531 if (step == NULL)
533 *lastp = NEW_STEP (result_set,
534 cost_hi, cost_lo,
535 runp, current);
536 lastp = &(*lastp)->next;
538 else if (step->cost_hi > cost_hi
539 || (step->cost_hi == cost_hi
540 && step->cost_lo > cost_lo))
542 /* A better path was found for the node,
543 on the `first' list. */
544 step->code = runp;
545 step->last = current;
547 /* Update the cost for all steps. */
548 for (step = first; step != NULL;
549 step = step->next)
550 /* But don't update the start nodes. */
551 if (step->code != NULL)
553 struct derivation_step *back;
554 int hi, lo;
556 hi = step->code->cost_hi;
557 lo = step->code->cost_lo;
559 for (back = step->last; back->code != NULL;
560 back = back->last)
562 hi += back->code->cost_hi;
563 lo += back->code->cost_lo;
566 step->cost_hi = hi;
567 step->cost_lo = lo;
570 /* Likewise for the nodes on the solution list.
571 Also update best_cost accordingly. */
572 for (step = solution; step != NULL;
573 step = step->next)
575 step->cost_hi = (step->code->cost_hi
576 + step->last->cost_hi);
577 step->cost_lo = (step->code->cost_lo
578 + step->last->cost_lo);
580 if (step->cost_hi < best_cost_hi
581 || (step->cost_hi == best_cost_hi
582 && step->cost_lo < best_cost_lo))
584 best_cost_hi = step->cost_hi;
585 best_cost_lo = step->cost_lo;
591 runp = runp->same;
593 while (runp != NULL);
595 break;
597 else if (cmpres < 0)
598 node = node->left;
599 else
600 node = node->right;
604 if (solution != NULL)
606 /* We really found a way to do the transformation. */
608 /* Choose the best solution. This is easy because we know that
609 the solution list has at most length 2 (one for every possible
610 goal node). */
611 if (solution->next != NULL)
613 struct derivation_step *solution2 = solution->next;
615 if (solution2->cost_hi < solution->cost_hi
616 || (solution2->cost_hi == solution->cost_hi
617 && solution2->cost_lo < solution->cost_lo))
618 solution = solution2;
621 /* Now build a data structure describing the transformation steps. */
622 result = gen_steps (solution, toset_expand ?: toset,
623 fromset_expand ?: fromset, handle, nsteps);
625 else
627 /* We haven't found a transformation. Clear the result values. */
628 *handle = NULL;
629 *nsteps = 0;
632 /* Add result in any case to list of known derivations. */
633 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
634 *handle, *nsteps);
636 return result;
640 /* Control of initialization. */
641 __libc_once_define (static, once);
644 static const char *
645 do_lookup_alias (const char *name)
647 struct gconv_alias key;
648 struct gconv_alias **found;
650 key.fromname = (char *) name;
651 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
652 return found != NULL ? (*found)->toname : NULL;
657 internal_function
658 __gconv_compare_alias (const char *name1, const char *name2)
660 int result;
662 /* Ensure that the configuration data is read. */
663 __libc_once (once, __gconv_read_conf);
665 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
666 result = strcmp (do_lookup_alias (name1) ?: name1,
667 do_lookup_alias (name2) ?: name2);
669 return result;
674 internal_function
675 __gconv_find_transform (const char *toset, const char *fromset,
676 struct __gconv_step **handle, size_t *nsteps,
677 int flags)
679 const char *fromset_expand;
680 const char *toset_expand;
681 int result;
683 /* Ensure that the configuration data is read. */
684 __libc_once (once, __gconv_read_conf);
686 /* Acquire the lock. */
687 __libc_lock_lock (__gconv_lock);
689 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
690 if (result != __GCONV_NODB)
692 /* We have a cache and could resolve the request, successful or not. */
693 __libc_lock_unlock (__gconv_lock);
694 return result;
697 /* If we don't have a module database return with an error. */
698 if (__gconv_modules_db == NULL)
700 __libc_lock_unlock (__gconv_lock);
701 return __GCONV_NOCONV;
704 /* See whether the names are aliases. */
705 fromset_expand = do_lookup_alias (fromset);
706 toset_expand = do_lookup_alias (toset);
708 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
709 /* We are not supposed to create a pseudo transformation (means
710 copying) when the input and output character set are the same. */
711 && (strcmp (toset, fromset) == 0
712 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
713 || (fromset_expand != NULL
714 && (strcmp (toset, fromset_expand) == 0
715 || (toset_expand != NULL
716 && strcmp (toset_expand, fromset_expand) == 0)))))
718 /* Both character sets are the same. */
719 __libc_lock_unlock (__gconv_lock);
720 return __GCONV_NOCONV;
723 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
724 handle, nsteps);
726 /* Release the lock. */
727 __libc_lock_unlock (__gconv_lock);
729 /* The following code is necessary since `find_derivation' will return
730 GCONV_OK even when no derivation was found but the same request
731 was processed before. I.e., negative results will also be cached. */
732 return (result == __GCONV_OK
733 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
734 : result);
738 /* Release the entries of the modules list. */
740 internal_function
741 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
743 int result = __GCONV_OK;
744 size_t cnt;
746 /* Acquire the lock. */
747 __libc_lock_lock (__gconv_lock);
749 #ifndef STATIC_GCONV
750 cnt = nsteps;
751 while (cnt-- > 0)
752 __gconv_release_step (&steps[cnt]);
753 #endif
755 /* If we use the cache we free a bit more since we don't keep any
756 transformation records around, they are cheap enough to
757 recreate. */
758 __gconv_release_cache (steps, nsteps);
760 /* Release the lock. */
761 __libc_lock_unlock (__gconv_lock);
763 return result;
767 /* Free the modules mentioned. */
768 static void
769 internal_function __libc_freeres_fn_section
770 free_modules_db (struct gconv_module *node)
772 if (node->left != NULL)
773 free_modules_db (node->left);
774 if (node->right != NULL)
775 free_modules_db (node->right);
778 struct gconv_module *act = node;
779 node = node->same;
780 if (act->module_name[0] == '/')
781 free (act);
783 while (node != NULL);
787 /* Free all resources if necessary. */
788 libc_freeres_fn (free_mem)
790 /* First free locale memory. This needs to be done before freeing derivations,
791 as ctype cleanup functions dereference steps arrays which we free below. */
792 _nl_locale_subfreeres ();
794 /* finddomain.c has similar problem. */
795 extern void _nl_finddomain_subfreeres (void) attribute_hidden;
796 _nl_finddomain_subfreeres ();
798 if (__gconv_alias_db != NULL)
799 __tdestroy (__gconv_alias_db, free);
801 if (__gconv_modules_db != NULL)
802 free_modules_db (__gconv_modules_db);
804 if (known_derivations != NULL)
805 __tdestroy (known_derivations, free_derivation);