Update.
[glibc.git] / iconv / gconv_db.c
blob9bd27c5e697a3968f9471e4dc4f9a07f8abcb944
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997,98,99,2000,2001 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
28 #include <dlfcn.h>
29 #include <gconv_int.h>
30 #include <gconv_charset.h>
33 /* Simple data structure for alias mapping. We have two names, `from'
34 and `to'. */
35 void *__gconv_alias_db;
37 /* Array with available modules. */
38 struct gconv_module *__gconv_modules_db;
40 /* We modify global data. */
41 __libc_lock_define_initialized (static, lock)
44 /* Function for searching alias. */
45 int
46 __gconv_alias_compare (const void *p1, const void *p2)
48 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
49 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
50 return strcmp (s1->fromname, s2->fromname);
54 /* To search for a derivation we create a list of intermediate steps.
55 Each element contains a pointer to the element which precedes it
56 in the derivation order. */
57 struct derivation_step
59 const char *result_set;
60 size_t result_set_len;
61 int cost_lo;
62 int cost_hi;
63 struct gconv_module *code;
64 struct derivation_step *last;
65 struct derivation_step *next;
68 #define NEW_STEP(result, hi, lo, module, last_mod) \
69 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
70 newp->result_set = result; \
71 newp->result_set_len = strlen (result); \
72 newp->cost_hi = hi; \
73 newp->cost_lo = lo; \
74 newp->code = module; \
75 newp->last = last_mod; \
76 newp->next = NULL; \
77 newp; })
80 /* If a specific transformation is used more than once we should not need
81 to start looking for it again. Instead cache each successful result. */
82 struct known_derivation
84 const char *from;
85 const char *to;
86 struct __gconv_step *steps;
87 size_t nsteps;
90 /* Compare function for database of found derivations. */
91 static int
92 derivation_compare (const void *p1, const void *p2)
94 const struct known_derivation *s1 = (const struct known_derivation *) p1;
95 const struct known_derivation *s2 = (const struct known_derivation *) p2;
96 int result;
98 result = strcmp (s1->from, s2->from);
99 if (result == 0)
100 result = strcmp (s1->to, s2->to);
101 return result;
104 /* The search tree for known derivations. */
105 static void *known_derivations;
107 /* Look up whether given transformation was already requested before. */
108 static int
109 internal_function
110 derivation_lookup (const char *fromset, const char *toset,
111 struct __gconv_step **handle, size_t *nsteps)
113 struct known_derivation key = { fromset, toset, NULL, 0 };
114 struct known_derivation **result;
116 result = __tfind (&key, &known_derivations, derivation_compare);
118 if (result == NULL)
119 return __GCONV_NOCONV;
121 *handle = (*result)->steps;
122 *nsteps = (*result)->nsteps;
124 /* Please note that we return GCONV_OK even if the last search for
125 this transformation was unsuccessful. */
126 return __GCONV_OK;
129 /* Add new derivation to list of known ones. */
130 static void
131 internal_function
132 add_derivation (const char *fromset, const char *toset,
133 struct __gconv_step *handle, size_t nsteps)
135 struct known_derivation *new_deriv;
136 size_t fromset_len = strlen (fromset) + 1;
137 size_t toset_len = strlen (toset) + 1;
139 new_deriv = (struct known_derivation *)
140 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
141 if (new_deriv != NULL)
143 new_deriv->from = (char *) (new_deriv + 1);
144 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
145 toset, toset_len);
147 new_deriv->steps = handle;
148 new_deriv->nsteps = nsteps;
150 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
151 == NULL)
152 /* There is some kind of memory allocation problem. */
153 free (new_deriv);
155 /* Please note that we don't complain if the allocation failed. This
156 is not tragically but in case we use the memory debugging facilities
157 not all memory will be freed. */
160 static void
161 free_derivation (void *p)
163 struct known_derivation *deriv = (struct known_derivation *) p;
164 size_t cnt;
166 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
167 if (deriv->steps[cnt].__counter > 0
168 && deriv->steps[cnt].__end_fct != NULL)
169 DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt]));
171 /* Free the name strings. */
172 free ((char *) deriv->steps[0].__from_name);
173 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
175 free ((struct __gconv_step *) deriv->steps);
176 free (deriv);
180 /* Decrement the reference count for a single step in a steps array. */
181 void
182 internal_function
183 __gconv_release_step (struct __gconv_step *step)
185 if (--step->__counter == 0)
187 /* Call the destructor. */
188 if (step->__end_fct != NULL)
189 DL_CALL_FCT (step->__end_fct, (step));
191 #ifndef STATIC_GCONV
192 /* Skip builtin modules; they are not reference counted. */
193 if (step->__shlib_handle != NULL)
195 /* Release the loaded module. */
196 __gconv_release_shlib (step->__shlib_handle);
197 step->__shlib_handle = NULL;
199 #endif
203 static int
204 internal_function
205 gen_steps (struct derivation_step *best, const char *toset,
206 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
208 size_t step_cnt = 0;
209 struct __gconv_step *result;
210 struct derivation_step *current;
211 int status = __GCONV_NOMEM;
213 /* First determine number of steps. */
214 for (current = best; current->last != NULL; current = current->last)
215 ++step_cnt;
217 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
218 * step_cnt);
219 if (result != NULL)
221 int failed = 0;
223 status = __GCONV_OK;
224 *nsteps = step_cnt;
225 current = best;
226 while (step_cnt-- > 0)
228 result[step_cnt].__from_name = (step_cnt == 0
229 ? __strdup (fromset)
230 : (char *)current->last->result_set);
231 result[step_cnt].__to_name = (step_cnt + 1 == *nsteps
232 ? __strdup (current->result_set)
233 : result[step_cnt + 1].__from_name);
235 result[step_cnt].__counter = 1;
236 result[step_cnt].__data = NULL;
238 #ifndef STATIC_GCONV
239 if (current->code->module_name[0] == '/')
241 /* Load the module, return handle for it. */
242 struct __gconv_loaded_object *shlib_handle =
243 __gconv_find_shlib (current->code->module_name);
245 if (shlib_handle == NULL)
247 failed = 1;
248 break;
251 result[step_cnt].__shlib_handle = shlib_handle;
252 result[step_cnt].__modname = shlib_handle->name;
253 result[step_cnt].__fct = shlib_handle->fct;
254 result[step_cnt].__init_fct = shlib_handle->init_fct;
255 result[step_cnt].__end_fct = shlib_handle->end_fct;
257 /* Call the init function. */
258 if (result[step_cnt].__init_fct != NULL)
260 status = DL_CALL_FCT (result[step_cnt].__init_fct,
261 (&result[step_cnt]));
263 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
265 failed = 1;
266 /* Make sure we unload this modules. */
267 --step_cnt;
268 result[step_cnt].__end_fct = NULL;
269 break;
273 else
274 #endif
275 /* It's a builtin transformation. */
276 __gconv_get_builtin_trans (current->code->module_name,
277 &result[step_cnt]);
279 current = current->last;
282 if (__builtin_expect (failed, 0) != 0)
284 /* Something went wrong while initializing the modules. */
285 while (++step_cnt < *nsteps)
286 __gconv_release_step (&result[step_cnt]);
287 free (result);
288 *nsteps = 0;
289 *handle = NULL;
290 if (status == __GCONV_OK)
291 status = __GCONV_NOCONV;
293 else
294 *handle = result;
296 else
298 *nsteps = 0;
299 *handle = NULL;
302 return status;
306 #ifndef STATIC_GCONV
307 static int
308 internal_function
309 increment_counter (struct __gconv_step *steps, size_t nsteps)
311 /* Increment the user counter. */
312 size_t cnt = nsteps;
313 int result = __GCONV_OK;
315 while (cnt-- > 0)
317 struct __gconv_step *step = &steps[cnt];
319 if (step->__counter++ == 0)
321 /* Skip builtin modules. */
322 if (step->__modname != NULL)
324 /* Reopen a previously used module. */
325 step->__shlib_handle = __gconv_find_shlib (step->__modname);
326 if (step->__shlib_handle == NULL)
328 /* Oops, this is the second time we use this module
329 (after unloading) and this time loading failed!? */
330 --step->__counter;
331 while (++cnt < nsteps)
332 __gconv_release_step (&steps[cnt]);
333 result = __GCONV_NOCONV;
334 break;
337 /* The function addresses defined by the module may
338 have changed. */
339 step->__fct = step->__shlib_handle->fct;
340 step->__init_fct = step->__shlib_handle->init_fct;
341 step->__end_fct = step->__shlib_handle->end_fct;
344 if (step->__init_fct != NULL)
345 DL_CALL_FCT (step->__init_fct, (step));
348 return result;
350 #endif
353 /* The main function: find a possible derivation from the `fromset' (either
354 the given name or the alias) to the `toset' (again with alias). */
355 static int
356 internal_function
357 find_derivation (const char *toset, const char *toset_expand,
358 const char *fromset, const char *fromset_expand,
359 struct __gconv_step **handle, size_t *nsteps)
361 struct derivation_step *first, *current, **lastp, *solution = NULL;
362 int best_cost_hi = INT_MAX;
363 int best_cost_lo = INT_MAX;
364 int result;
366 /* Look whether an earlier call to `find_derivation' has already
367 computed a possible derivation. If so, return it immediately. */
368 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
369 handle, nsteps);
370 if (result == __GCONV_OK)
372 #ifndef STATIC_GCONV
373 result = increment_counter (*handle, *nsteps);
374 #endif
375 return result;
378 /* The task is to find a sequence of transformations, backed by the
379 existing modules - whether builtin or dynamically loadable -,
380 starting at `fromset' (or `fromset_expand') and ending at `toset'
381 (or `toset_expand'), and with minimal cost.
383 For computer scientists, this is a shortest path search in the
384 graph where the nodes are all possible charsets and the edges are
385 the transformations listed in __gconv_modules_db.
387 For now we use a simple algorithm with quadratic runtime behaviour.
388 A breadth-first search, starting at `fromset' and `fromset_expand'.
389 The list starting at `first' contains all nodes that have been
390 visited up to now, in the order in which they have been visited --
391 excluding the goal nodes `toset' and `toset_expand' which get
392 managed in the list starting at `solution'.
393 `current' walks through the list starting at `first' and looks
394 which nodes are reachable from the current node, adding them to
395 the end of the list [`first' or `solution' respectively] (if
396 they are visited the first time) or updating them in place (if
397 they have have already been visited).
398 In each node of either list, cost_lo and cost_hi contain the
399 minimum cost over any paths found up to now, starting at `fromset'
400 or `fromset_expand', ending at that node. best_cost_lo and
401 best_cost_hi represent the minimum over the elements of the
402 `solution' list. */
404 if (fromset_expand != NULL)
406 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
407 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
408 lastp = &first->next->next;
410 else
412 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
413 lastp = &first->next;
416 for (current = first; current != NULL; current = current->next)
418 /* Now match all the available module specifications against the
419 current charset name. If any of them matches check whether
420 we already have a derivation for this charset. If yes, use the
421 one with the lower costs. Otherwise add the new charset at the
422 end.
424 The module database is organized in a tree form which allows
425 searching for prefixes. So we search for the first entry with a
426 matching prefix and any other matching entry can be found from
427 this place. */
428 struct gconv_module *node;
430 /* Maybe it is not necessary anymore to look for a solution for
431 this entry since the cost is already as high (or higher) as
432 the cost for the best solution so far. */
433 if (current->cost_hi > best_cost_hi
434 || (current->cost_hi == best_cost_hi
435 && current->cost_lo >= best_cost_lo))
436 continue;
438 node = __gconv_modules_db;
439 while (node != NULL)
441 int cmpres = strcmp (current->result_set, node->from_string);
442 if (cmpres == 0)
444 /* Walk through the list of modules with this prefix and
445 try to match the name. */
446 struct gconv_module *runp;
448 /* Check all the modules with this prefix. */
449 runp = node;
452 const char *result_set = (strcmp (runp->to_string, "-") == 0
453 ? (toset_expand ?: toset)
454 : runp->to_string);
455 int cost_hi = runp->cost_hi + current->cost_hi;
456 int cost_lo = runp->cost_lo + current->cost_lo;
457 struct derivation_step *step;
459 /* We managed to find a derivation. First see whether
460 we have reached one of the goal nodes. */
461 if (strcmp (result_set, toset) == 0
462 || (toset_expand != NULL
463 && strcmp (result_set, toset_expand) == 0))
465 /* Append to the `solution' list if there
466 is no entry with this name. */
467 for (step = solution; step != NULL; step = step->next)
468 if (strcmp (result_set, step->result_set) == 0)
469 break;
471 if (step == NULL)
473 step = NEW_STEP (result_set,
474 cost_hi, cost_lo,
475 runp, current);
476 step->next = solution;
477 solution = step;
479 else if (step->cost_hi > cost_hi
480 || (step->cost_hi == cost_hi
481 && step->cost_lo > cost_lo))
483 /* A better path was found for the node,
484 on the `solution' list. */
485 step->code = runp;
486 step->last = current;
487 step->cost_hi = cost_hi;
488 step->cost_lo = cost_lo;
491 /* Update best_cost accordingly. */
492 if (cost_hi < best_cost_hi
493 || (cost_hi == best_cost_hi
494 && cost_lo < best_cost_lo))
496 best_cost_hi = cost_hi;
497 best_cost_lo = cost_lo;
500 else if (cost_hi < best_cost_hi
501 || (cost_hi == best_cost_hi
502 && cost_lo < best_cost_lo))
504 /* Append at the end of the `first' list if there
505 is no entry with this name. */
506 for (step = first; step != NULL; step = step->next)
507 if (strcmp (result_set, step->result_set) == 0)
508 break;
510 if (step == NULL)
512 *lastp = NEW_STEP (result_set,
513 cost_hi, cost_lo,
514 runp, current);
515 lastp = &(*lastp)->next;
517 else if (step->cost_hi > cost_hi
518 || (step->cost_hi == cost_hi
519 && step->cost_lo > cost_lo))
521 /* A better path was found for the node,
522 on the `first' list. */
523 step->code = runp;
524 step->last = current;
526 /* Update the cost for all steps. */
527 for (step = first; step != NULL;
528 step = step->next)
529 /* But don't update the start nodes. */
530 if (step->code != NULL)
532 struct derivation_step *back;
533 int hi, lo;
535 hi = step->code->cost_hi;
536 lo = step->code->cost_lo;
538 for (back = step->last; back->code != NULL;
539 back = back->last)
541 hi += back->code->cost_hi;
542 lo += back->code->cost_lo;
545 step->cost_hi = hi;
546 step->cost_lo = lo;
549 /* Likewise for the nodes on the solution list.
550 Also update best_cost accordingly. */
551 for (step = solution; step != NULL;
552 step = step->next)
554 step->cost_hi = (step->code->cost_hi
555 + step->last->cost_hi);
556 step->cost_lo = (step->code->cost_lo
557 + step->last->cost_lo);
559 if (step->cost_hi < best_cost_hi
560 || (step->cost_hi == best_cost_hi
561 && step->cost_lo < best_cost_lo))
563 best_cost_hi = step->cost_hi;
564 best_cost_lo = step->cost_lo;
570 runp = runp->same;
572 while (runp != NULL);
574 break;
576 else if (cmpres < 0)
577 node = node->left;
578 else
579 node = node->right;
583 if (solution != NULL)
585 /* We really found a way to do the transformation. */
587 /* Choose the best solution. This is easy because we know that
588 the solution list has at most length 2 (one for every possible
589 goal node). */
590 if (solution->next != NULL)
592 struct derivation_step *solution2 = solution->next;
594 if (solution2->cost_hi < solution->cost_hi
595 || (solution2->cost_hi == solution->cost_hi
596 && solution2->cost_lo < solution->cost_lo))
597 solution = solution2;
600 /* Now build a data structure describing the transformation steps. */
601 result = gen_steps (solution, toset_expand ?: toset,
602 fromset_expand ?: fromset, handle, nsteps);
604 else
606 /* We haven't found a transformation. Clear the result values. */
607 *handle = NULL;
608 *nsteps = 0;
611 /* Add result in any case to list of known derivations. */
612 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
613 *handle, *nsteps);
615 return result;
619 /* Control of initialization. */
620 __libc_once_define (static, once);
623 static const char *
624 do_lookup_alias (const char *name)
626 struct gconv_alias key;
627 struct gconv_alias **found;
629 key.fromname = (char *) name;
630 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
631 return found != NULL ? (*found)->toname : NULL;
636 internal_function
637 __gconv_compare_alias (const char *name1, const char *name2)
639 int result;
641 /* Ensure that the configuration data is read. */
642 __libc_once (once, __gconv_read_conf);
644 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
645 result = strcmp (do_lookup_alias (name1) ?: name1,
646 do_lookup_alias (name2) ?: name2);
648 return result;
653 internal_function
654 __gconv_find_transform (const char *toset, const char *fromset,
655 struct __gconv_step **handle, size_t *nsteps,
656 int flags)
658 const char *fromset_expand;
659 const char *toset_expand;
660 int result;
662 /* Ensure that the configuration data is read. */
663 __libc_once (once, __gconv_read_conf);
665 /* Acquire the lock. */
666 __libc_lock_lock (lock);
668 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
669 if (result != __GCONV_NODB)
671 /* We have a cache and could resolve the request, successful or not. */
672 __libc_lock_unlock (lock);
673 return result;
676 /* If we don't have a module database return with an error. */
677 if (__gconv_modules_db == NULL)
679 __libc_lock_unlock (lock);
680 return __GCONV_NOCONV;
683 /* See whether the names are aliases. */
684 fromset_expand = do_lookup_alias (fromset);
685 toset_expand = do_lookup_alias (toset);
687 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
688 /* We are not supposed to create a pseudo transformation (means
689 copying) when the input and output character set are the same. */
690 && (strcmp (toset, fromset) == 0
691 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
692 || (fromset_expand != NULL
693 && (strcmp (toset, fromset_expand) == 0
694 || (toset_expand != NULL
695 && strcmp (toset_expand, fromset_expand) == 0)))))
697 /* Both character sets are the same. */
698 __libc_lock_unlock (lock);
699 return __GCONV_NOCONV;
702 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
703 handle, nsteps);
705 /* Release the lock. */
706 __libc_lock_unlock (lock);
708 /* The following code is necessary since `find_derivation' will return
709 GCONV_OK even when no derivation was found but the same request
710 was processed before. I.e., negative results will also be cached. */
711 return (result == __GCONV_OK
712 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
713 : result);
717 /* Release the entries of the modules list. */
719 internal_function
720 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
722 int result = __GCONV_OK;
723 size_t cnt;
725 /* Acquire the lock. */
726 __libc_lock_lock (lock);
728 #ifndef STATIC_GCONV
729 cnt = nsteps;
730 while (cnt-- > 0)
731 __gconv_release_step (&steps[cnt]);
732 #endif
734 /* If we use the cache we free a bit more since we don't keep any
735 transformation records around, they are cheap enough to
736 recreate. */
737 __gconv_release_cache (steps, nsteps);
739 /* Release the lock. */
740 __libc_lock_unlock (lock);
742 return result;
746 /* Free the modules mentioned. */
747 static void
748 internal_function
749 free_modules_db (struct gconv_module *node)
751 if (node->left != NULL)
752 free_modules_db (node->left);
753 if (node->right != NULL)
754 free_modules_db (node->right);
757 struct gconv_module *act = node;
758 node = node->same;
759 if (act->module_name[0] == '/')
760 free (act);
762 while (node != NULL);
766 /* Free all resources if necessary. */
767 static void __attribute__ ((unused))
768 free_mem (void)
770 if (__gconv_alias_db != NULL)
771 __tdestroy (__gconv_alias_db, free);
773 if (__gconv_modules_db != NULL)
774 free_modules_db (__gconv_modules_db);
776 if (known_derivations != NULL)
777 __tdestroy (known_derivations, free_derivation);
780 text_set_element (__libc_subfreeres, free_mem);