* iconv/loop.c (put16) [!_STRING_ARCH_unaligned && BIG_ENDIAN]:
[glibc.git] / iconv / gconv_db.c
blobc661c3d472b18df2879227e6efdb91cf81dfc195
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997,98,99,2000,2001 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
28 #include <dlfcn.h>
29 #include <gconv_int.h>
30 #include <gconv_charset.h>
33 /* Simple data structure for alias mapping. We have two names, `from'
34 and `to'. */
35 void *__gconv_alias_db;
37 /* Array with available modules. */
38 struct gconv_module *__gconv_modules_db;
40 /* We modify global data. */
41 __libc_lock_define_initialized (static, lock)
44 /* Function for searching alias. */
45 int
46 __gconv_alias_compare (const void *p1, const void *p2)
48 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
49 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
50 return strcmp (s1->fromname, s2->fromname);
54 /* To search for a derivation we create a list of intermediate steps.
55 Each element contains a pointer to the element which precedes it
56 in the derivation order. */
57 struct derivation_step
59 const char *result_set;
60 size_t result_set_len;
61 int cost_lo;
62 int cost_hi;
63 struct gconv_module *code;
64 struct derivation_step *last;
65 struct derivation_step *next;
68 #define NEW_STEP(result, hi, lo, module, last_mod) \
69 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
70 newp->result_set = result; \
71 newp->result_set_len = strlen (result); \
72 newp->cost_hi = hi; \
73 newp->cost_lo = lo; \
74 newp->code = module; \
75 newp->last = last_mod; \
76 newp->next = NULL; \
77 newp; })
80 /* If a specific transformation is used more than once we should not need
81 to start looking for it again. Instead cache each successful result. */
82 struct known_derivation
84 const char *from;
85 const char *to;
86 struct __gconv_step *steps;
87 size_t nsteps;
90 /* Compare function for database of found derivations. */
91 static int
92 derivation_compare (const void *p1, const void *p2)
94 const struct known_derivation *s1 = (const struct known_derivation *) p1;
95 const struct known_derivation *s2 = (const struct known_derivation *) p2;
96 int result;
98 result = strcmp (s1->from, s2->from);
99 if (result == 0)
100 result = strcmp (s1->to, s2->to);
101 return result;
104 /* The search tree for known derivations. */
105 static void *known_derivations;
107 /* Look up whether given transformation was already requested before. */
108 static int
109 internal_function
110 derivation_lookup (const char *fromset, const char *toset,
111 struct __gconv_step **handle, size_t *nsteps)
113 struct known_derivation key = { fromset, toset, NULL, 0 };
114 struct known_derivation **result;
116 result = __tfind (&key, &known_derivations, derivation_compare);
118 if (result == NULL)
119 return __GCONV_NOCONV;
121 *handle = (*result)->steps;
122 *nsteps = (*result)->nsteps;
124 /* Please note that we return GCONV_OK even if the last search for
125 this transformation was unsuccessful. */
126 return __GCONV_OK;
129 /* Add new derivation to list of known ones. */
130 static void
131 internal_function
132 add_derivation (const char *fromset, const char *toset,
133 struct __gconv_step *handle, size_t nsteps)
135 struct known_derivation *new_deriv;
136 size_t fromset_len = strlen (fromset) + 1;
137 size_t toset_len = strlen (toset) + 1;
139 new_deriv = (struct known_derivation *)
140 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
141 if (new_deriv != NULL)
143 new_deriv->from = (char *) (new_deriv + 1);
144 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
145 toset, toset_len);
147 new_deriv->steps = handle;
148 new_deriv->nsteps = nsteps;
150 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
151 == NULL)
152 /* There is some kind of memory allocation problem. */
153 free (new_deriv);
155 /* Please note that we don't complain if the allocation failed. This
156 is not tragically but in case we use the memory debugging facilities
157 not all memory will be freed. */
160 static void
161 free_derivation (void *p)
163 struct known_derivation *deriv = (struct known_derivation *) p;
164 size_t cnt;
166 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
167 if (deriv->steps[cnt].__counter > 0
168 && deriv->steps[cnt].__end_fct != NULL)
169 DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt]));
171 /* Free the name strings. */
172 free ((char *) deriv->steps[0].__from_name);
173 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
175 free ((struct __gconv_step *) deriv->steps);
176 free (deriv);
180 /* Decrement the reference count for a single step in a steps array. */
181 static inline void
182 release_step (struct __gconv_step *step)
184 if (--step->__counter == 0)
186 /* Call the destructor. */
187 if (step->__end_fct != NULL)
188 DL_CALL_FCT (step->__end_fct, (step));
190 #ifndef STATIC_GCONV
191 /* Skip builtin modules; they are not reference counted. */
192 if (step->__shlib_handle != NULL)
194 /* Release the loaded module. */
195 __gconv_release_shlib (step->__shlib_handle);
196 step->__shlib_handle = NULL;
198 #endif
202 static int
203 internal_function
204 gen_steps (struct derivation_step *best, const char *toset,
205 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
207 size_t step_cnt = 0;
208 struct __gconv_step *result;
209 struct derivation_step *current;
210 int status = __GCONV_NOMEM;
212 /* First determine number of steps. */
213 for (current = best; current->last != NULL; current = current->last)
214 ++step_cnt;
216 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
217 * step_cnt);
218 if (result != NULL)
220 int failed = 0;
222 status = __GCONV_OK;
223 *nsteps = step_cnt;
224 current = best;
225 while (step_cnt-- > 0)
227 result[step_cnt].__from_name = (step_cnt == 0
228 ? __strdup (fromset)
229 : (char *)current->last->result_set);
230 result[step_cnt].__to_name = (step_cnt + 1 == *nsteps
231 ? __strdup (current->result_set)
232 : result[step_cnt + 1].__from_name);
234 #ifndef STATIC_GCONV
235 if (current->code->module_name[0] == '/')
237 /* Load the module, return handle for it. */
238 struct __gconv_loaded_object *shlib_handle =
239 __gconv_find_shlib (current->code->module_name);
241 if (shlib_handle == NULL)
243 failed = 1;
244 break;
247 result[step_cnt].__shlib_handle = shlib_handle;
248 result[step_cnt].__modname = shlib_handle->name;
249 result[step_cnt].__fct = shlib_handle->fct;
250 result[step_cnt].__init_fct = shlib_handle->init_fct;
251 result[step_cnt].__end_fct = shlib_handle->end_fct;
253 else
254 #endif
255 /* It's a builtin transformation. */
256 __gconv_get_builtin_trans (current->code->module_name,
257 &result[step_cnt]);
259 result[step_cnt].__counter = 1;
261 /* Call the init function. */
262 result[step_cnt].__data = NULL;
263 if (result[step_cnt].__init_fct != NULL)
265 status = DL_CALL_FCT (result[step_cnt].__init_fct,
266 (&result[step_cnt]));
268 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
270 failed = 1;
271 /* Make sure we unload this modules. */
272 --step_cnt;
273 result[step_cnt].__end_fct = NULL;
274 break;
278 current = current->last;
281 if (__builtin_expect (failed, 0) != 0)
283 /* Something went wrong while initializing the modules. */
284 while (++step_cnt < *nsteps)
285 release_step (&result[step_cnt]);
286 free (result);
287 *nsteps = 0;
288 *handle = NULL;
289 if (status == __GCONV_OK)
290 status = __GCONV_NOCONV;
292 else
293 *handle = result;
295 else
297 *nsteps = 0;
298 *handle = NULL;
301 return status;
305 #ifndef STATIC_GCONV
306 static int
307 internal_function
308 increment_counter (struct __gconv_step *steps, size_t nsteps)
310 /* Increment the user counter. */
311 size_t cnt = nsteps;
312 int result = __GCONV_OK;
314 while (cnt-- > 0)
316 struct __gconv_step *step = &steps[cnt];
318 if (step->__counter++ == 0)
320 /* Skip builtin modules. */
321 if (step->__modname != NULL)
323 /* Reopen a previously used module. */
324 step->__shlib_handle = __gconv_find_shlib (step->__modname);
325 if (step->__shlib_handle == NULL)
327 /* Oops, this is the second time we use this module
328 (after unloading) and this time loading failed!? */
329 --step->__counter;
330 while (++cnt < nsteps)
331 release_step (&steps[cnt]);
332 result = __GCONV_NOCONV;
333 break;
336 /* The function addresses defined by the module may
337 have changed. */
338 step->__fct = step->__shlib_handle->fct;
339 step->__init_fct = step->__shlib_handle->init_fct;
340 step->__end_fct = step->__shlib_handle->end_fct;
343 if (step->__init_fct != NULL)
344 DL_CALL_FCT (step->__init_fct, (step));
347 return result;
349 #endif
352 /* The main function: find a possible derivation from the `fromset' (either
353 the given name or the alias) to the `toset' (again with alias). */
354 static int
355 internal_function
356 find_derivation (const char *toset, const char *toset_expand,
357 const char *fromset, const char *fromset_expand,
358 struct __gconv_step **handle, size_t *nsteps)
360 struct derivation_step *first, *current, **lastp, *solution = NULL;
361 int best_cost_hi = INT_MAX;
362 int best_cost_lo = INT_MAX;
363 int result;
365 /* Look whether an earlier call to `find_derivation' has already
366 computed a possible derivation. If so, return it immediately. */
367 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
368 handle, nsteps);
369 if (result == __GCONV_OK)
371 #ifndef STATIC_GCONV
372 result = increment_counter (*handle, *nsteps);
373 #endif
374 return result;
377 /* The task is to find a sequence of transformations, backed by the
378 existing modules - whether builtin or dynamically loadable -,
379 starting at `fromset' (or `fromset_expand') and ending at `toset'
380 (or `toset_expand'), and with minimal cost.
382 For computer scientists, this is a shortest path search in the
383 graph where the nodes are all possible charsets and the edges are
384 the transformations listed in __gconv_modules_db.
386 For now we use a simple algorithm with quadratic runtime behaviour.
387 A breadth-first search, starting at `fromset' and `fromset_expand'.
388 The list starting at `first' contains all nodes that have been
389 visited up to now, in the order in which they have been visited --
390 excluding the goal nodes `toset' and `toset_expand' which get
391 managed in the list starting at `solution'.
392 `current' walks through the list starting at `first' and looks
393 which nodes are reachable from the current node, adding them to
394 the end of the list [`first' or `solution' respectively] (if
395 they are visited the first time) or updating them in place (if
396 they have have already been visited).
397 In each node of either list, cost_lo and cost_hi contain the
398 minimum cost over any paths found up to now, starting at `fromset'
399 or `fromset_expand', ending at that node. best_cost_lo and
400 best_cost_hi represent the minimum over the elements of the
401 `solution' list. */
403 if (fromset_expand != NULL)
405 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
406 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
407 lastp = &first->next->next;
409 else
411 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
412 lastp = &first->next;
415 for (current = first; current != NULL; current = current->next)
417 /* Now match all the available module specifications against the
418 current charset name. If any of them matches check whether
419 we already have a derivation for this charset. If yes, use the
420 one with the lower costs. Otherwise add the new charset at the
421 end.
423 The module database is organized in a tree form which allows
424 searching for prefixes. So we search for the first entry with a
425 matching prefix and any other matching entry can be found from
426 this place. */
427 struct gconv_module *node;
429 /* Maybe it is not necessary anymore to look for a solution for
430 this entry since the cost is already as high (or higher) as
431 the cost for the best solution so far. */
432 if (current->cost_hi > best_cost_hi
433 || (current->cost_hi == best_cost_hi
434 && current->cost_lo >= best_cost_lo))
435 continue;
437 node = __gconv_modules_db;
438 while (node != NULL)
440 int cmpres = strcmp (current->result_set, node->from_string);
441 if (cmpres == 0)
443 /* Walk through the list of modules with this prefix and
444 try to match the name. */
445 struct gconv_module *runp;
447 /* Check all the modules with this prefix. */
448 runp = node;
451 const char *result_set = (strcmp (runp->to_string, "-") == 0
452 ? (toset_expand ?: toset)
453 : runp->to_string);
454 int cost_hi = runp->cost_hi + current->cost_hi;
455 int cost_lo = runp->cost_lo + current->cost_lo;
456 struct derivation_step *step;
458 /* We managed to find a derivation. First see whether
459 we have reached one of the goal nodes. */
460 if (strcmp (result_set, toset) == 0
461 || (toset_expand != NULL
462 && strcmp (result_set, toset_expand) == 0))
464 /* Append to the `solution' list if there
465 is no entry with this name. */
466 for (step = solution; step != NULL; step = step->next)
467 if (strcmp (result_set, step->result_set) == 0)
468 break;
470 if (step == NULL)
472 step = NEW_STEP (result_set,
473 cost_hi, cost_lo,
474 runp, current);
475 step->next = solution;
476 solution = step;
478 else if (step->cost_hi > cost_hi
479 || (step->cost_hi == cost_hi
480 && step->cost_lo > cost_lo))
482 /* A better path was found for the node,
483 on the `solution' list. */
484 step->code = runp;
485 step->last = current;
486 step->cost_hi = cost_hi;
487 step->cost_lo = cost_lo;
490 /* Update best_cost accordingly. */
491 if (cost_hi < best_cost_hi
492 || (cost_hi == best_cost_hi
493 && cost_lo < best_cost_lo))
495 best_cost_hi = cost_hi;
496 best_cost_lo = cost_lo;
499 else if (cost_hi < best_cost_hi
500 || (cost_hi == best_cost_hi
501 && cost_lo < best_cost_lo))
503 /* Append at the end of the `first' list if there
504 is no entry with this name. */
505 for (step = first; step != NULL; step = step->next)
506 if (strcmp (result_set, step->result_set) == 0)
507 break;
509 if (step == NULL)
511 *lastp = NEW_STEP (result_set,
512 cost_hi, cost_lo,
513 runp, current);
514 lastp = &(*lastp)->next;
516 else if (step->cost_hi > cost_hi
517 || (step->cost_hi == cost_hi
518 && step->cost_lo > cost_lo))
520 /* A better path was found for the node,
521 on the `first' list. */
522 step->code = runp;
523 step->last = current;
525 /* Update the cost for all steps. */
526 for (step = first; step != NULL;
527 step = step->next)
528 /* But don't update the start nodes. */
529 if (step->code != NULL)
531 struct derivation_step *back;
532 int hi, lo;
534 hi = step->code->cost_hi;
535 lo = step->code->cost_lo;
537 for (back = step->last; back->code != NULL;
538 back = back->last)
540 hi += back->code->cost_hi;
541 lo += back->code->cost_lo;
544 step->cost_hi = hi;
545 step->cost_lo = lo;
548 /* Likewise for the nodes on the solution list.
549 Also update best_cost accordingly. */
550 for (step = solution; step != NULL;
551 step = step->next)
553 step->cost_hi = (step->code->cost_hi
554 + step->last->cost_hi);
555 step->cost_lo = (step->code->cost_lo
556 + step->last->cost_lo);
558 if (step->cost_hi < best_cost_hi
559 || (step->cost_hi == best_cost_hi
560 && step->cost_lo < best_cost_lo))
562 best_cost_hi = step->cost_hi;
563 best_cost_lo = step->cost_lo;
569 runp = runp->same;
571 while (runp != NULL);
573 break;
575 else if (cmpres < 0)
576 node = node->left;
577 else
578 node = node->right;
582 if (solution != NULL)
584 /* We really found a way to do the transformation. */
586 /* Choose the best solution. This is easy because we know that
587 the solution list has at most length 2 (one for every possible
588 goal node). */
589 if (solution->next != NULL)
591 struct derivation_step *solution2 = solution->next;
593 if (solution2->cost_hi < solution->cost_hi
594 || (solution2->cost_hi == solution->cost_hi
595 && solution2->cost_lo < solution->cost_lo))
596 solution = solution2;
599 /* Now build a data structure describing the transformation steps. */
600 result = gen_steps (solution, toset_expand ?: toset,
601 fromset_expand ?: fromset, handle, nsteps);
603 else
605 /* We haven't found a transformation. Clear the result values. */
606 *handle = NULL;
607 *nsteps = 0;
610 /* Add result in any case to list of known derivations. */
611 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
612 *handle, *nsteps);
614 return result;
618 /* Control of initialization. */
619 __libc_once_define (static, once);
622 static const char *
623 do_lookup_alias (const char *name)
625 struct gconv_alias key;
626 struct gconv_alias **found;
628 key.fromname = (char *) name;
629 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
630 return found != NULL ? (*found)->toname : NULL;
634 const char *
635 __gconv_lookup_alias (const char *name)
637 /* Ensure that the configuration data is read. */
638 __libc_once (once, __gconv_read_conf);
640 return do_lookup_alias (name) ?: name;
645 internal_function
646 __gconv_find_transform (const char *toset, const char *fromset,
647 struct __gconv_step **handle, size_t *nsteps,
648 int flags)
650 const char *fromset_expand = NULL;
651 const char *toset_expand = NULL;
652 int result;
654 /* Ensure that the configuration data is read. */
655 __libc_once (once, __gconv_read_conf);
657 /* Acquire the lock. */
658 __libc_lock_lock (lock);
660 /* If we don't have a module database return with an error. */
661 if (__gconv_modules_db == NULL)
663 __libc_lock_unlock (lock);
664 return __GCONV_NOCONV;
667 /* See whether the names are aliases. */
668 if (__gconv_alias_db != NULL)
670 fromset_expand = do_lookup_alias (fromset);
671 toset_expand = do_lookup_alias (toset);
674 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
675 /* We are not supposed to create a pseudo transformation (means
676 copying) when the input and output character set are the same. */
677 && (strcmp (toset, fromset) == 0
678 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
679 || (fromset_expand != NULL
680 && (strcmp (toset, fromset_expand) == 0
681 || (toset_expand != NULL
682 && strcmp (toset_expand, fromset_expand) == 0)))))
684 /* Both character sets are the same. */
685 __libc_lock_unlock (lock);
686 return __GCONV_NOCONV;
689 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
690 handle, nsteps);
692 /* Release the lock. */
693 __libc_lock_unlock (lock);
695 /* The following code is necessary since `find_derivation' will return
696 GCONV_OK even when no derivation was found but the same request
697 was processed before. I.e., negative results will also be cached. */
698 return (result == __GCONV_OK
699 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
700 : result);
704 /* Release the entries of the modules list. */
706 internal_function
707 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
709 int result = __GCONV_OK;
711 #ifndef STATIC_GCONV
712 /* Acquire the lock. */
713 __libc_lock_lock (lock);
715 while (nsteps-- > 0)
716 release_step (&steps[nsteps]);
718 /* Release the lock. */
719 __libc_lock_unlock (lock);
720 #endif
722 return result;
726 /* Free the modules mentioned. */
727 static void
728 internal_function
729 free_modules_db (struct gconv_module *node)
731 if (node->left != NULL)
732 free_modules_db (node->left);
733 if (node->right != NULL)
734 free_modules_db (node->right);
737 struct gconv_module *act = node;
738 node = node->same;
739 if (act->module_name[0] == '/')
740 free (act);
742 while (node != NULL);
746 /* Free all resources if necessary. */
747 static void __attribute__ ((unused))
748 free_mem (void)
750 if (__gconv_alias_db != NULL)
751 __tdestroy (__gconv_alias_db, free);
753 if (__gconv_modules_db != NULL)
754 free_modules_db (__gconv_modules_db);
756 if (known_derivations != NULL)
757 __tdestroy (known_derivations, free_derivation);
760 text_set_element (__libc_subfreeres, free_mem);