Fix printf format error
[glibc.git] / iconv / gconv_db.c
blob7d752bcebf81840d3ea6223abadc3930b10f4190
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 #include <assert.h>
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
27 #include <locale/localeinfo.h>
29 #include <dlfcn.h>
30 #include <gconv_int.h>
31 #include <sysdep.h>
34 /* Simple data structure for alias mapping. We have two names, `from'
35 and `to'. */
36 void *__gconv_alias_db;
38 /* Array with available modules. */
39 struct gconv_module *__gconv_modules_db;
41 /* We modify global data. */
42 __libc_lock_define_initialized (, __gconv_lock)
45 /* Provide access to module database. */
46 struct gconv_module *
47 __gconv_get_modules_db (void)
49 return __gconv_modules_db;
52 void *
53 __gconv_get_alias_db (void)
55 return __gconv_alias_db;
59 /* Function for searching alias. */
60 int
61 __gconv_alias_compare (const void *p1, const void *p2)
63 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
64 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
65 return strcmp (s1->fromname, s2->fromname);
/* To search for a derivation we create a list of intermediate steps.
   Each element contains a pointer to the element which precedes it
   in the derivation order.  */
struct derivation_step
{
  /* Name of the charset reached by this step.  */
  const char *result_set;
  /* strlen (result_set).  */
  size_t result_set_len;
  /* Accumulated cost of the path ending here; `cost_hi' is compared
     first, `cost_lo' breaks ties.  */
  int cost_lo;
  int cost_hi;
  /* Module performing the conversion into `result_set'; NULL for the
     start nodes.  */
  struct gconv_module *code;
  /* Predecessor on the conversion path; NULL for the start nodes.  */
  struct derivation_step *last;
  /* Next element of the list this step is linked into.  */
  struct derivation_step *next;
};
/* Create and initialize a struct derivation_step and yield a pointer to
   it.  The object is obtained with alloca, so it lives in the stack frame
   of the function that expands the macro and vanishes when that function
   returns.  Every argument is evaluated exactly once; `next' is set to
   NULL.  */
#define NEW_STEP(result, hi, lo, module, last_mod) \
  ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
     newp->result_set = (result);					      \
     newp->result_set_len = strlen (newp->result_set);			      \
     newp->cost_hi = (hi);						      \
     newp->cost_lo = (lo);						      \
     newp->code = (module);						      \
     newp->last = (last_mod);						      \
     newp->next = NULL;							      \
     newp; })
/* If a specific transformation is used more than once we should not need
   to start looking for it again.  Instead cache each successful result.  */
struct known_derivation
{
  /* Source and destination charset names; together they form the key.  */
  const char *from;
  const char *to;
  /* The cached step array and its length.  `steps' is NULL when the
     cached search found no derivation.  */
  struct __gconv_step *steps;
  size_t nsteps;
};
105 /* Compare function for database of found derivations. */
106 static int
107 derivation_compare (const void *p1, const void *p2)
109 const struct known_derivation *s1 = (const struct known_derivation *) p1;
110 const struct known_derivation *s2 = (const struct known_derivation *) p2;
111 int result;
113 result = strcmp (s1->from, s2->from);
114 if (result == 0)
115 result = strcmp (s1->to, s2->to);
116 return result;
/* The search tree for known derivations.  Entries are struct
   known_derivation objects ordered by derivation_compare.  */
static void *known_derivations;
122 /* Look up whether given transformation was already requested before. */
123 static int
124 internal_function
125 derivation_lookup (const char *fromset, const char *toset,
126 struct __gconv_step **handle, size_t *nsteps)
128 struct known_derivation key = { fromset, toset, NULL, 0 };
129 struct known_derivation **result;
131 result = __tfind (&key, &known_derivations, derivation_compare);
133 if (result == NULL)
134 return __GCONV_NOCONV;
136 *handle = (*result)->steps;
137 *nsteps = (*result)->nsteps;
139 /* Please note that we return GCONV_OK even if the last search for
140 this transformation was unsuccessful. */
141 return __GCONV_OK;
144 /* Add new derivation to list of known ones. */
145 static void
146 internal_function
147 add_derivation (const char *fromset, const char *toset,
148 struct __gconv_step *handle, size_t nsteps)
150 struct known_derivation *new_deriv;
151 size_t fromset_len = strlen (fromset) + 1;
152 size_t toset_len = strlen (toset) + 1;
154 new_deriv = (struct known_derivation *)
155 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
156 if (new_deriv != NULL)
158 new_deriv->from = (char *) (new_deriv + 1);
159 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
160 toset, toset_len);
162 new_deriv->steps = handle;
163 new_deriv->nsteps = nsteps;
165 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
166 == NULL)
167 /* There is some kind of memory allocation problem. */
168 free (new_deriv);
170 /* Please note that we don't complain if the allocation failed. This
171 is not tragically but in case we use the memory debugging facilities
172 not all memory will be freed. */
175 static void __libc_freeres_fn_section
176 free_derivation (void *p)
178 struct known_derivation *deriv = (struct known_derivation *) p;
179 size_t cnt;
181 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
182 if (deriv->steps[cnt].__counter > 0
183 && deriv->steps[cnt].__end_fct != NULL)
185 assert (deriv->steps[cnt].__shlib_handle != NULL);
187 __gconv_end_fct end_fct = deriv->steps[cnt].__end_fct;
188 #ifdef PTR_DEMANGLE
189 PTR_DEMANGLE (end_fct);
190 #endif
191 DL_CALL_FCT (end_fct, (&deriv->steps[cnt]));
194 /* Free the name strings. */
195 if (deriv->steps != NULL)
197 free ((char *) deriv->steps[0].__from_name);
198 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
199 free ((struct __gconv_step *) deriv->steps);
202 free (deriv);
206 /* Decrement the reference count for a single step in a steps array. */
207 void
208 internal_function
209 __gconv_release_step (struct __gconv_step *step)
211 /* Skip builtin modules; they are not reference counted. */
212 if (step->__shlib_handle != NULL && --step->__counter == 0)
214 /* Call the destructor. */
215 if (step->__end_fct != NULL)
217 assert (step->__shlib_handle != NULL);
219 __gconv_end_fct end_fct = step->__end_fct;
220 #ifdef PTR_DEMANGLE
221 PTR_DEMANGLE (end_fct);
222 #endif
223 DL_CALL_FCT (end_fct, (step));
226 #ifndef STATIC_GCONV
227 /* Release the loaded module. */
228 __gconv_release_shlib (step->__shlib_handle);
229 step->__shlib_handle = NULL;
230 #endif
232 else if (step->__shlib_handle == NULL)
233 /* Builtin modules should not have end functions. */
234 assert (step->__end_fct == NULL);
237 static int
238 internal_function
239 gen_steps (struct derivation_step *best, const char *toset,
240 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
242 size_t step_cnt = 0;
243 struct __gconv_step *result;
244 struct derivation_step *current;
245 int status = __GCONV_NOMEM;
247 /* First determine number of steps. */
248 for (current = best; current->last != NULL; current = current->last)
249 ++step_cnt;
251 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
252 * step_cnt);
253 if (result != NULL)
255 int failed = 0;
257 status = __GCONV_OK;
258 *nsteps = step_cnt;
259 current = best;
260 while (step_cnt-- > 0)
262 result[step_cnt].__from_name = (step_cnt == 0
263 ? __strdup (fromset)
264 : (char *)current->last->result_set);
265 result[step_cnt].__to_name = (step_cnt + 1 == *nsteps
266 ? __strdup (current->result_set)
267 : result[step_cnt + 1].__from_name);
269 result[step_cnt].__counter = 1;
270 result[step_cnt].__data = NULL;
272 #ifndef STATIC_GCONV
273 if (current->code->module_name[0] == '/')
275 /* Load the module, return handle for it. */
276 struct __gconv_loaded_object *shlib_handle =
277 __gconv_find_shlib (current->code->module_name);
279 if (shlib_handle == NULL)
281 failed = 1;
282 break;
285 result[step_cnt].__shlib_handle = shlib_handle;
286 result[step_cnt].__modname = shlib_handle->name;
287 result[step_cnt].__fct = shlib_handle->fct;
288 result[step_cnt].__init_fct = shlib_handle->init_fct;
289 result[step_cnt].__end_fct = shlib_handle->end_fct;
291 /* These settings can be overridden by the init function. */
292 result[step_cnt].__btowc_fct = NULL;
294 /* Call the init function. */
295 __gconv_init_fct init_fct = result[step_cnt].__init_fct;
296 if (init_fct != NULL)
298 assert (result[step_cnt].__shlib_handle != NULL);
300 # ifdef PTR_DEMANGLE
301 PTR_DEMANGLE (init_fct);
302 # endif
303 status = DL_CALL_FCT (init_fct, (&result[step_cnt]));
305 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
307 failed = 1;
308 /* Make sure we unload this modules. */
309 --step_cnt;
310 result[step_cnt].__end_fct = NULL;
311 break;
314 # ifdef PTR_MANGLE
315 if (result[step_cnt].__btowc_fct != NULL)
316 PTR_MANGLE (result[step_cnt].__btowc_fct);
317 # endif
320 else
321 #endif
322 /* It's a builtin transformation. */
323 __gconv_get_builtin_trans (current->code->module_name,
324 &result[step_cnt]);
326 current = current->last;
329 if (__builtin_expect (failed, 0) != 0)
331 /* Something went wrong while initializing the modules. */
332 while (++step_cnt < *nsteps)
333 __gconv_release_step (&result[step_cnt]);
334 free (result);
335 *nsteps = 0;
336 *handle = NULL;
337 if (status == __GCONV_OK)
338 status = __GCONV_NOCONV;
340 else
341 *handle = result;
343 else
345 *nsteps = 0;
346 *handle = NULL;
349 return status;
353 #ifndef STATIC_GCONV
354 static int
355 internal_function
356 increment_counter (struct __gconv_step *steps, size_t nsteps)
358 /* Increment the user counter. */
359 size_t cnt = nsteps;
360 int result = __GCONV_OK;
362 while (cnt-- > 0)
364 struct __gconv_step *step = &steps[cnt];
366 if (step->__counter++ == 0)
368 /* Skip builtin modules. */
369 if (step->__modname != NULL)
371 /* Reopen a previously used module. */
372 step->__shlib_handle = __gconv_find_shlib (step->__modname);
373 if (step->__shlib_handle == NULL)
375 /* Oops, this is the second time we use this module
376 (after unloading) and this time loading failed!? */
377 --step->__counter;
378 while (++cnt < nsteps)
379 __gconv_release_step (&steps[cnt]);
380 result = __GCONV_NOCONV;
381 break;
384 /* The function addresses defined by the module may
385 have changed. */
386 step->__fct = step->__shlib_handle->fct;
387 step->__init_fct = step->__shlib_handle->init_fct;
388 step->__end_fct = step->__shlib_handle->end_fct;
390 /* These settings can be overridden by the init function. */
391 step->__btowc_fct = NULL;
394 /* Call the init function. */
395 __gconv_init_fct init_fct = step->__init_fct;
396 if (init_fct != NULL)
398 #ifdef PTR_DEMANGLE
399 PTR_DEMANGLE (init_fct);
400 #endif
401 DL_CALL_FCT (init_fct, (step));
403 #ifdef PTR_MANGLE
404 if (step->__btowc_fct != NULL)
405 PTR_MANGLE (step->__btowc_fct);
406 #endif
410 return result;
412 #endif
415 /* The main function: find a possible derivation from the `fromset' (either
416 the given name or the alias) to the `toset' (again with alias). */
417 static int
418 internal_function
419 find_derivation (const char *toset, const char *toset_expand,
420 const char *fromset, const char *fromset_expand,
421 struct __gconv_step **handle, size_t *nsteps)
423 struct derivation_step *first, *current, **lastp, *solution = NULL;
424 int best_cost_hi = INT_MAX;
425 int best_cost_lo = INT_MAX;
426 int result;
428 /* Look whether an earlier call to `find_derivation' has already
429 computed a possible derivation. If so, return it immediately. */
430 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
431 handle, nsteps);
432 if (result == __GCONV_OK)
434 #ifndef STATIC_GCONV
435 result = increment_counter (*handle, *nsteps);
436 #endif
437 return result;
440 /* The task is to find a sequence of transformations, backed by the
441 existing modules - whether builtin or dynamically loadable -,
442 starting at `fromset' (or `fromset_expand') and ending at `toset'
443 (or `toset_expand'), and with minimal cost.
445 For computer scientists, this is a shortest path search in the
446 graph where the nodes are all possible charsets and the edges are
447 the transformations listed in __gconv_modules_db.
449 For now we use a simple algorithm with quadratic runtime behaviour.
450 A breadth-first search, starting at `fromset' and `fromset_expand'.
451 The list starting at `first' contains all nodes that have been
452 visited up to now, in the order in which they have been visited --
453 excluding the goal nodes `toset' and `toset_expand' which get
454 managed in the list starting at `solution'.
455 `current' walks through the list starting at `first' and looks
456 which nodes are reachable from the current node, adding them to
457 the end of the list [`first' or `solution' respectively] (if
458 they are visited the first time) or updating them in place (if
459 they have have already been visited).
460 In each node of either list, cost_lo and cost_hi contain the
461 minimum cost over any paths found up to now, starting at `fromset'
462 or `fromset_expand', ending at that node. best_cost_lo and
463 best_cost_hi represent the minimum over the elements of the
464 `solution' list. */
466 if (fromset_expand != NULL)
468 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
469 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
470 lastp = &first->next->next;
472 else
474 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
475 lastp = &first->next;
478 for (current = first; current != NULL; current = current->next)
480 /* Now match all the available module specifications against the
481 current charset name. If any of them matches check whether
482 we already have a derivation for this charset. If yes, use the
483 one with the lower costs. Otherwise add the new charset at the
484 end.
486 The module database is organized in a tree form which allows
487 searching for prefixes. So we search for the first entry with a
488 matching prefix and any other matching entry can be found from
489 this place. */
490 struct gconv_module *node;
492 /* Maybe it is not necessary anymore to look for a solution for
493 this entry since the cost is already as high (or higher) as
494 the cost for the best solution so far. */
495 if (current->cost_hi > best_cost_hi
496 || (current->cost_hi == best_cost_hi
497 && current->cost_lo >= best_cost_lo))
498 continue;
500 node = __gconv_modules_db;
501 while (node != NULL)
503 int cmpres = strcmp (current->result_set, node->from_string);
504 if (cmpres == 0)
506 /* Walk through the list of modules with this prefix and
507 try to match the name. */
508 struct gconv_module *runp;
510 /* Check all the modules with this prefix. */
511 runp = node;
514 const char *result_set = (strcmp (runp->to_string, "-") == 0
515 ? (toset_expand ?: toset)
516 : runp->to_string);
517 int cost_hi = runp->cost_hi + current->cost_hi;
518 int cost_lo = runp->cost_lo + current->cost_lo;
519 struct derivation_step *step;
521 /* We managed to find a derivation. First see whether
522 we have reached one of the goal nodes. */
523 if (strcmp (result_set, toset) == 0
524 || (toset_expand != NULL
525 && strcmp (result_set, toset_expand) == 0))
527 /* Append to the `solution' list if there
528 is no entry with this name. */
529 for (step = solution; step != NULL; step = step->next)
530 if (strcmp (result_set, step->result_set) == 0)
531 break;
533 if (step == NULL)
535 step = NEW_STEP (result_set,
536 cost_hi, cost_lo,
537 runp, current);
538 step->next = solution;
539 solution = step;
541 else if (step->cost_hi > cost_hi
542 || (step->cost_hi == cost_hi
543 && step->cost_lo > cost_lo))
545 /* A better path was found for the node,
546 on the `solution' list. */
547 step->code = runp;
548 step->last = current;
549 step->cost_hi = cost_hi;
550 step->cost_lo = cost_lo;
553 /* Update best_cost accordingly. */
554 if (cost_hi < best_cost_hi
555 || (cost_hi == best_cost_hi
556 && cost_lo < best_cost_lo))
558 best_cost_hi = cost_hi;
559 best_cost_lo = cost_lo;
562 else if (cost_hi < best_cost_hi
563 || (cost_hi == best_cost_hi
564 && cost_lo < best_cost_lo))
566 /* Append at the end of the `first' list if there
567 is no entry with this name. */
568 for (step = first; step != NULL; step = step->next)
569 if (strcmp (result_set, step->result_set) == 0)
570 break;
572 if (step == NULL)
574 *lastp = NEW_STEP (result_set,
575 cost_hi, cost_lo,
576 runp, current);
577 lastp = &(*lastp)->next;
579 else if (step->cost_hi > cost_hi
580 || (step->cost_hi == cost_hi
581 && step->cost_lo > cost_lo))
583 /* A better path was found for the node,
584 on the `first' list. */
585 step->code = runp;
586 step->last = current;
588 /* Update the cost for all steps. */
589 for (step = first; step != NULL;
590 step = step->next)
591 /* But don't update the start nodes. */
592 if (step->code != NULL)
594 struct derivation_step *back;
595 int hi, lo;
597 hi = step->code->cost_hi;
598 lo = step->code->cost_lo;
600 for (back = step->last; back->code != NULL;
601 back = back->last)
603 hi += back->code->cost_hi;
604 lo += back->code->cost_lo;
607 step->cost_hi = hi;
608 step->cost_lo = lo;
611 /* Likewise for the nodes on the solution list.
612 Also update best_cost accordingly. */
613 for (step = solution; step != NULL;
614 step = step->next)
616 step->cost_hi = (step->code->cost_hi
617 + step->last->cost_hi);
618 step->cost_lo = (step->code->cost_lo
619 + step->last->cost_lo);
621 if (step->cost_hi < best_cost_hi
622 || (step->cost_hi == best_cost_hi
623 && step->cost_lo < best_cost_lo))
625 best_cost_hi = step->cost_hi;
626 best_cost_lo = step->cost_lo;
632 runp = runp->same;
634 while (runp != NULL);
636 break;
638 else if (cmpres < 0)
639 node = node->left;
640 else
641 node = node->right;
645 if (solution != NULL)
647 /* We really found a way to do the transformation. */
649 /* Choose the best solution. This is easy because we know that
650 the solution list has at most length 2 (one for every possible
651 goal node). */
652 if (solution->next != NULL)
654 struct derivation_step *solution2 = solution->next;
656 if (solution2->cost_hi < solution->cost_hi
657 || (solution2->cost_hi == solution->cost_hi
658 && solution2->cost_lo < solution->cost_lo))
659 solution = solution2;
662 /* Now build a data structure describing the transformation steps. */
663 result = gen_steps (solution, toset_expand ?: toset,
664 fromset_expand ?: fromset, handle, nsteps);
666 else
668 /* We haven't found a transformation. Clear the result values. */
669 *handle = NULL;
670 *nsteps = 0;
673 /* Add result in any case to list of known derivations. */
674 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
675 *handle, *nsteps);
677 return result;
681 /* Control of initialization. */
682 __libc_once_define (static, once);
685 static const char *
686 do_lookup_alias (const char *name)
688 struct gconv_alias key;
689 struct gconv_alias **found;
691 key.fromname = (char *) name;
692 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
693 return found != NULL ? (*found)->toname : NULL;
698 internal_function
699 __gconv_compare_alias (const char *name1, const char *name2)
701 int result;
703 /* Ensure that the configuration data is read. */
704 __libc_once (once, __gconv_read_conf);
706 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
707 result = strcmp (do_lookup_alias (name1) ?: name1,
708 do_lookup_alias (name2) ?: name2);
710 return result;
715 internal_function
716 __gconv_find_transform (const char *toset, const char *fromset,
717 struct __gconv_step **handle, size_t *nsteps,
718 int flags)
720 const char *fromset_expand;
721 const char *toset_expand;
722 int result;
724 /* Ensure that the configuration data is read. */
725 __libc_once (once, __gconv_read_conf);
727 /* Acquire the lock. */
728 __libc_lock_lock (__gconv_lock);
730 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
731 if (result != __GCONV_NODB)
733 /* We have a cache and could resolve the request, successful or not. */
734 __libc_lock_unlock (__gconv_lock);
735 return result;
738 /* If we don't have a module database return with an error. */
739 if (__gconv_modules_db == NULL)
741 __libc_lock_unlock (__gconv_lock);
742 return __GCONV_NOCONV;
745 /* See whether the names are aliases. */
746 fromset_expand = do_lookup_alias (fromset);
747 toset_expand = do_lookup_alias (toset);
749 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
750 /* We are not supposed to create a pseudo transformation (means
751 copying) when the input and output character set are the same. */
752 && (strcmp (toset, fromset) == 0
753 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
754 || (fromset_expand != NULL
755 && (strcmp (toset, fromset_expand) == 0
756 || (toset_expand != NULL
757 && strcmp (toset_expand, fromset_expand) == 0)))))
759 /* Both character sets are the same. */
760 __libc_lock_unlock (__gconv_lock);
761 return __GCONV_NULCONV;
764 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
765 handle, nsteps);
767 /* Release the lock. */
768 __libc_lock_unlock (__gconv_lock);
770 /* The following code is necessary since `find_derivation' will return
771 GCONV_OK even when no derivation was found but the same request
772 was processed before. I.e., negative results will also be cached. */
773 return (result == __GCONV_OK
774 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
775 : result);
779 /* Release the entries of the modules list. */
781 internal_function
782 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
784 int result = __GCONV_OK;
785 size_t cnt;
787 /* Acquire the lock. */
788 __libc_lock_lock (__gconv_lock);
790 #ifndef STATIC_GCONV
791 cnt = nsteps;
792 while (cnt-- > 0)
793 __gconv_release_step (&steps[cnt]);
794 #endif
796 /* If we use the cache we free a bit more since we don't keep any
797 transformation records around, they are cheap enough to
798 recreate. */
799 __gconv_release_cache (steps, nsteps);
801 /* Release the lock. */
802 __libc_lock_unlock (__gconv_lock);
804 return result;
808 /* Free the modules mentioned. */
809 static void
810 internal_function __libc_freeres_fn_section
811 free_modules_db (struct gconv_module *node)
813 if (node->left != NULL)
814 free_modules_db (node->left);
815 if (node->right != NULL)
816 free_modules_db (node->right);
819 struct gconv_module *act = node;
820 node = node->same;
821 if (act->module_name[0] == '/')
822 free (act);
824 while (node != NULL);
828 /* Free all resources if necessary. */
829 libc_freeres_fn (free_mem)
831 /* First free locale memory. This needs to be done before freeing derivations,
832 as ctype cleanup functions dereference steps arrays which we free below. */
833 _nl_locale_subfreeres ();
835 /* finddomain.c has similar problem. */
836 extern void _nl_finddomain_subfreeres (void) attribute_hidden;
837 _nl_finddomain_subfreeres ();
839 if (__gconv_alias_db != NULL)
840 __tdestroy (__gconv_alias_db, free);
842 if (__gconv_modules_db != NULL)
843 free_modules_db (__gconv_modules_db);
845 if (known_derivations != NULL)
846 __tdestroy (known_derivations, free_derivation);