1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2016 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
25 #include <sys/param.h>
26 #include <libc-lock.h>
27 #include <locale/localeinfo.h>
30 #include <gconv_int.h>
/* File-level state of the conversion database: the alias database
   pointer, the module database pointer and the lock that is defined
   because this file modifies global data.
   NOTE(review): the comment opened on the next line has no terminator in
   this listing (its continuation line appears to be missing); confirm
   against the pristine file.  */
34 /* Simple data structure for alias mapping. We have two names, `from'
36 void *__gconv_alias_db
;
38 /* Array with available modules. */
39 struct gconv_module
*__gconv_modules_db
;
41 /* We modify global data. */
42 __libc_lock_define_initialized (, __gconv_lock
)
/* Accessor for the module database: returns the current value of the
   global __gconv_modules_db pointer.  Takes no arguments.
   NOTE(review): the return type line and the braces are not visible in
   this listing.  */
45 /* Provide access to module database. */
47 __gconv_get_modules_db (void)
49 return __gconv_modules_db
;
/* Accessor for the alias database: returns the current value of the
   global __gconv_alias_db pointer.  Takes no arguments.
   NOTE(review): the return type line and the braces are not visible in
   this listing.  */
53 __gconv_get_alias_db (void)
55 return __gconv_alias_db
;
/* Ordering callback for the alias tree.  P1 and P2 are treated as
   pointers to struct gconv_alias; the result is the strcmp ordering of
   their fromname members.  do_lookup_alias passes this function to
   __tfind together with __gconv_alias_db.  */
59 /* Function for searching alias. */
61 __gconv_alias_compare (const void *p1
, const void *p2
)
63 const struct gconv_alias
*s1
= (const struct gconv_alias
*) p1
;
64 const struct gconv_alias
*s2
= (const struct gconv_alias
*) p2
;
65 return strcmp (s1
->fromname
, s2
->fromname
);
/* One node of the search performed by find_derivation.
   NOTE(review): find_derivation also reads and writes cost_hi and cost_lo
   members of this structure; their declarations are not visible in this
   listing.  */
69 /* To search for a derivation we create a list of intermediate steps.
70 Each element contains a pointer to the element which precedes it
71 in the derivation order. */
72 struct derivation_step
/* Name of the charset this node stands for; find_derivation compares it
   with the requested target names.  */
74 const char *result_set
;
/* Length of result_set; NEW_STEP fills it in with strlen.  */
75 size_t result_set_len
;
/* Module that produced this node; NEW_STEP is called with NULL here for
   the start nodes.  */
78 struct gconv_module
*code
;
/* Predecessor on the path that led to this node.  */
79 struct derivation_step
*last
;
/* Link used to chain nodes into the lists walked by find_derivation.  */
80 struct derivation_step
*next
;
/* Create a struct derivation_step inside a statement expression.  The
   node is obtained from alloca, so it lives in the stack frame of the
   function that expands the macro and must not be used after that
   function returns.  The visible lines store RESULT and its strlen, the
   MODULE pointer and the LAST_MOD predecessor.
   NOTE(review): the lines that consume HI and LO and the end of the
   macro are not visible in this listing.  */
83 #define NEW_STEP(result, hi, lo, module, last_mod) \
84 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
85 newp->result_set = result; \
86 newp->result_set_len = strlen (result); \
89 newp->code = module; \
90 newp->last = last_mod; \
/* Cache record for one derivation request.
   NOTE(review): other code in this file also uses members named from, to
   and nsteps; only the steps member is visible in this listing.  */
95 /* If a specific transformation is used more than once we should not need
96 to start looking for it again. Instead cache each successful result. */
97 struct known_derivation
/* Step array recorded for the request; add_derivation stores its HANDLE
   argument here.  */
101 struct __gconv_step
*steps
;
/* Ordering callback for the known_derivations tree.  P1 and P2 are
   treated as pointers to struct known_derivation.  The visible statements
   compare the from members with strcmp and then the to members.
   NOTE(review): the declaration of result, the condition guarding the
   second comparison and the return statement are not visible in this
   listing; presumably the to comparison only runs when the from names
   are equal.  Confirm against the pristine file.  */
105 /* Compare function for database of found derivations. */
107 derivation_compare (const void *p1
, const void *p2
)
109 const struct known_derivation
*s1
= (const struct known_derivation
*) p1
;
110 const struct known_derivation
*s2
= (const struct known_derivation
*) p2
;
113 result
= strcmp (s1
->from
, s2
->from
);
115 result
= strcmp (s1
->to
, s2
->to
);
/* Root of the tree that caches derivations.  It is searched with __tfind
   in derivation_lookup, extended with __tsearch in add_derivation and
   destroyed with __tdestroy in free_mem, always with derivation_compare
   or free_derivation as the callback.  */
119 /* The search tree for known derivations. */
120 static void *known_derivations
;
/* Search the known_derivations tree for the pair FROMSET/TOSET.
   A temporary key holding only the two names is passed to __tfind.  On a
   hit the cached step array and step count are stored through HANDLE and
   NSTEPS.  The visible miss path returns __GCONV_NOCONV.
   NOTE(review): the condition that selects the miss path and the final
   return statement are not visible in this listing.  */
122 /* Look up whether given transformation was already requested before. */
125 derivation_lookup (const char *fromset
, const char *toset
,
126 struct __gconv_step
**handle
, size_t *nsteps
)
/* Lookup key: only the two names are set, the remaining initializers are
   NULL and 0.  */
128 struct known_derivation key
= { fromset
, toset
, NULL
, 0 };
129 struct known_derivation
**result
;
131 result
= __tfind (&key
, &known_derivations
, derivation_compare
);
134 return __GCONV_NOCONV
;
/* The tree yields a pointer to the stored node pointer, hence the extra
   dereference of result.  */
136 *handle
= (*result
)->steps
;
137 *nsteps
= (*result
)->nsteps
;
139 /* Please note that we return GCONV_OK even if the last search for
140 this transformation was unsuccessful. */
/* Record the outcome of a derivation search for FROMSET/TOSET in the
   known_derivations tree.  HANDLE and NSTEPS are stored as given.
   The record and copies of both names are placed in a single malloc
   block: the names follow directly after the structure.  A failed
   allocation is not reported to the caller.
   NOTE(review): the second line of the memcpy call and the body of the
   __tsearch failure branch are not visible in this listing.  */
144 /* Add new derivation to list of known ones. */
147 add_derivation (const char *fromset
, const char *toset
,
148 struct __gconv_step
*handle
, size_t nsteps
)
150 struct known_derivation
*new_deriv
;
/* Both lengths include the terminating NUL byte.  */
151 size_t fromset_len
= strlen (fromset
) + 1;
152 size_t toset_len
= strlen (toset
) + 1;
154 new_deriv
= (struct known_derivation
*)
155 malloc (sizeof (struct known_derivation
) + fromset_len
+ toset_len
);
156 if (new_deriv
!= NULL
)
/* The from name lives immediately behind the structure.  */
158 new_deriv
->from
= (char *) (new_deriv
+ 1);
159 new_deriv
->to
= memcpy (__mempcpy (new_deriv
+ 1, fromset
, fromset_len
),
162 new_deriv
->steps
= handle
;
163 new_deriv
->nsteps
= nsteps
;
165 if (__tsearch (new_deriv
, &known_derivations
, derivation_compare
)
167 /* There is some kind of memory allocation problem. */
170 /* Please note that we don't complain if the allocation failed. This
171 is not tragic but in case we use the memory debugging facilities
172 not all memory will be freed. */
/* Destructor callback for one node of the known_derivations tree;
   free_mem passes it to __tdestroy.  P is treated as a pointer to struct
   known_derivation.  For every step that is still in use (__counter > 0)
   and has an end function, the end function is demangled and called on
   that step.  Afterwards, if a step array exists, the first __from_name,
   the last __to_name and the array itself are freed.
   NOTE(review): the declaration of cnt and any statement after the
   visible free calls are not visible in this listing.  */
175 static void __libc_freeres_fn_section
176 free_derivation (void *p
)
178 struct known_derivation
*deriv
= (struct known_derivation
*) p
;
181 for (cnt
= 0; cnt
< deriv
->nsteps
; ++cnt
)
182 if (deriv
->steps
[cnt
].__counter
> 0
183 && deriv
->steps
[cnt
].__end_fct
!= NULL
)
185 assert (deriv
->steps
[cnt
].__shlib_handle
!= NULL
);
187 __gconv_end_fct end_fct
= deriv
->steps
[cnt
].__end_fct
;
/* The stored pointer is demangled before it is called.  */
189 PTR_DEMANGLE (end_fct
);
191 DL_CALL_FCT (end_fct
, (&deriv
->steps
[cnt
]));
194 /* Free the name strings. */
195 if (deriv
->steps
!= NULL
)
/* Only these two names are freed; gen_steps creates two names with
   __strdup and lets the other name members share existing pointers.  */
197 free ((char *) deriv
->steps
[0].__from_name
);
198 free ((char *) deriv
->steps
[deriv
->nsteps
- 1].__to_name
);
199 free ((struct __gconv_step
*) deriv
->steps
);
/* Drop one reference to STEP.  For a step backed by a shared object
   (__shlib_handle != NULL) the counter is decremented; when it reaches
   zero the end function, if any, is demangled and called, the shared
   object is released through __gconv_release_shlib and the handle is
   reset to NULL.  For a step without a shared object the function only
   asserts that no end function is set.
   NOTE(review): the return type line and the braces are not visible in
   this listing.  */
206 /* Decrement the reference count for a single step in a steps array. */
209 __gconv_release_step (struct __gconv_step
*step
)
211 /* Skip builtin modules; they are not reference counted. */
212 if (step
->__shlib_handle
!= NULL
&& --step
->__counter
== 0)
214 /* Call the destructor. */
215 if (step
->__end_fct
!= NULL
)
217 assert (step
->__shlib_handle
!= NULL
);
219 __gconv_end_fct end_fct
= step
->__end_fct
;
221 PTR_DEMANGLE (end_fct
);
223 DL_CALL_FCT (end_fct
, (step
));
227 /* Release the loaded module. */
228 __gconv_release_shlib (step
->__shlib_handle
);
/* Clearing the handle marks the step as unloaded; increment_counter
   assigns a fresh handle when the step is used again.  */
229 step
->__shlib_handle
= NULL
;
232 else if (step
->__shlib_handle
== NULL
)
233 /* Builtin modules should not have end functions. */
234 assert (step
->__end_fct
== NULL
);
/* Build the __gconv_step array for the derivation chain that ends at
   BEST.  The chain is followed backwards through the last pointers, first
   to count the steps and then to fill the array from the highest index
   down.  Each entry gets its names, a counter of 1 and NULL data.
   Modules whose module_name starts with a slash are loaded with
   __gconv_find_shlib and their init function, if any, is demangled and
   called; other modules are handed to __gconv_get_builtin_trans.
   The status starts as __GCONV_NOMEM.  On failure the steps above the
   failing index are released with __gconv_release_step, and a status of
   __GCONV_OK is replaced by __GCONV_NOCONV.
   NOTE(review): many lines (return type, declarations of step_cnt and
   failed, the conditions around the name assignments, the stores through
   HANDLE and the return) are not visible in this listing.  */
239 gen_steps (struct derivation_step
*best
, const char *toset
,
240 const char *fromset
, struct __gconv_step
**handle
, size_t *nsteps
)
243 struct __gconv_step
*result
;
244 struct derivation_step
*current
;
245 int status
= __GCONV_NOMEM
;
246 char *from_name
= NULL
;
247 char *to_name
= NULL
;
249 /* First determine number of steps. */
250 for (current
= best
; current
->last
!= NULL
; current
= current
->last
)
253 result
= (struct __gconv_step
*) malloc (sizeof (struct __gconv_step
)
262 while (step_cnt
-- > 0)
/* One entry receives a private copy of FROMSET; the condition selecting
   it is not visible here.  free_derivation frees steps[0].__from_name.  */
266 result
[step_cnt
].__from_name
= from_name
= __strdup (fromset
);
267 if (from_name
== NULL
)
274 result
[step_cnt
].__from_name
= (char *)current
->last
->result_set
;
/* The last entry of the array receives a private copy of the final
   charset name.  */
276 if (step_cnt
+ 1 == *nsteps
)
278 result
[step_cnt
].__to_name
= to_name
279 = __strdup (current
->result_set
);
/* Other entries share the pointer with the from name of the next
   entry.  */
287 result
[step_cnt
].__to_name
= result
[step_cnt
+ 1].__from_name
;
289 result
[step_cnt
].__counter
= 1;
290 result
[step_cnt
].__data
= NULL
;
293 if (current
->code
->module_name
[0] == '/')
295 /* Load the module, return handle for it. */
296 struct __gconv_loaded_object
*shlib_handle
=
297 __gconv_find_shlib (current
->code
->module_name
);
299 if (shlib_handle
== NULL
)
305 result
[step_cnt
].__shlib_handle
= shlib_handle
;
306 result
[step_cnt
].__modname
= shlib_handle
->name
;
307 result
[step_cnt
].__fct
= shlib_handle
->fct
;
308 result
[step_cnt
].__init_fct
= shlib_handle
->init_fct
;
309 result
[step_cnt
].__end_fct
= shlib_handle
->end_fct
;
311 /* These settings can be overridden by the init function. */
312 result
[step_cnt
].__btowc_fct
= NULL
;
314 /* Call the init function. */
315 __gconv_init_fct init_fct
= result
[step_cnt
].__init_fct
;
316 if (init_fct
!= NULL
)
318 assert (result
[step_cnt
].__shlib_handle
!= NULL
);
321 PTR_DEMANGLE (init_fct
);
323 status
= DL_CALL_FCT (init_fct
, (&result
[step_cnt
]));
325 if (__builtin_expect (status
, __GCONV_OK
) != __GCONV_OK
)
328 /* Make sure we unload this module. */
330 result
[step_cnt
].__end_fct
= NULL
;
/* A btowc function left non-NULL after the init call is stored in
   mangled form.  */
335 if (result
[step_cnt
].__btowc_fct
!= NULL
)
336 PTR_MANGLE (result
[step_cnt
].__btowc_fct
);
342 /* It's a builtin transformation. */
343 __gconv_get_builtin_trans (current
->code
->module_name
,
346 current
= current
->last
;
349 if (__builtin_expect (failed
, 0) != 0)
351 /* Something went wrong while initializing the modules. */
352 while (++step_cnt
< *nsteps
)
353 __gconv_release_step (&result
[step_cnt
]);
359 if (status
== __GCONV_OK
)
360 status
= __GCONV_NOCONV
;
/* Take one more reference on entries of the STEPS array (NSTEPS entries).
   When the counter of a step was zero before the increment and the step
   names a module (__modname != NULL), the shared object is reopened with
   __gconv_find_shlib.  If reopening fails, the steps at higher indices
   are released with __gconv_release_step and the result becomes
   __GCONV_NOCONV.  Otherwise the function pointers are refreshed from the
   handle, the btowc pointer is reset, the init function (if any) is
   demangled and called, and a non-NULL btowc pointer is mangled again.
   The result starts as __GCONV_OK.
   NOTE(review): the loop header, the declaration of cnt and the return
   statement are not visible in this listing.  The comment that begins
   with the words The function addresses has no terminator here; confirm
   against the pristine file.  */
378 increment_counter (struct __gconv_step
*steps
, size_t nsteps
)
380 /* Increment the user counter. */
382 int result
= __GCONV_OK
;
386 struct __gconv_step
*step
= &steps
[cnt
];
388 if (step
->__counter
++ == 0)
390 /* Skip builtin modules. */
391 if (step
->__modname
!= NULL
)
393 /* Reopen a previously used module. */
394 step
->__shlib_handle
= __gconv_find_shlib (step
->__modname
);
395 if (step
->__shlib_handle
== NULL
)
397 /* Oops, this is the second time we use this module
398 (after unloading) and this time loading failed!? */
400 while (++cnt
< nsteps
)
401 __gconv_release_step (&steps
[cnt
]);
402 result
= __GCONV_NOCONV
;
406 /* The function addresses defined by the module may
408 step
->__fct
= step
->__shlib_handle
->fct
;
409 step
->__init_fct
= step
->__shlib_handle
->init_fct
;
410 step
->__end_fct
= step
->__shlib_handle
->end_fct
;
412 /* These settings can be overridden by the init function. */
413 step
->__btowc_fct
= NULL
;
416 /* Call the init function. */
417 __gconv_init_fct init_fct
= step
->__init_fct
;
418 if (init_fct
!= NULL
)
421 PTR_DEMANGLE (init_fct
);
423 DL_CALL_FCT (init_fct
, (step
));
426 if (step
->__btowc_fct
!= NULL
)
427 PTR_MANGLE (step
->__btowc_fct
);
/* Find a chain of modules that converts FROMSET (or its alias expansion
   FROMSET_EXPAND) into TOSET (or TOSET_EXPAND) and return it through
   HANDLE and NSTEPS.
   The cache is consulted first with derivation_lookup; on a hit the
   reference counts are raised with increment_counter.  Otherwise a search
   over __gconv_modules_db is run.  Search nodes are created with NEW_STEP
   and kept on two lists: first for intermediate charsets and solution for
   the target names.  Costs are compared as the pair (cost_hi, cost_lo),
   with cost_hi taking precedence.  If a solution exists, the cheaper of
   at most two candidates is passed to gen_steps.  The outcome is recorded
   with add_derivation in any case.
   The expanded names may be NULL; the ?: operator then falls back to the
   plain names.
   NOTE(review): this listing lacks many lines of the function (return
   type, declaration of result, braces, else and do keywords, loop tails).
   Two comments, the one that begins with The task is and the one that
   begins with Choose the best solution, have no terminator here; confirm
   against the pristine file.  */
437 /* The main function: find a possible derivation from the `fromset' (either
438 the given name or the alias) to the `toset' (again with alias). */
441 find_derivation (const char *toset
, const char *toset_expand
,
442 const char *fromset
, const char *fromset_expand
,
443 struct __gconv_step
**handle
, size_t *nsteps
)
445 struct derivation_step
*first
, *current
, **lastp
, *solution
= NULL
;
446 int best_cost_hi
= INT_MAX
;
447 int best_cost_lo
= INT_MAX
;
450 /* Look whether an earlier call to `find_derivation' has already
451 computed a possible derivation. If so, return it immediately. */
452 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
454 if (result
== __GCONV_OK
)
457 result
= increment_counter (*handle
, *nsteps
);
462 /* The task is to find a sequence of transformations, backed by the
463 existing modules - whether builtin or dynamically loadable -,
464 starting at `fromset' (or `fromset_expand') and ending at `toset'
465 (or `toset_expand'), and with minimal cost.
467 For computer scientists, this is a shortest path search in the
468 graph where the nodes are all possible charsets and the edges are
469 the transformations listed in __gconv_modules_db.
471 For now we use a simple algorithm with quadratic runtime behaviour.
472 A breadth-first search, starting at `fromset' and `fromset_expand'.
473 The list starting at `first' contains all nodes that have been
474 visited up to now, in the order in which they have been visited --
475 excluding the goal nodes `toset' and `toset_expand' which get
476 managed in the list starting at `solution'.
477 `current' walks through the list starting at `first' and looks
478 which nodes are reachable from the current node, adding them to
479 the end of the list [`first' or `solution' respectively] (if
480 they are visited the first time) or updating them in place (if
481 they have already been visited).
482 In each node of either list, cost_lo and cost_hi contain the
483 minimum cost over any paths found up to now, starting at `fromset'
484 or `fromset_expand', ending at that node. best_cost_lo and
485 best_cost_hi represent the minimum over the elements of the
488 if (fromset_expand
!= NULL
)
490 first
= NEW_STEP (fromset_expand
, 0, 0, NULL
, NULL
);
491 first
->next
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
492 lastp
= &first
->next
->next
;
496 first
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
497 lastp
= &first
->next
;
500 for (current
= first
; current
!= NULL
; current
= current
->next
)
502 /* Now match all the available module specifications against the
503 current charset name. If any of them matches check whether
504 we already have a derivation for this charset. If yes, use the
505 one with the lower costs. Otherwise add the new charset at the
508 The module database is organized in a tree form which allows
509 searching for prefixes. So we search for the first entry with a
510 matching prefix and any other matching entry can be found from
512 struct gconv_module
*node
;
514 /* Maybe it is not necessary anymore to look for a solution for
515 this entry since the cost is already as high (or higher) as
516 the cost for the best solution so far. */
517 if (current
->cost_hi
> best_cost_hi
518 || (current
->cost_hi
== best_cost_hi
519 && current
->cost_lo
>= best_cost_lo
))
522 node
= __gconv_modules_db
;
525 int cmpres
= strcmp (current
->result_set
, node
->from_string
);
528 /* Walk through the list of modules with this prefix and
529 try to match the name. */
530 struct gconv_module
*runp
;
532 /* Check all the modules with this prefix. */
536 const char *result_set
= (strcmp (runp
->to_string
, "-") == 0
537 ? (toset_expand
?: toset
)
539 int cost_hi
= runp
->cost_hi
+ current
->cost_hi
;
540 int cost_lo
= runp
->cost_lo
+ current
->cost_lo
;
541 struct derivation_step
*step
;
543 /* We managed to find a derivation. First see whether
544 we have reached one of the goal nodes. */
545 if (strcmp (result_set
, toset
) == 0
546 || (toset_expand
!= NULL
547 && strcmp (result_set
, toset_expand
) == 0))
549 /* Append to the `solution' list if there
550 is no entry with this name. */
551 for (step
= solution
; step
!= NULL
; step
= step
->next
)
552 if (strcmp (result_set
, step
->result_set
) == 0)
557 step
= NEW_STEP (result_set
,
560 step
->next
= solution
;
563 else if (step
->cost_hi
> cost_hi
564 || (step
->cost_hi
== cost_hi
565 && step
->cost_lo
> cost_lo
))
567 /* A better path was found for the node,
568 on the `solution' list. */
570 step
->last
= current
;
571 step
->cost_hi
= cost_hi
;
572 step
->cost_lo
= cost_lo
;
575 /* Update best_cost accordingly. */
576 if (cost_hi
< best_cost_hi
577 || (cost_hi
== best_cost_hi
578 && cost_lo
< best_cost_lo
))
580 best_cost_hi
= cost_hi
;
581 best_cost_lo
= cost_lo
;
584 else if (cost_hi
< best_cost_hi
585 || (cost_hi
== best_cost_hi
586 && cost_lo
< best_cost_lo
))
588 /* Append at the end of the `first' list if there
589 is no entry with this name. */
590 for (step
= first
; step
!= NULL
; step
= step
->next
)
591 if (strcmp (result_set
, step
->result_set
) == 0)
596 *lastp
= NEW_STEP (result_set
,
599 lastp
= &(*lastp
)->next
;
601 else if (step
->cost_hi
> cost_hi
602 || (step
->cost_hi
== cost_hi
603 && step
->cost_lo
> cost_lo
))
605 /* A better path was found for the node,
606 on the `first' list. */
608 step
->last
= current
;
610 /* Update the cost for all steps. */
611 for (step
= first
; step
!= NULL
;
613 /* But don't update the start nodes. */
614 if (step
->code
!= NULL
)
616 struct derivation_step
*back
;
619 hi
= step
->code
->cost_hi
;
620 lo
= step
->code
->cost_lo
;
622 for (back
= step
->last
; back
->code
!= NULL
;
625 hi
+= back
->code
->cost_hi
;
626 lo
+= back
->code
->cost_lo
;
633 /* Likewise for the nodes on the solution list.
634 Also update best_cost accordingly. */
635 for (step
= solution
; step
!= NULL
;
638 step
->cost_hi
= (step
->code
->cost_hi
639 + step
->last
->cost_hi
);
640 step
->cost_lo
= (step
->code
->cost_lo
641 + step
->last
->cost_lo
);
643 if (step
->cost_hi
< best_cost_hi
644 || (step
->cost_hi
== best_cost_hi
645 && step
->cost_lo
< best_cost_lo
))
647 best_cost_hi
= step
->cost_hi
;
648 best_cost_lo
= step
->cost_lo
;
656 while (runp
!= NULL
);
667 if (solution
!= NULL
)
669 /* We really found a way to do the transformation. */
671 /* Choose the best solution. This is easy because we know that
672 the solution list has at most length 2 (one for every possible
674 if (solution
->next
!= NULL
)
676 struct derivation_step
*solution2
= solution
->next
;
678 if (solution2
->cost_hi
< solution
->cost_hi
679 || (solution2
->cost_hi
== solution
->cost_hi
680 && solution2
->cost_lo
< solution
->cost_lo
))
681 solution
= solution2
;
684 /* Now build a data structure describing the transformation steps. */
685 result
= gen_steps (solution
, toset_expand
?: toset
,
686 fromset_expand
?: fromset
, handle
, nsteps
);
690 /* We haven't found a transformation. Clear the result values. */
695 /* Add result in any case to list of known derivations. */
696 add_derivation (fromset_expand
?: fromset
, toset_expand
?: toset
,
/* Once-control object.  __gconv_compare_alias and __gconv_find_transform
   pass it to __libc_once together with __gconv_read_conf.  */
703 /* Control of initialization. */
704 __libc_once_define (static, once
);
/* Look NAME up in the alias tree __gconv_alias_db.  A stack key whose
   fromname points at NAME is searched with __tfind using
   __gconv_alias_compare.  Returns the toname of the matching entry, or
   NULL when no entry matches.
   NOTE(review): the return type line and the braces are not visible in
   this listing.  */
708 do_lookup_alias (const char *name
)
710 struct gconv_alias key
;
711 struct gconv_alias
**found
;
/* Only fromname is set because __gconv_alias_compare reads nothing
   else.  The cast drops the const qualifier of NAME.  */
713 key
.fromname
= (char *) name
;
714 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
715 return found
!= NULL
? (*found
)->toname
: NULL
;
/* Compare the charset names NAME1 and NAME2 after alias resolution.
   The configuration is read once through __libc_once.  The cache
   comparison __gconv_compare_alias_cache is tried first; when it returns
   a nonzero value the names are resolved with do_lookup_alias (falling
   back to the given name when there is no alias) and compared with
   strcmp.
   NOTE(review): the declaration of result and the return statement are
   not visible in this listing.  */
721 __gconv_compare_alias (const char *name1
, const char *name2
)
725 /* Ensure that the configuration data is read. */
726 __libc_once (once
, __gconv_read_conf
);
728 if (__gconv_compare_alias_cache (name1
, name2
, &result
) != 0)
729 result
= strcmp (do_lookup_alias (name1
) ?: name1
,
730 do_lookup_alias (name2
) ?: name2
);
/* Find the conversion steps from FROMSET to TOSET and return them through
   HANDLE and NSTEPS.
   The configuration is read once, then __gconv_lock is taken.  The cache
   is asked first with __gconv_lookup_cache; any answer other than
   __GCONV_NODB leads to the lock being released.  Without a module
   database the function unlocks and returns __GCONV_NOCONV.  Otherwise
   both names are resolved with do_lookup_alias.  If the flags contain
   GCONV_AVOID_NOCONV and any combination of plain or expanded names is
   equal, the function unlocks and returns __GCONV_NULCONV.  In the
   remaining case find_derivation does the search and the lock is
   released afterwards.  A result of __GCONV_OK with a NULL handle is
   reported as __GCONV_NOCONV.
   NOTE(review): the declaration of the flags parameter, the declaration
   of result, the return after the cache hit and the tail of the final
   return expression are not visible in this listing.  */
738 __gconv_find_transform (const char *toset
, const char *fromset
,
739 struct __gconv_step
**handle
, size_t *nsteps
,
742 const char *fromset_expand
;
743 const char *toset_expand
;
746 /* Ensure that the configuration data is read. */
747 __libc_once (once
, __gconv_read_conf
);
749 /* Acquire the lock. */
750 __libc_lock_lock (__gconv_lock
);
752 result
= __gconv_lookup_cache (toset
, fromset
, handle
, nsteps
, flags
);
753 if (result
!= __GCONV_NODB
)
755 /* We have a cache and could resolve the request, successful or not. */
756 __libc_lock_unlock (__gconv_lock
);
760 /* If we don't have a module database return with an error. */
761 if (__gconv_modules_db
== NULL
)
763 __libc_lock_unlock (__gconv_lock
);
764 return __GCONV_NOCONV
;
767 /* See whether the names are aliases. */
768 fromset_expand
= do_lookup_alias (fromset
);
769 toset_expand
= do_lookup_alias (toset
);
771 if (__builtin_expect (flags
& GCONV_AVOID_NOCONV
, 0)
772 /* We are not supposed to create a pseudo transformation (means
773 copying) when the input and output character set are the same. */
774 && (strcmp (toset
, fromset
) == 0
775 || (toset_expand
!= NULL
&& strcmp (toset_expand
, fromset
) == 0)
776 || (fromset_expand
!= NULL
777 && (strcmp (toset
, fromset_expand
) == 0
778 || (toset_expand
!= NULL
779 && strcmp (toset_expand
, fromset_expand
) == 0)))))
781 /* Both character sets are the same. */
782 __libc_lock_unlock (__gconv_lock
);
783 return __GCONV_NULCONV
;
786 result
= find_derivation (toset
, toset_expand
, fromset
, fromset_expand
,
789 /* Release the lock. */
790 __libc_lock_unlock (__gconv_lock
);
792 /* The following code is necessary since `find_derivation' will return
793 GCONV_OK even when no derivation was found but the same request
794 was processed before. I.e., negative results will also be cached. */
795 return (result
== __GCONV_OK
796 ? (*handle
== NULL
? __GCONV_NOCONV
: __GCONV_OK
)
/* Give back the NSTEPS entries of STEPS.  With __gconv_lock held, entries
   of the array are released with __gconv_release_step and the array is
   then passed to __gconv_release_cache.  The result starts as
   __GCONV_OK.
   NOTE(review): the declaration of cnt, the loop header and the return
   statement are not visible in this listing.  The comment that begins
   with If we use the cache has no terminator here; confirm against the
   pristine file.  */
801 /* Release the entries of the modules list. */
804 __gconv_close_transform (struct __gconv_step
*steps
, size_t nsteps
)
806 int result
= __GCONV_OK
;
809 /* Acquire the lock. */
810 __libc_lock_lock (__gconv_lock
);
815 __gconv_release_step (&steps
[cnt
]);
818 /* If we use the cache we free a bit more since we don't keep any
819 transformation records around, they are cheap enough to
821 __gconv_release_cache (steps
, nsteps
);
823 /* Release the lock. */
824 __libc_lock_unlock (__gconv_lock
);
/* Release the module tree rooted at NODE.  The left and right subtrees
   are handled first by recursion.  The visible remainder is a loop that
   runs until node becomes NULL; each round looks at the current entry
   through act and tests whether its module_name starts with a slash.
   NODE itself must not be NULL because it is dereferenced
   unconditionally.
   NOTE(review): the body of that loop, including how node advances and
   what is freed, is not visible in this listing.  */
830 /* Free the modules mentioned. */
832 internal_function __libc_freeres_fn_section
833 free_modules_db (struct gconv_module
*node
)
835 if (node
->left
!= NULL
)
836 free_modules_db (node
->left
);
837 if (node
->right
!= NULL
)
838 free_modules_db (node
->right
);
841 struct gconv_module
*act
= node
;
843 if (act
->module_name
[0] == '/')
846 while (node
!= NULL
);
/* Resource cleanup routine registered through libc_freeres_fn.  In order
   it calls _nl_locale_subfreeres and _nl_finddomain_subfreeres, destroys
   the alias tree with __tdestroy and free, releases the module tree with
   free_modules_db, and destroys the known_derivations tree with
   __tdestroy and free_derivation.  Each of the three databases is only
   touched when its pointer is not NULL.
   NOTE(review): the comment that begins with First free locale memory
   has no terminator in this listing; confirm against the pristine
   file.  */
850 /* Free all resources if necessary. */
851 libc_freeres_fn (free_mem
)
853 /* First free locale memory. This needs to be done before freeing
854 derivations, as ctype cleanup functions dereference steps arrays which we
856 _nl_locale_subfreeres ();
858 /* finddomain.c has similar problem. */
859 extern void _nl_finddomain_subfreeres (void) attribute_hidden
;
860 _nl_finddomain_subfreeres ();
862 if (__gconv_alias_db
!= NULL
)
863 __tdestroy (__gconv_alias_db
, free
);
865 if (__gconv_modules_db
!= NULL
)
866 free_modules_db (__gconv_modules_db
);
868 if (known_derivations
!= NULL
)
869 __tdestroy (known_derivations
, free_derivation
);