1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2012 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
27 #include <locale/localeinfo.h>
30 #include <gconv_int.h>
/* Global state of the conversion database.  `__gconv_alias_db' is the
   root handed to __tfind and __tdestroy for alias lookups elsewhere in
   this file, `__gconv_modules_db' is the root of the module database and
   `__gconv_lock' is the lock taken around lookups and releases.
   NOTE(review): the comment that follows has no closing delimiter in this
   copy, so the `__gconv_alias_db' definition is lexically part of it, and
   the leading numbers on each line look like extraction residue.  Confirm
   against the pristine file.  */
34 /* Simple data structure for alias mapping. We have two names, `from'
36 void *__gconv_alias_db
;
38 /* Array with available modules. */
39 struct gconv_module
*__gconv_modules_db
;
41 /* We modify global data. */
42 __libc_lock_define_initialized (, __gconv_lock
)
45 /* Provide access to module database. */
/* Returns the current value of the global `__gconv_modules_db'.
   NOTE(review): the return-type line and the braces are not present in
   this copy of the file.  */
47 __gconv_get_modules_db (void)
49 return __gconv_modules_db
;
/* Provide access to the alias database: returns the current value of the
   global `__gconv_alias_db'.
   NOTE(review): the return-type line and the braces are not present in
   this copy of the file.  */
53 __gconv_get_alias_db (void)
55 return __gconv_alias_db
;
59 /* Function for searching alias. */
/* Comparison callback for the alias tree.  P1 and P2 are treated as
   pointers to `struct gconv_alias'; the result is that of strcmp on
   their `fromname' members.  It is the comparator passed to __tfind in
   do_lookup_alias.
   NOTE(review): the return-type line and the braces are not present in
   this copy of the file.  */
61 __gconv_alias_compare (const void *p1
, const void *p2
)
63 const struct gconv_alias
*s1
= (const struct gconv_alias
*) p1
;
64 const struct gconv_alias
*s2
= (const struct gconv_alias
*) p2
;
65 return strcmp (s1
->fromname
, s2
->fromname
);
69 /* To search for a derivation we create a list of intermediate steps.
70 Each element contains a pointer to the element which precedes it
71 in the derivation order. */
/* Members visible here:
     result_set      name of the charset this node stands for
     result_set_len  its length (NEW_STEP stores strlen of the name)
     code            module used to reach this node; NULL for start nodes
     last            predecessor on the path found so far
     next            next node on the list the node is linked into
   NOTE(review): find_derivation also reads and writes `cost_hi' and
   `cost_lo' members; their declarations and the braces of the struct are
   not present in this copy.  */
72 struct derivation_step
74 const char *result_set
;
75 size_t result_set_len
;
78 struct gconv_module
*code
;
79 struct derivation_step
*last
;
80 struct derivation_step
*next
;
/* Create a `struct derivation_step' with alloca and fill in result_set,
   result_set_len, code and last from the arguments.  Because the storage
   comes from alloca it belongs to the stack frame of the function that
   expands the macro and must not be freed or used after that function
   returns.
   NOTE(review): the `hi' and `lo' parameters are unused in the visible
   lines and the statement expression is never closed; the last visible
   line still ends in a continuation backslash.  Lines appear to be
   missing from this copy.  */
83 #define NEW_STEP(result, hi, lo, module, last_mod) \
84 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
85 newp->result_set = result; \
86 newp->result_set_len = strlen (result); \
89 newp->code = module; \
90 newp->last = last_mod; \
95 /* If a specific transformation is used more than once we should not need
96 to start looking for it again. Instead cache each successful result. */
/* One cached lookup result.  `steps' is the step array that
   derivation_lookup hands back to its caller.
   NOTE(review): only `steps' is declared in this copy; the code in this
   file also uses members named `from', `to' and `nsteps', and the braces
   of the struct are missing.  */
97 struct known_derivation
101 struct __gconv_step
*steps
;
105 /* Compare function for database of found derivations. */
/* P1 and P2 are treated as pointers to `struct known_derivation'.  The
   records are compared with strcmp on their `from' names and on their
   `to' names.  This is the comparator used with __tfind and __tsearch on
   `known_derivations'.
   NOTE(review): the declaration of `result', the statement between the
   two assignments (presumably a test that the `from' names compared
   equal) and the return statement are not present in this copy.  */
107 derivation_compare (const void *p1
, const void *p2
)
109 const struct known_derivation
*s1
= (const struct known_derivation
*) p1
;
110 const struct known_derivation
*s2
= (const struct known_derivation
*) p2
;
113 result
= strcmp (s1
->from
, s2
->from
);
115 result
= strcmp (s1
->to
, s2
->to
);
119 /* The search tree for known derivations. */
/* Root pointer used with __tfind, __tsearch and __tdestroy in this file;
   the elements are `struct known_derivation' records ordered by
   derivation_compare.  */
120 static void *known_derivations
;
122 /* Look up whether given transformation was already requested before. */
/* Searches `known_derivations' for the pair (FROMSET, TOSET) with __tfind.
   On a hit the cached step array and step count are stored through
   HANDLE and NSTEPS.  A cached entry may describe a failed search, in
   which case the stored step pointer is NULL; __gconv_find_transform
   checks for that.  __GCONV_NOCONV is the result for a miss.
   NOTE(review): the return-type line, the condition guarding the
   __GCONV_NOCONV return and the final return statement are not present
   in this copy.  */
125 derivation_lookup (const char *fromset
, const char *toset
,
126 struct __gconv_step
**handle
, size_t *nsteps
)
128 struct known_derivation key
= { fromset
, toset
, NULL
, 0 };
129 struct known_derivation
**result
;
131 result
= __tfind (&key
, &known_derivations
, derivation_compare
);
134 return __GCONV_NOCONV
;
136 *handle
= (*result
)->steps
;
137 *nsteps
= (*result
)->nsteps
;
139 /* Please note that we return GCONV_OK even if the last search for
140 this transformation was unsuccessful. */
144 /* Add new derivation to list of known ones. */
/* Records the result (HANDLE, NSTEPS) for the pair (FROMSET, TOSET) in
   `known_derivations'.  A single malloc block holds the record followed
   by the name strings: `from' points directly behind the record, and the
   value stored in `to' is derived from the end of the FROMSET copy
   (__mempcpy returns the address just past the bytes it copied).  The
   lengths include the terminating NUL.  The record is inserted with
   __tsearch.
   NOTE(review): the return-type line, the remaining memcpy arguments,
   the comparison applied to the __tsearch result and the statement it
   guards are not present in this copy.  */
147 add_derivation (const char *fromset
, const char *toset
,
148 struct __gconv_step
*handle
, size_t nsteps
)
150 struct known_derivation
*new_deriv
;
151 size_t fromset_len
= strlen (fromset
) + 1;
152 size_t toset_len
= strlen (toset
) + 1;
154 new_deriv
= (struct known_derivation
*)
155 malloc (sizeof (struct known_derivation
) + fromset_len
+ toset_len
);
156 if (new_deriv
!= NULL
)
158 new_deriv
->from
= (char *) (new_deriv
+ 1);
159 new_deriv
->to
= memcpy (__mempcpy (new_deriv
+ 1, fromset
, fromset_len
),
162 new_deriv
->steps
= handle
;
163 new_deriv
->nsteps
= nsteps
;
165 if (__tsearch (new_deriv
, &known_derivations
, derivation_compare
)
167 /* There is some kind of memory allocation problem. */
170 /* Please note that we don't complain if the allocation failed. This
171 is not tragic, but in case we use the memory debugging facilities
172 not all memory will be freed. */
/* Node destructor passed to __tdestroy for `known_derivations' (see
   free_mem).  P is treated as a `struct known_derivation'.  For every
   step whose use counter is still positive and which has an end
   function, that function is demangled with PTR_DEMANGLE and called via
   DL_CALL_FCT.  Afterwards, if there is a step array, the first step's
   `__from_name', the last step's `__to_name' and the array itself are
   freed.
   NOTE(review): the declaration of `cnt', the braces and whatever
   follows the last visible free call are not present in this copy.  */
175 static void __libc_freeres_fn_section
176 free_derivation (void *p
)
178 struct known_derivation
*deriv
= (struct known_derivation
*) p
;
181 for (cnt
= 0; cnt
< deriv
->nsteps
; ++cnt
)
182 if (deriv
->steps
[cnt
].__counter
> 0
183 && deriv
->steps
[cnt
].__end_fct
!= NULL
)
185 assert (deriv
->steps
[cnt
].__shlib_handle
!= NULL
);
187 __gconv_end_fct end_fct
= deriv
->steps
[cnt
].__end_fct
;
189 PTR_DEMANGLE (end_fct
);
191 DL_CALL_FCT (end_fct
, (&deriv
->steps
[cnt
]));
194 /* Free the name strings. */
195 if (deriv
->steps
!= NULL
)
197 free ((char *) deriv
->steps
[0].__from_name
);
198 free ((char *) deriv
->steps
[deriv
->nsteps
- 1].__to_name
);
199 free ((struct __gconv_step
*) deriv
->steps
);
206 /* Decrement the reference count for a single step in a steps array. */
/* For a step backed by a loaded object (`__shlib_handle' not NULL) the
   counter is decremented; when it reaches zero the end function, if any,
   is demangled and called, the loaded object is released with
   __gconv_release_shlib and the handle is cleared.  A step without a
   handle is a builtin; it is only checked (by assert) to have no end
   function.
   NOTE(review): the return-type line and the braces are not present in
   this copy.  */
209 __gconv_release_step (struct __gconv_step
*step
)
211 /* Skip builtin modules; they are not reference counted. */
212 if (step
->__shlib_handle
!= NULL
&& --step
->__counter
== 0)
214 /* Call the destructor. */
215 if (step
->__end_fct
!= NULL
)
217 assert (step
->__shlib_handle
!= NULL
);
219 __gconv_end_fct end_fct
= step
->__end_fct
;
221 PTR_DEMANGLE (end_fct
);
223 DL_CALL_FCT (end_fct
, (step
));
227 /* Release the loaded module. */
228 __gconv_release_shlib (step
->__shlib_handle
);
229 step
->__shlib_handle
= NULL
;
232 else if (step
->__shlib_handle
== NULL
)
233 /* Builtin modules should not have end functions. */
234 assert (step
->__end_fct
== NULL
);
/* Build the `struct __gconv_step' array for the derivation that ends at
   BEST.  The path is followed backwards through the `last' links: first
   to count the steps, then to fill the array starting with its last
   element.  For each step the names, the use counter (1) and the data
   pointer (NULL) are set.  A module whose name starts with '/' is loaded
   with __gconv_find_shlib and its function pointers are copied from the
   loaded object; its init function, if any, is demangled and called, and
   a `__btowc_fct' set by that call is mangled again.  Any other module
   is set up through __gconv_get_builtin_trans.  If setting up a step
   failed, the steps after it are released with __gconv_release_step and
   an OK status is turned into __GCONV_NOCONV.
   `status' starts out as __GCONV_NOMEM.
   NOTE(review): many lines are not present in this copy (return type,
   declaration and counting of `step_cnt', the size factor of the malloc
   call, the first alternative of the `__from_name' conditional, the
   handling of a failed load, the stores through HANDLE and NSTEPS, the
   return).  Do not rely on this text for the exact control flow.  */
239 gen_steps (struct derivation_step
*best
, const char *toset
,
240 const char *fromset
, struct __gconv_step
**handle
, size_t *nsteps
)
243 struct __gconv_step
*result
;
244 struct derivation_step
*current
;
245 int status
= __GCONV_NOMEM
;
247 /* First determine number of steps. */
248 for (current
= best
; current
->last
!= NULL
; current
= current
->last
)
251 result
= (struct __gconv_step
*) malloc (sizeof (struct __gconv_step
)
/* Fill the array from the highest index downwards while walking the
   `last' links.  */
260 while (step_cnt
-- > 0)
262 result
[step_cnt
].__from_name
= (step_cnt
== 0
264 : (char *)current
->last
->result_set
);
265 result
[step_cnt
].__to_name
= (step_cnt
+ 1 == *nsteps
266 ? __strdup (current
->result_set
)
267 : result
[step_cnt
+ 1].__from_name
);
269 result
[step_cnt
].__counter
= 1;
270 result
[step_cnt
].__data
= NULL
;
273 if (current
->code
->module_name
[0] == '/')
275 /* Load the module, return handle for it. */
276 struct __gconv_loaded_object
*shlib_handle
=
277 __gconv_find_shlib (current
->code
->module_name
);
279 if (shlib_handle
== NULL
)
285 result
[step_cnt
].__shlib_handle
= shlib_handle
;
286 result
[step_cnt
].__modname
= shlib_handle
->name
;
287 result
[step_cnt
].__fct
= shlib_handle
->fct
;
288 result
[step_cnt
].__init_fct
= shlib_handle
->init_fct
;
289 result
[step_cnt
].__end_fct
= shlib_handle
->end_fct
;
291 /* These settings can be overridden by the init function. */
292 result
[step_cnt
].__btowc_fct
= NULL
;
294 /* Call the init function. */
295 __gconv_init_fct init_fct
= result
[step_cnt
].__init_fct
;
296 if (init_fct
!= NULL
)
298 assert (result
[step_cnt
].__shlib_handle
!= NULL
);
301 PTR_DEMANGLE (init_fct
);
303 status
= DL_CALL_FCT (init_fct
, (&result
[step_cnt
]));
305 if (__builtin_expect (status
, __GCONV_OK
) != __GCONV_OK
)
308 /* Make sure we unload this module. */
310 result
[step_cnt
].__end_fct
= NULL
;
315 if (result
[step_cnt
].__btowc_fct
!= NULL
)
316 PTR_MANGLE (result
[step_cnt
].__btowc_fct
);
322 /* It's a builtin transformation. */
323 __gconv_get_builtin_trans (current
->code
->module_name
,
326 current
= current
->last
;
329 if (__builtin_expect (failed
, 0) != 0)
331 /* Something went wrong while initializing the modules. */
332 while (++step_cnt
< *nsteps
)
333 __gconv_release_step (&result
[step_cnt
]);
337 if (status
== __GCONV_OK
)
338 status
= __GCONV_NOCONV
;
/* Take a new reference on the steps of an existing derivation.  The
   counter of a step is incremented; if it was zero before and the step
   has a module name (i.e. it is not a builtin), the module is loaded
   again with __gconv_find_shlib.  When that fails, the steps with a
   higher index are released with __gconv_release_step and the result
   becomes __GCONV_NOCONV.  Otherwise the init function, if any, is
   demangled and called, and a `__btowc_fct' set by it is mangled again.
   NOTE(review): the return-type line, the loop over the steps, the
   braces and the return statement are not present in this copy.  In
   addition the comment beginning "The function addresses defined by the
   module may" has no closing delimiter here, so the three function
   pointer assignments that follow it are lexically part of that
   comment.  Confirm against the pristine file.  */
356 increment_counter (struct __gconv_step
*steps
, size_t nsteps
)
358 /* Increment the user counter. */
360 int result
= __GCONV_OK
;
364 struct __gconv_step
*step
= &steps
[cnt
];
366 if (step
->__counter
++ == 0)
368 /* Skip builtin modules. */
369 if (step
->__modname
!= NULL
)
371 /* Reopen a previously used module. */
372 step
->__shlib_handle
= __gconv_find_shlib (step
->__modname
);
373 if (step
->__shlib_handle
== NULL
)
375 /* Oops, this is the second time we use this module
376 (after unloading) and this time loading failed!? */
378 while (++cnt
< nsteps
)
379 __gconv_release_step (&steps
[cnt
]);
380 result
= __GCONV_NOCONV
;
384 /* The function addresses defined by the module may
386 step
->__fct
= step
->__shlib_handle
->fct
;
387 step
->__init_fct
= step
->__shlib_handle
->init_fct
;
388 step
->__end_fct
= step
->__shlib_handle
->end_fct
;
390 /* These settings can be overridden by the init function. */
391 step
->__btowc_fct
= NULL
;
394 /* Call the init function. */
395 __gconv_init_fct init_fct
= step
->__init_fct
;
396 if (init_fct
!= NULL
)
399 PTR_DEMANGLE (init_fct
);
401 DL_CALL_FCT (init_fct
, (step
));
404 if (step
->__btowc_fct
!= NULL
)
405 PTR_MANGLE (step
->__btowc_fct
);
415 /* The main function: find a possible derivation from the `fromset' (either
416 the given name or the alias) to the `toset' (again with alias). */
/* TOSET and FROMSET are the requested names; TOSET_EXPAND and
   FROMSET_EXPAND are their alias expansions or NULL.  The resulting step
   array and its length are delivered through HANDLE and NSTEPS.

   The cache of known derivations is consulted first (keyed by the
   expanded name where one exists); on a hit the step counters are bumped
   with increment_counter.  Otherwise the module database is searched.
   Costs are compared as the pair (cost_hi, cost_lo), cost_hi first.
   List nodes are created with NEW_STEP, i.e. with alloca, so they exist
   only for the duration of this call.  When a solution exists the step
   array is built with gen_steps.  The outcome is recorded with
   add_derivation whether or not a derivation was found.

   NOTE(review): many lines are not present in this copy (return type,
   declaration of `result', braces, several statements and argument
   lists).  Three comments also lack their closing delimiter here: the
   algorithm description beginning "The task is to find", the one
   beginning "Now match all the available module specifications" and the
   one beginning "Choose the best solution".  The code following each of
   them, up to the end of the next complete comment, is lexically
   commented out.  Confirm against the pristine file.  */
419 find_derivation (const char *toset
, const char *toset_expand
,
420 const char *fromset
, const char *fromset_expand
,
421 struct __gconv_step
**handle
, size_t *nsteps
)
423 struct derivation_step
*first
, *current
, **lastp
, *solution
= NULL
;
424 int best_cost_hi
= INT_MAX
;
425 int best_cost_lo
= INT_MAX
;
428 /* Look whether an earlier call to `find_derivation' has already
429 computed a possible derivation. If so, return it immediately. */
430 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
432 if (result
== __GCONV_OK
)
435 result
= increment_counter (*handle
, *nsteps
);
440 /* The task is to find a sequence of transformations, backed by the
441 existing modules - whether builtin or dynamically loadable -,
442 starting at `fromset' (or `fromset_expand') and ending at `toset'
443 (or `toset_expand'), and with minimal cost.
445 For computer scientists, this is a shortest path search in the
446 graph where the nodes are all possible charsets and the edges are
447 the transformations listed in __gconv_modules_db.
449 For now we use a simple algorithm with quadratic runtime behaviour.
450 A breadth-first search, starting at `fromset' and `fromset_expand'.
451 The list starting at `first' contains all nodes that have been
452 visited up to now, in the order in which they have been visited --
453 excluding the goal nodes `toset' and `toset_expand' which get
454 managed in the list starting at `solution'.
455 `current' walks through the list starting at `first' and looks
456 which nodes are reachable from the current node, adding them to
457 the end of the list [`first' or `solution' respectively] (if
458 they are visited the first time) or updating them in place (if
459 they have already been visited).
460 In each node of either list, cost_lo and cost_hi contain the
461 minimum cost over any paths found up to now, starting at `fromset'
462 or `fromset_expand', ending at that node. best_cost_lo and
463 best_cost_hi represent the minimum over the elements of the
466 if (fromset_expand
!= NULL
)
468 first
= NEW_STEP (fromset_expand
, 0, 0, NULL
, NULL
);
469 first
->next
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
470 lastp
= &first
->next
->next
;
474 first
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
475 lastp
= &first
->next
;
478 for (current
= first
; current
!= NULL
; current
= current
->next
)
480 /* Now match all the available module specifications against the
481 current charset name. If any of them matches check whether
482 we already have a derivation for this charset. If yes, use the
483 one with the lower costs. Otherwise add the new charset at the
486 The module database is organized in a tree form which allows
487 searching for prefixes. So we search for the first entry with a
488 matching prefix and any other matching entry can be found from
490 struct gconv_module
*node
;
492 /* Maybe it is not necessary anymore to look for a solution for
493 this entry since the cost is already as high (or higher) as
494 the cost for the best solution so far. */
495 if (current
->cost_hi
> best_cost_hi
496 || (current
->cost_hi
== best_cost_hi
497 && current
->cost_lo
>= best_cost_lo
))
500 node
= __gconv_modules_db
;
503 int cmpres
= strcmp (current
->result_set
, node
->from_string
);
506 /* Walk through the list of modules with this prefix and
507 try to match the name. */
508 struct gconv_module
*runp
;
510 /* Check all the modules with this prefix. */
514 const char *result_set
= (strcmp (runp
->to_string
, "-") == 0
515 ? (toset_expand
?: toset
)
517 int cost_hi
= runp
->cost_hi
+ current
->cost_hi
;
518 int cost_lo
= runp
->cost_lo
+ current
->cost_lo
;
519 struct derivation_step
*step
;
521 /* We managed to find a derivation. First see whether
522 we have reached one of the goal nodes. */
523 if (strcmp (result_set
, toset
) == 0
524 || (toset_expand
!= NULL
525 && strcmp (result_set
, toset_expand
) == 0))
527 /* Append to the `solution' list if there
528 is no entry with this name. */
529 for (step
= solution
; step
!= NULL
; step
= step
->next
)
530 if (strcmp (result_set
, step
->result_set
) == 0)
535 step
= NEW_STEP (result_set
,
538 step
->next
= solution
;
541 else if (step
->cost_hi
> cost_hi
542 || (step
->cost_hi
== cost_hi
543 && step
->cost_lo
> cost_lo
))
545 /* A better path was found for the node,
546 on the `solution' list. */
548 step
->last
= current
;
549 step
->cost_hi
= cost_hi
;
550 step
->cost_lo
= cost_lo
;
553 /* Update best_cost accordingly. */
554 if (cost_hi
< best_cost_hi
555 || (cost_hi
== best_cost_hi
556 && cost_lo
< best_cost_lo
))
558 best_cost_hi
= cost_hi
;
559 best_cost_lo
= cost_lo
;
562 else if (cost_hi
< best_cost_hi
563 || (cost_hi
== best_cost_hi
564 && cost_lo
< best_cost_lo
))
566 /* Append at the end of the `first' list if there
567 is no entry with this name. */
568 for (step
= first
; step
!= NULL
; step
= step
->next
)
569 if (strcmp (result_set
, step
->result_set
) == 0)
574 *lastp
= NEW_STEP (result_set
,
577 lastp
= &(*lastp
)->next
;
579 else if (step
->cost_hi
> cost_hi
580 || (step
->cost_hi
== cost_hi
581 && step
->cost_lo
> cost_lo
))
583 /* A better path was found for the node,
584 on the `first' list. */
586 step
->last
= current
;
588 /* Update the cost for all steps. */
589 for (step
= first
; step
!= NULL
;
591 /* But don't update the start nodes. */
592 if (step
->code
!= NULL
)
594 struct derivation_step
*back
;
597 hi
= step
->code
->cost_hi
;
598 lo
= step
->code
->cost_lo
;
600 for (back
= step
->last
; back
->code
!= NULL
;
603 hi
+= back
->code
->cost_hi
;
604 lo
+= back
->code
->cost_lo
;
611 /* Likewise for the nodes on the solution list.
612 Also update best_cost accordingly. */
613 for (step
= solution
; step
!= NULL
;
616 step
->cost_hi
= (step
->code
->cost_hi
617 + step
->last
->cost_hi
);
618 step
->cost_lo
= (step
->code
->cost_lo
619 + step
->last
->cost_lo
);
621 if (step
->cost_hi
< best_cost_hi
622 || (step
->cost_hi
== best_cost_hi
623 && step
->cost_lo
< best_cost_lo
))
625 best_cost_hi
= step
->cost_hi
;
626 best_cost_lo
= step
->cost_lo
;
634 while (runp
!= NULL
);
645 if (solution
!= NULL
)
647 /* We really found a way to do the transformation. */
649 /* Choose the best solution. This is easy because we know that
650 the solution list has at most length 2 (one for every possible
652 if (solution
->next
!= NULL
)
654 struct derivation_step
*solution2
= solution
->next
;
656 if (solution2
->cost_hi
< solution
->cost_hi
657 || (solution2
->cost_hi
== solution
->cost_hi
658 && solution2
->cost_lo
< solution
->cost_lo
))
659 solution
= solution2
;
662 /* Now build a data structure describing the transformation steps. */
663 result
= gen_steps (solution
, toset_expand
?: toset
,
664 fromset_expand
?: fromset
, handle
, nsteps
);
668 /* We haven't found a transformation. Clear the result values. */
673 /* Add result in any case to list of known derivations. */
674 add_derivation (fromset_expand
?: fromset
, toset_expand
?: toset
,
681 /* Control of initialization. */
/* Once-control object passed to __libc_once together with
   __gconv_read_conf before the configuration data is used.  */
682 __libc_once_define (static, once
);
/* Look NAME up in the alias tree `__gconv_alias_db' with __tfind, using
   __gconv_alias_compare.  Returns the `toname' of the matching entry, or
   NULL when NAME is not a known alias.  Only the `fromname' member of the
   search key is set; that is the only member the comparator reads.
   NOTE(review): the return-type line and the braces are not present in
   this copy of the file.  */
686 do_lookup_alias (const char *name
)
688 struct gconv_alias key
;
689 struct gconv_alias
**found
;
691 key
.fromname
= (char *) name
;
692 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
693 return found
!= NULL
? (*found
)->toname
: NULL
;
/* Compare the charset names NAME1 and NAME2 with aliases taken into
   account.  After making sure the configuration has been read, the
   question is first put to __gconv_compare_alias_cache.  If that call
   returns non-zero, the result is computed here as strcmp of the two
   names, each replaced by its alias expansion when do_lookup_alias finds
   one.
   NOTE(review): the return-type line, the declaration of `result', the
   braces and the return statement are not present in this copy.  */
699 __gconv_compare_alias (const char *name1
, const char *name2
)
703 /* Ensure that the configuration data is read. */
704 __libc_once (once
, __gconv_read_conf
);
706 if (__gconv_compare_alias_cache (name1
, name2
, &result
) != 0)
707 result
= strcmp (do_lookup_alias (name1
) ?: name1
,
708 do_lookup_alias (name2
) ?: name2
);
/* Find the conversion steps from FROMSET to TOSET and deliver them
   through HANDLE and NSTEPS.  After making sure the configuration has
   been read, the work is done with `__gconv_lock' held:
     - __gconv_lookup_cache is asked first; any answer other than
       __GCONV_NODB settles the request;
     - without a module database the result is __GCONV_NOCONV;
     - both names are expanded with do_lookup_alias;
     - if GCONV_AVOID_NOCONV is set in the flags and the source and
       target name (in any combination of plain and expanded form) are
       equal, the result is __GCONV_NULCONV;
     - otherwise find_derivation does the search.
   The lock is released on every visible exit path.  Because negative
   results are cached too, an OK result with a NULL step array is
   reported as __GCONV_NOCONV.
   NOTE(review): the return-type line, the declaration of the `flags'
   parameter and of `result', the braces, one return statement, the last
   arguments of the find_derivation call and the tail of the final return
   expression are not present in this copy.  */
716 __gconv_find_transform (const char *toset
, const char *fromset
,
717 struct __gconv_step
**handle
, size_t *nsteps
,
720 const char *fromset_expand
;
721 const char *toset_expand
;
724 /* Ensure that the configuration data is read. */
725 __libc_once (once
, __gconv_read_conf
);
727 /* Acquire the lock. */
728 __libc_lock_lock (__gconv_lock
);
730 result
= __gconv_lookup_cache (toset
, fromset
, handle
, nsteps
, flags
);
731 if (result
!= __GCONV_NODB
)
733 /* We have a cache and could resolve the request, successful or not. */
734 __libc_lock_unlock (__gconv_lock
);
738 /* If we don't have a module database return with an error. */
739 if (__gconv_modules_db
== NULL
)
741 __libc_lock_unlock (__gconv_lock
);
742 return __GCONV_NOCONV
;
745 /* See whether the names are aliases. */
746 fromset_expand
= do_lookup_alias (fromset
);
747 toset_expand
= do_lookup_alias (toset
);
749 if (__builtin_expect (flags
& GCONV_AVOID_NOCONV
, 0)
750 /* We are not supposed to create a pseudo transformation (means
751 copying) when the input and output character set are the same. */
752 && (strcmp (toset
, fromset
) == 0
753 || (toset_expand
!= NULL
&& strcmp (toset_expand
, fromset
) == 0)
754 || (fromset_expand
!= NULL
755 && (strcmp (toset
, fromset_expand
) == 0
756 || (toset_expand
!= NULL
757 && strcmp (toset_expand
, fromset_expand
) == 0)))))
759 /* Both character sets are the same. */
760 __libc_lock_unlock (__gconv_lock
);
761 return __GCONV_NULCONV
;
764 result
= find_derivation (toset
, toset_expand
, fromset
, fromset_expand
,
767 /* Release the lock. */
768 __libc_lock_unlock (__gconv_lock
);
770 /* The following code is necessary since `find_derivation' will return
771 GCONV_OK even when no derivation was found but the same request
772 was processed before. I.e., negative results will also be cached. */
773 return (result
== __GCONV_OK
774 ? (*handle
== NULL
? __GCONV_NOCONV
: __GCONV_OK
)
779 /* Release the entries of the modules list. */
/* With `__gconv_lock' held, the steps in STEPS are released with
   __gconv_release_step.  `result' is initialised to __GCONV_OK.
   NOTE(review): the return-type line, the declaration of `cnt', the loop
   around the __gconv_release_step call, the braces and the return
   statement are not present in this copy.  The comment beginning "If we
   use the cache" also has no closing delimiter here, so the
   __gconv_release_cache call that follows it is lexically part of that
   comment.  Confirm against the pristine file.  */
782 __gconv_close_transform (struct __gconv_step
*steps
, size_t nsteps
)
784 int result
= __GCONV_OK
;
787 /* Acquire the lock. */
788 __libc_lock_lock (__gconv_lock
);
793 __gconv_release_step (&steps
[cnt
]);
796 /* If we use the cache we free a bit more since we don't keep any
797 transformation records around, they are cheap enough to
799 __gconv_release_cache (steps
, nsteps
);
801 /* Release the lock. */
802 __libc_lock_unlock (__gconv_lock
);
808 /* Free the modules mentioned. */
/* Recursively walks the module tree rooted at NODE: the `left' and
   `right' subtrees are handled first, then a loop runs over NODE itself
   until `node' becomes NULL.  Inside that loop, modules whose name
   starts with '/' are treated specially.
   NOTE(review): the return-type line, the braces and most of the loop
   body (including how `node' advances and what is freed) are not present
   in this copy.  */
810 internal_function __libc_freeres_fn_section
811 free_modules_db (struct gconv_module
*node
)
813 if (node
->left
!= NULL
)
814 free_modules_db (node
->left
);
815 if (node
->right
!= NULL
)
816 free_modules_db (node
->right
);
819 struct gconv_module
*act
= node
;
821 if (act
->module_name
[0] == '/')
824 while (node
!= NULL
);
828 /* Free all resources if necessary. */
/* Cleanup routine registered through libc_freeres_fn.  Order of work:
   locale data (_nl_locale_subfreeres) and the finddomain data
   (_nl_finddomain_subfreeres) first, then the alias tree (entries
   released with free), the module database (free_modules_db) and finally
   the tree of known derivations (entries released with
   free_derivation).  Each of the three databases is skipped when its
   root pointer is NULL.
   NOTE(review): the braces of the function body are not present in this
   copy of the file.  */
829 libc_freeres_fn (free_mem
)
831 /* First free locale memory. This needs to be done before freeing derivations,
832 as ctype cleanup functions dereference steps arrays which we free below. */
833 _nl_locale_subfreeres ();
835 /* finddomain.c has similar problem. */
836 extern void _nl_finddomain_subfreeres (void) attribute_hidden
;
837 _nl_finddomain_subfreeres ();
839 if (__gconv_alias_db
!= NULL
)
840 __tdestroy (__gconv_alias_db
, free
);
842 if (__gconv_modules_db
!= NULL
)
843 free_modules_db (__gconv_modules_db
);
845 if (known_derivations
!= NULL
)
846 __tdestroy (known_derivations
, free_derivation
);