1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2003, 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
26 #include <sys/param.h>
27 #include <bits/libc-lock.h>
28 #include <locale/localeinfo.h>
31 #include <gconv_int.h>
35 /* Simple data structure for alias mapping. We have two names, `from'
37 void *__gconv_alias_db
;
39 /* Array with available modules. */
40 struct gconv_module
*__gconv_modules_db
;
42 /* We modify global data. */
43 __libc_lock_define_initialized (, __gconv_lock
)
46 /* Provide access to module database. */
48 __gconv_get_modules_db (void)
50 return __gconv_modules_db
;
54 __gconv_get_alias_db (void)
56 return __gconv_alias_db
;
60 /* Function for searching alias. */
62 __gconv_alias_compare (const void *p1
, const void *p2
)
64 const struct gconv_alias
*s1
= (const struct gconv_alias
*) p1
;
65 const struct gconv_alias
*s2
= (const struct gconv_alias
*) p2
;
66 return strcmp (s1
->fromname
, s2
->fromname
);
70 /* To search for a derivation we create a list of intermediate steps.
71 Each element contains a pointer to the element which precedes it
72 in the derivation order. */
73 struct derivation_step
75 const char *result_set
;
76 size_t result_set_len
;
79 struct gconv_module
*code
;
80 struct derivation_step
*last
;
81 struct derivation_step
*next
;
84 #define NEW_STEP(result, hi, lo, module, last_mod) \
85 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
86 newp->result_set = result; \
87 newp->result_set_len = strlen (result); \
90 newp->code = module; \
91 newp->last = last_mod; \
96 /* If a specific transformation is used more than once we should not need
97 to start looking for it again. Instead cache each successful result. */
98 struct known_derivation
102 struct __gconv_step
*steps
;
106 /* Compare function for database of found derivations. */
108 derivation_compare (const void *p1
, const void *p2
)
110 const struct known_derivation
*s1
= (const struct known_derivation
*) p1
;
111 const struct known_derivation
*s2
= (const struct known_derivation
*) p2
;
114 result
= strcmp (s1
->from
, s2
->from
);
116 result
= strcmp (s1
->to
, s2
->to
);
120 /* The search tree for known derivations. */
121 static void *known_derivations
;
123 /* Look up whether given transformation was already requested before. */
126 derivation_lookup (const char *fromset
, const char *toset
,
127 struct __gconv_step
**handle
, size_t *nsteps
)
129 struct known_derivation key
= { fromset
, toset
, NULL
, 0 };
130 struct known_derivation
**result
;
132 result
= __tfind (&key
, &known_derivations
, derivation_compare
);
135 return __GCONV_NOCONV
;
137 *handle
= (*result
)->steps
;
138 *nsteps
= (*result
)->nsteps
;
140 /* Please note that we return GCONV_OK even if the last search for
141 this transformation was unsuccessful. */
145 /* Add new derivation to list of known ones. */
148 add_derivation (const char *fromset
, const char *toset
,
149 struct __gconv_step
*handle
, size_t nsteps
)
151 struct known_derivation
*new_deriv
;
152 size_t fromset_len
= strlen (fromset
) + 1;
153 size_t toset_len
= strlen (toset
) + 1;
155 new_deriv
= (struct known_derivation
*)
156 malloc (sizeof (struct known_derivation
) + fromset_len
+ toset_len
);
157 if (new_deriv
!= NULL
)
159 new_deriv
->from
= (char *) (new_deriv
+ 1);
160 new_deriv
->to
= memcpy (__mempcpy (new_deriv
+ 1, fromset
, fromset_len
),
163 new_deriv
->steps
= handle
;
164 new_deriv
->nsteps
= nsteps
;
166 if (__tsearch (new_deriv
, &known_derivations
, derivation_compare
)
168 /* There is some kind of memory allocation problem. */
171 /* Please note that we don't complain if the allocation failed. This
172 is not tragically but in case we use the memory debugging facilities
173 not all memory will be freed. */
176 static void __libc_freeres_fn_section
177 free_derivation (void *p
)
179 struct known_derivation
*deriv
= (struct known_derivation
*) p
;
182 for (cnt
= 0; cnt
< deriv
->nsteps
; ++cnt
)
183 if (deriv
->steps
[cnt
].__counter
> 0
184 && deriv
->steps
[cnt
].__end_fct
!= NULL
)
186 assert (deriv
->steps
[cnt
].__shlib_handle
!= NULL
);
188 __gconv_end_fct end_fct
= deriv
->steps
[cnt
].__end_fct
;
190 PTR_DEMANGLE (end_fct
);
192 DL_CALL_FCT (end_fct
, (&deriv
->steps
[cnt
]));
195 /* Free the name strings. */
196 free ((char *) deriv
->steps
[0].__from_name
);
197 free ((char *) deriv
->steps
[deriv
->nsteps
- 1].__to_name
);
199 free ((struct __gconv_step
*) deriv
->steps
);
204 /* Decrement the reference count for a single step in a steps array. */
207 __gconv_release_step (struct __gconv_step
*step
)
209 /* Skip builtin modules; they are not reference counted. */
210 if (step
->__shlib_handle
!= NULL
&& --step
->__counter
== 0)
212 /* Call the destructor. */
213 if (step
->__end_fct
!= NULL
)
215 assert (step
->__shlib_handle
!= NULL
);
217 __gconv_end_fct end_fct
= step
->__end_fct
;
219 PTR_DEMANGLE (end_fct
);
221 DL_CALL_FCT (end_fct
, (step
));
225 /* Release the loaded module. */
226 __gconv_release_shlib (step
->__shlib_handle
);
227 step
->__shlib_handle
= NULL
;
230 else if (step
->__shlib_handle
== NULL
)
231 /* Builtin modules should not have end functions. */
232 assert (step
->__end_fct
== NULL
);
237 gen_steps (struct derivation_step
*best
, const char *toset
,
238 const char *fromset
, struct __gconv_step
**handle
, size_t *nsteps
)
241 struct __gconv_step
*result
;
242 struct derivation_step
*current
;
243 int status
= __GCONV_NOMEM
;
245 /* First determine number of steps. */
246 for (current
= best
; current
->last
!= NULL
; current
= current
->last
)
249 result
= (struct __gconv_step
*) malloc (sizeof (struct __gconv_step
)
258 while (step_cnt
-- > 0)
260 result
[step_cnt
].__from_name
= (step_cnt
== 0
262 : (char *)current
->last
->result_set
);
263 result
[step_cnt
].__to_name
= (step_cnt
+ 1 == *nsteps
264 ? __strdup (current
->result_set
)
265 : result
[step_cnt
+ 1].__from_name
);
267 result
[step_cnt
].__counter
= 1;
268 result
[step_cnt
].__data
= NULL
;
271 if (current
->code
->module_name
[0] == '/')
273 /* Load the module, return handle for it. */
274 struct __gconv_loaded_object
*shlib_handle
=
275 __gconv_find_shlib (current
->code
->module_name
);
277 if (shlib_handle
== NULL
)
283 result
[step_cnt
].__shlib_handle
= shlib_handle
;
284 result
[step_cnt
].__modname
= shlib_handle
->name
;
285 result
[step_cnt
].__fct
= shlib_handle
->fct
;
286 result
[step_cnt
].__init_fct
= shlib_handle
->init_fct
;
287 result
[step_cnt
].__end_fct
= shlib_handle
->end_fct
;
289 /* These settings can be overridden by the init function. */
290 result
[step_cnt
].__btowc_fct
= NULL
;
292 /* Call the init function. */
293 __gconv_init_fct init_fct
= result
[step_cnt
].__init_fct
;
294 if (init_fct
!= NULL
)
296 assert (result
[step_cnt
].__shlib_handle
!= NULL
);
299 PTR_DEMANGLE (init_fct
);
301 status
= DL_CALL_FCT (init_fct
, (&result
[step_cnt
]));
303 if (__builtin_expect (status
, __GCONV_OK
) != __GCONV_OK
)
306 /* Make sure we unload this modules. */
308 result
[step_cnt
].__end_fct
= NULL
;
313 if (result
[step_cnt
].__btowc_fct
!= NULL
)
314 PTR_MANGLE (result
[step_cnt
].__btowc_fct
);
320 /* It's a builtin transformation. */
321 __gconv_get_builtin_trans (current
->code
->module_name
,
324 current
= current
->last
;
327 if (__builtin_expect (failed
, 0) != 0)
329 /* Something went wrong while initializing the modules. */
330 while (++step_cnt
< *nsteps
)
331 __gconv_release_step (&result
[step_cnt
]);
335 if (status
== __GCONV_OK
)
336 status
= __GCONV_NOCONV
;
354 increment_counter (struct __gconv_step
*steps
, size_t nsteps
)
356 /* Increment the user counter. */
358 int result
= __GCONV_OK
;
362 struct __gconv_step
*step
= &steps
[cnt
];
364 if (step
->__counter
++ == 0)
366 /* Skip builtin modules. */
367 if (step
->__modname
!= NULL
)
369 /* Reopen a previously used module. */
370 step
->__shlib_handle
= __gconv_find_shlib (step
->__modname
);
371 if (step
->__shlib_handle
== NULL
)
373 /* Oops, this is the second time we use this module
374 (after unloading) and this time loading failed!? */
376 while (++cnt
< nsteps
)
377 __gconv_release_step (&steps
[cnt
]);
378 result
= __GCONV_NOCONV
;
382 /* The function addresses defined by the module may
384 step
->__fct
= step
->__shlib_handle
->fct
;
385 step
->__init_fct
= step
->__shlib_handle
->init_fct
;
386 step
->__end_fct
= step
->__shlib_handle
->end_fct
;
388 /* These settings can be overridden by the init function. */
389 step
->__btowc_fct
= NULL
;
392 /* Call the init function. */
393 __gconv_init_fct init_fct
= step
->__init_fct
;
394 if (init_fct
!= NULL
)
397 PTR_DEMANGLE (init_fct
);
399 DL_CALL_FCT (init_fct
, (step
));
402 if (step
->__btowc_fct
!= NULL
)
403 PTR_MANGLE (step
->__btowc_fct
);
413 /* The main function: find a possible derivation from the `fromset' (either
414 the given name or the alias) to the `toset' (again with alias). */
417 find_derivation (const char *toset
, const char *toset_expand
,
418 const char *fromset
, const char *fromset_expand
,
419 struct __gconv_step
**handle
, size_t *nsteps
)
421 struct derivation_step
*first
, *current
, **lastp
, *solution
= NULL
;
422 int best_cost_hi
= INT_MAX
;
423 int best_cost_lo
= INT_MAX
;
426 /* Look whether an earlier call to `find_derivation' has already
427 computed a possible derivation. If so, return it immediately. */
428 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
430 if (result
== __GCONV_OK
)
433 result
= increment_counter (*handle
, *nsteps
);
438 /* The task is to find a sequence of transformations, backed by the
439 existing modules - whether builtin or dynamically loadable -,
440 starting at `fromset' (or `fromset_expand') and ending at `toset'
441 (or `toset_expand'), and with minimal cost.
443 For computer scientists, this is a shortest path search in the
444 graph where the nodes are all possible charsets and the edges are
445 the transformations listed in __gconv_modules_db.
447 For now we use a simple algorithm with quadratic runtime behaviour.
448 A breadth-first search, starting at `fromset' and `fromset_expand'.
449 The list starting at `first' contains all nodes that have been
450 visited up to now, in the order in which they have been visited --
451 excluding the goal nodes `toset' and `toset_expand' which get
452 managed in the list starting at `solution'.
453 `current' walks through the list starting at `first' and looks
454 which nodes are reachable from the current node, adding them to
455 the end of the list [`first' or `solution' respectively] (if
456 they are visited the first time) or updating them in place (if
457 they have have already been visited).
458 In each node of either list, cost_lo and cost_hi contain the
459 minimum cost over any paths found up to now, starting at `fromset'
460 or `fromset_expand', ending at that node. best_cost_lo and
461 best_cost_hi represent the minimum over the elements of the
464 if (fromset_expand
!= NULL
)
466 first
= NEW_STEP (fromset_expand
, 0, 0, NULL
, NULL
);
467 first
->next
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
468 lastp
= &first
->next
->next
;
472 first
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
473 lastp
= &first
->next
;
476 for (current
= first
; current
!= NULL
; current
= current
->next
)
478 /* Now match all the available module specifications against the
479 current charset name. If any of them matches check whether
480 we already have a derivation for this charset. If yes, use the
481 one with the lower costs. Otherwise add the new charset at the
484 The module database is organized in a tree form which allows
485 searching for prefixes. So we search for the first entry with a
486 matching prefix and any other matching entry can be found from
488 struct gconv_module
*node
;
490 /* Maybe it is not necessary anymore to look for a solution for
491 this entry since the cost is already as high (or higher) as
492 the cost for the best solution so far. */
493 if (current
->cost_hi
> best_cost_hi
494 || (current
->cost_hi
== best_cost_hi
495 && current
->cost_lo
>= best_cost_lo
))
498 node
= __gconv_modules_db
;
501 int cmpres
= strcmp (current
->result_set
, node
->from_string
);
504 /* Walk through the list of modules with this prefix and
505 try to match the name. */
506 struct gconv_module
*runp
;
508 /* Check all the modules with this prefix. */
512 const char *result_set
= (strcmp (runp
->to_string
, "-") == 0
513 ? (toset_expand
?: toset
)
515 int cost_hi
= runp
->cost_hi
+ current
->cost_hi
;
516 int cost_lo
= runp
->cost_lo
+ current
->cost_lo
;
517 struct derivation_step
*step
;
519 /* We managed to find a derivation. First see whether
520 we have reached one of the goal nodes. */
521 if (strcmp (result_set
, toset
) == 0
522 || (toset_expand
!= NULL
523 && strcmp (result_set
, toset_expand
) == 0))
525 /* Append to the `solution' list if there
526 is no entry with this name. */
527 for (step
= solution
; step
!= NULL
; step
= step
->next
)
528 if (strcmp (result_set
, step
->result_set
) == 0)
533 step
= NEW_STEP (result_set
,
536 step
->next
= solution
;
539 else if (step
->cost_hi
> cost_hi
540 || (step
->cost_hi
== cost_hi
541 && step
->cost_lo
> cost_lo
))
543 /* A better path was found for the node,
544 on the `solution' list. */
546 step
->last
= current
;
547 step
->cost_hi
= cost_hi
;
548 step
->cost_lo
= cost_lo
;
551 /* Update best_cost accordingly. */
552 if (cost_hi
< best_cost_hi
553 || (cost_hi
== best_cost_hi
554 && cost_lo
< best_cost_lo
))
556 best_cost_hi
= cost_hi
;
557 best_cost_lo
= cost_lo
;
560 else if (cost_hi
< best_cost_hi
561 || (cost_hi
== best_cost_hi
562 && cost_lo
< best_cost_lo
))
564 /* Append at the end of the `first' list if there
565 is no entry with this name. */
566 for (step
= first
; step
!= NULL
; step
= step
->next
)
567 if (strcmp (result_set
, step
->result_set
) == 0)
572 *lastp
= NEW_STEP (result_set
,
575 lastp
= &(*lastp
)->next
;
577 else if (step
->cost_hi
> cost_hi
578 || (step
->cost_hi
== cost_hi
579 && step
->cost_lo
> cost_lo
))
581 /* A better path was found for the node,
582 on the `first' list. */
584 step
->last
= current
;
586 /* Update the cost for all steps. */
587 for (step
= first
; step
!= NULL
;
589 /* But don't update the start nodes. */
590 if (step
->code
!= NULL
)
592 struct derivation_step
*back
;
595 hi
= step
->code
->cost_hi
;
596 lo
= step
->code
->cost_lo
;
598 for (back
= step
->last
; back
->code
!= NULL
;
601 hi
+= back
->code
->cost_hi
;
602 lo
+= back
->code
->cost_lo
;
609 /* Likewise for the nodes on the solution list.
610 Also update best_cost accordingly. */
611 for (step
= solution
; step
!= NULL
;
614 step
->cost_hi
= (step
->code
->cost_hi
615 + step
->last
->cost_hi
);
616 step
->cost_lo
= (step
->code
->cost_lo
617 + step
->last
->cost_lo
);
619 if (step
->cost_hi
< best_cost_hi
620 || (step
->cost_hi
== best_cost_hi
621 && step
->cost_lo
< best_cost_lo
))
623 best_cost_hi
= step
->cost_hi
;
624 best_cost_lo
= step
->cost_lo
;
632 while (runp
!= NULL
);
643 if (solution
!= NULL
)
645 /* We really found a way to do the transformation. */
647 /* Choose the best solution. This is easy because we know that
648 the solution list has at most length 2 (one for every possible
650 if (solution
->next
!= NULL
)
652 struct derivation_step
*solution2
= solution
->next
;
654 if (solution2
->cost_hi
< solution
->cost_hi
655 || (solution2
->cost_hi
== solution
->cost_hi
656 && solution2
->cost_lo
< solution
->cost_lo
))
657 solution
= solution2
;
660 /* Now build a data structure describing the transformation steps. */
661 result
= gen_steps (solution
, toset_expand
?: toset
,
662 fromset_expand
?: fromset
, handle
, nsteps
);
666 /* We haven't found a transformation. Clear the result values. */
671 /* Add result in any case to list of known derivations. */
672 add_derivation (fromset_expand
?: fromset
, toset_expand
?: toset
,
679 /* Control of initialization. */
680 __libc_once_define (static, once
);
684 do_lookup_alias (const char *name
)
686 struct gconv_alias key
;
687 struct gconv_alias
**found
;
689 key
.fromname
= (char *) name
;
690 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
691 return found
!= NULL
? (*found
)->toname
: NULL
;
697 __gconv_compare_alias (const char *name1
, const char *name2
)
701 /* Ensure that the configuration data is read. */
702 __libc_once (once
, __gconv_read_conf
);
704 if (__gconv_compare_alias_cache (name1
, name2
, &result
) != 0)
705 result
= strcmp (do_lookup_alias (name1
) ?: name1
,
706 do_lookup_alias (name2
) ?: name2
);
714 __gconv_find_transform (const char *toset
, const char *fromset
,
715 struct __gconv_step
**handle
, size_t *nsteps
,
718 const char *fromset_expand
;
719 const char *toset_expand
;
722 /* Ensure that the configuration data is read. */
723 __libc_once (once
, __gconv_read_conf
);
725 /* Acquire the lock. */
726 __libc_lock_lock (__gconv_lock
);
728 result
= __gconv_lookup_cache (toset
, fromset
, handle
, nsteps
, flags
);
729 if (result
!= __GCONV_NODB
)
731 /* We have a cache and could resolve the request, successful or not. */
732 __libc_lock_unlock (__gconv_lock
);
736 /* If we don't have a module database return with an error. */
737 if (__gconv_modules_db
== NULL
)
739 __libc_lock_unlock (__gconv_lock
);
740 return __GCONV_NOCONV
;
743 /* See whether the names are aliases. */
744 fromset_expand
= do_lookup_alias (fromset
);
745 toset_expand
= do_lookup_alias (toset
);
747 if (__builtin_expect (flags
& GCONV_AVOID_NOCONV
, 0)
748 /* We are not supposed to create a pseudo transformation (means
749 copying) when the input and output character set are the same. */
750 && (strcmp (toset
, fromset
) == 0
751 || (toset_expand
!= NULL
&& strcmp (toset_expand
, fromset
) == 0)
752 || (fromset_expand
!= NULL
753 && (strcmp (toset
, fromset_expand
) == 0
754 || (toset_expand
!= NULL
755 && strcmp (toset_expand
, fromset_expand
) == 0)))))
757 /* Both character sets are the same. */
758 __libc_lock_unlock (__gconv_lock
);
759 return __GCONV_NULCONV
;
762 result
= find_derivation (toset
, toset_expand
, fromset
, fromset_expand
,
765 /* Release the lock. */
766 __libc_lock_unlock (__gconv_lock
);
768 /* The following code is necessary since `find_derivation' will return
769 GCONV_OK even when no derivation was found but the same request
770 was processed before. I.e., negative results will also be cached. */
771 return (result
== __GCONV_OK
772 ? (*handle
== NULL
? __GCONV_NOCONV
: __GCONV_OK
)
777 /* Release the entries of the modules list. */
780 __gconv_close_transform (struct __gconv_step
*steps
, size_t nsteps
)
782 int result
= __GCONV_OK
;
785 /* Acquire the lock. */
786 __libc_lock_lock (__gconv_lock
);
791 __gconv_release_step (&steps
[cnt
]);
794 /* If we use the cache we free a bit more since we don't keep any
795 transformation records around, they are cheap enough to
797 __gconv_release_cache (steps
, nsteps
);
799 /* Release the lock. */
800 __libc_lock_unlock (__gconv_lock
);
806 /* Free the modules mentioned. */
808 internal_function __libc_freeres_fn_section
809 free_modules_db (struct gconv_module
*node
)
811 if (node
->left
!= NULL
)
812 free_modules_db (node
->left
);
813 if (node
->right
!= NULL
)
814 free_modules_db (node
->right
);
817 struct gconv_module
*act
= node
;
819 if (act
->module_name
[0] == '/')
822 while (node
!= NULL
);
826 /* Free all resources if necessary. */
827 libc_freeres_fn (free_mem
)
829 /* First free locale memory. This needs to be done before freeing derivations,
830 as ctype cleanup functions dereference steps arrays which we free below. */
831 _nl_locale_subfreeres ();
833 /* finddomain.c has similar problem. */
834 extern void _nl_finddomain_subfreeres (void) attribute_hidden
;
835 _nl_finddomain_subfreeres ();
837 if (__gconv_alias_db
!= NULL
)
838 __tdestroy (__gconv_alias_db
, free
);
840 if (__gconv_modules_db
!= NULL
)
841 free_modules_db (__gconv_modules_db
);
843 if (known_derivations
!= NULL
)
844 __tdestroy (known_derivations
, free_derivation
);