1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2003, 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, write to the Free
19 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 #include <sys/param.h>
28 #include <bits/libc-lock.h>
29 #include <locale/localeinfo.h>
32 #include <gconv_int.h>
36 /* Simple data structure for alias mapping. We have two names, `from'
38 void *__gconv_alias_db
;
40 /* Array with available modules. */
41 struct gconv_module
*__gconv_modules_db
;
43 /* We modify global data. */
44 __libc_lock_define_initialized (, __gconv_lock
)
47 /* Provide access to module database. */
49 __gconv_get_modules_db (void)
51 return __gconv_modules_db
;
55 __gconv_get_alias_db (void)
57 return __gconv_alias_db
;
61 /* Function for searching alias. */
63 __gconv_alias_compare (const void *p1
, const void *p2
)
65 const struct gconv_alias
*s1
= (const struct gconv_alias
*) p1
;
66 const struct gconv_alias
*s2
= (const struct gconv_alias
*) p2
;
67 return strcmp (s1
->fromname
, s2
->fromname
);
71 /* To search for a derivation we create a list of intermediate steps.
72 Each element contains a pointer to the element which precedes it
73 in the derivation order. */
74 struct derivation_step
76 const char *result_set
;
77 size_t result_set_len
;
80 struct gconv_module
*code
;
81 struct derivation_step
*last
;
82 struct derivation_step
*next
;
85 #define NEW_STEP(result, hi, lo, module, last_mod) \
86 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
87 newp->result_set = result; \
88 newp->result_set_len = strlen (result); \
91 newp->code = module; \
92 newp->last = last_mod; \
97 /* If a specific transformation is used more than once we should not need
98 to start looking for it again. Instead cache each successful result. */
99 struct known_derivation
103 struct __gconv_step
*steps
;
107 /* Compare function for database of found derivations. */
109 derivation_compare (const void *p1
, const void *p2
)
111 const struct known_derivation
*s1
= (const struct known_derivation
*) p1
;
112 const struct known_derivation
*s2
= (const struct known_derivation
*) p2
;
115 result
= strcmp (s1
->from
, s2
->from
);
117 result
= strcmp (s1
->to
, s2
->to
);
121 /* The search tree for known derivations. */
122 static void *known_derivations
;
124 /* Look up whether given transformation was already requested before. */
127 derivation_lookup (const char *fromset
, const char *toset
,
128 struct __gconv_step
**handle
, size_t *nsteps
)
130 struct known_derivation key
= { fromset
, toset
, NULL
, 0 };
131 struct known_derivation
**result
;
133 result
= __tfind (&key
, &known_derivations
, derivation_compare
);
136 return __GCONV_NOCONV
;
138 *handle
= (*result
)->steps
;
139 *nsteps
= (*result
)->nsteps
;
141 /* Please note that we return GCONV_OK even if the last search for
142 this transformation was unsuccessful. */
146 /* Add new derivation to list of known ones. */
149 add_derivation (const char *fromset
, const char *toset
,
150 struct __gconv_step
*handle
, size_t nsteps
)
152 struct known_derivation
*new_deriv
;
153 size_t fromset_len
= strlen (fromset
) + 1;
154 size_t toset_len
= strlen (toset
) + 1;
156 new_deriv
= (struct known_derivation
*)
157 malloc (sizeof (struct known_derivation
) + fromset_len
+ toset_len
);
158 if (new_deriv
!= NULL
)
160 new_deriv
->from
= (char *) (new_deriv
+ 1);
161 new_deriv
->to
= memcpy (__mempcpy (new_deriv
+ 1, fromset
, fromset_len
),
164 new_deriv
->steps
= handle
;
165 new_deriv
->nsteps
= nsteps
;
167 if (__tsearch (new_deriv
, &known_derivations
, derivation_compare
)
169 /* There is some kind of memory allocation problem. */
172 /* Please note that we don't complain if the allocation failed. This
173 is not tragically but in case we use the memory debugging facilities
174 not all memory will be freed. */
177 static void __libc_freeres_fn_section
178 free_derivation (void *p
)
180 struct known_derivation
*deriv
= (struct known_derivation
*) p
;
183 for (cnt
= 0; cnt
< deriv
->nsteps
; ++cnt
)
184 if (deriv
->steps
[cnt
].__counter
> 0
185 && deriv
->steps
[cnt
].__end_fct
!= NULL
)
187 assert (deriv
->steps
[cnt
].__shlib_handle
!= NULL
);
189 __gconv_end_fct end_fct
= deriv
->steps
[cnt
].__end_fct
;
191 PTR_DEMANGLE (end_fct
);
193 DL_CALL_FCT (end_fct
, (&deriv
->steps
[cnt
]));
196 /* Free the name strings. */
197 free ((char *) deriv
->steps
[0].__from_name
);
198 free ((char *) deriv
->steps
[deriv
->nsteps
- 1].__to_name
);
200 free ((struct __gconv_step
*) deriv
->steps
);
205 /* Decrement the reference count for a single step in a steps array. */
208 __gconv_release_step (struct __gconv_step
*step
)
210 /* Skip builtin modules; they are not reference counted. */
211 if (step
->__shlib_handle
!= NULL
&& --step
->__counter
== 0)
213 /* Call the destructor. */
214 if (step
->__end_fct
!= NULL
)
216 assert (step
->__shlib_handle
!= NULL
);
218 __gconv_end_fct end_fct
= step
->__end_fct
;
220 PTR_DEMANGLE (end_fct
);
222 DL_CALL_FCT (end_fct
, (step
));
226 /* Release the loaded module. */
227 __gconv_release_shlib (step
->__shlib_handle
);
228 step
->__shlib_handle
= NULL
;
231 else if (step
->__shlib_handle
== NULL
)
232 /* Builtin modules should not have end functions. */
233 assert (step
->__end_fct
== NULL
);
238 gen_steps (struct derivation_step
*best
, const char *toset
,
239 const char *fromset
, struct __gconv_step
**handle
, size_t *nsteps
)
242 struct __gconv_step
*result
;
243 struct derivation_step
*current
;
244 int status
= __GCONV_NOMEM
;
246 /* First determine number of steps. */
247 for (current
= best
; current
->last
!= NULL
; current
= current
->last
)
250 result
= (struct __gconv_step
*) malloc (sizeof (struct __gconv_step
)
259 while (step_cnt
-- > 0)
261 result
[step_cnt
].__from_name
= (step_cnt
== 0
263 : (char *)current
->last
->result_set
);
264 result
[step_cnt
].__to_name
= (step_cnt
+ 1 == *nsteps
265 ? __strdup (current
->result_set
)
266 : result
[step_cnt
+ 1].__from_name
);
268 result
[step_cnt
].__counter
= 1;
269 result
[step_cnt
].__data
= NULL
;
272 if (current
->code
->module_name
[0] == '/')
274 /* Load the module, return handle for it. */
275 struct __gconv_loaded_object
*shlib_handle
=
276 __gconv_find_shlib (current
->code
->module_name
);
278 if (shlib_handle
== NULL
)
284 result
[step_cnt
].__shlib_handle
= shlib_handle
;
285 result
[step_cnt
].__modname
= shlib_handle
->name
;
286 result
[step_cnt
].__fct
= shlib_handle
->fct
;
287 result
[step_cnt
].__init_fct
= shlib_handle
->init_fct
;
288 result
[step_cnt
].__end_fct
= shlib_handle
->end_fct
;
290 /* These settings can be overridden by the init function. */
291 result
[step_cnt
].__btowc_fct
= NULL
;
293 /* Call the init function. */
294 __gconv_init_fct init_fct
= result
[step_cnt
].__init_fct
;
295 if (init_fct
!= NULL
)
297 assert (result
[step_cnt
].__shlib_handle
!= NULL
);
300 PTR_DEMANGLE (init_fct
);
302 status
= DL_CALL_FCT (init_fct
, (&result
[step_cnt
]));
304 if (__builtin_expect (status
, __GCONV_OK
) != __GCONV_OK
)
307 /* Make sure we unload this modules. */
309 result
[step_cnt
].__end_fct
= NULL
;
314 if (result
[step_cnt
].__btowc_fct
!= NULL
)
315 PTR_MANGLE (result
[step_cnt
].__btowc_fct
);
321 /* It's a builtin transformation. */
322 __gconv_get_builtin_trans (current
->code
->module_name
,
325 current
= current
->last
;
328 if (__builtin_expect (failed
, 0) != 0)
330 /* Something went wrong while initializing the modules. */
331 while (++step_cnt
< *nsteps
)
332 __gconv_release_step (&result
[step_cnt
]);
336 if (status
== __GCONV_OK
)
337 status
= __GCONV_NOCONV
;
355 increment_counter (struct __gconv_step
*steps
, size_t nsteps
)
357 /* Increment the user counter. */
359 int result
= __GCONV_OK
;
363 struct __gconv_step
*step
= &steps
[cnt
];
365 if (step
->__counter
++ == 0)
367 /* Skip builtin modules. */
368 if (step
->__modname
!= NULL
)
370 /* Reopen a previously used module. */
371 step
->__shlib_handle
= __gconv_find_shlib (step
->__modname
);
372 if (step
->__shlib_handle
== NULL
)
374 /* Oops, this is the second time we use this module
375 (after unloading) and this time loading failed!? */
377 while (++cnt
< nsteps
)
378 __gconv_release_step (&steps
[cnt
]);
379 result
= __GCONV_NOCONV
;
383 /* The function addresses defined by the module may
385 step
->__fct
= step
->__shlib_handle
->fct
;
386 step
->__init_fct
= step
->__shlib_handle
->init_fct
;
387 step
->__end_fct
= step
->__shlib_handle
->end_fct
;
389 /* These settings can be overridden by the init function. */
390 step
->__btowc_fct
= NULL
;
393 /* Call the init function. */
394 __gconv_init_fct init_fct
= step
->__init_fct
;
395 if (init_fct
!= NULL
)
398 PTR_DEMANGLE (init_fct
);
400 DL_CALL_FCT (init_fct
, (step
));
403 if (step
->__btowc_fct
!= NULL
)
404 PTR_MANGLE (step
->__btowc_fct
);
414 /* The main function: find a possible derivation from the `fromset' (either
415 the given name or the alias) to the `toset' (again with alias). */
418 find_derivation (const char *toset
, const char *toset_expand
,
419 const char *fromset
, const char *fromset_expand
,
420 struct __gconv_step
**handle
, size_t *nsteps
)
422 struct derivation_step
*first
, *current
, **lastp
, *solution
= NULL
;
423 int best_cost_hi
= INT_MAX
;
424 int best_cost_lo
= INT_MAX
;
427 /* Look whether an earlier call to `find_derivation' has already
428 computed a possible derivation. If so, return it immediately. */
429 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
431 if (result
== __GCONV_OK
)
434 result
= increment_counter (*handle
, *nsteps
);
439 /* The task is to find a sequence of transformations, backed by the
440 existing modules - whether builtin or dynamically loadable -,
441 starting at `fromset' (or `fromset_expand') and ending at `toset'
442 (or `toset_expand'), and with minimal cost.
444 For computer scientists, this is a shortest path search in the
445 graph where the nodes are all possible charsets and the edges are
446 the transformations listed in __gconv_modules_db.
448 For now we use a simple algorithm with quadratic runtime behaviour.
449 A breadth-first search, starting at `fromset' and `fromset_expand'.
450 The list starting at `first' contains all nodes that have been
451 visited up to now, in the order in which they have been visited --
452 excluding the goal nodes `toset' and `toset_expand' which get
453 managed in the list starting at `solution'.
454 `current' walks through the list starting at `first' and looks
455 which nodes are reachable from the current node, adding them to
456 the end of the list [`first' or `solution' respectively] (if
457 they are visited the first time) or updating them in place (if
458 they have have already been visited).
459 In each node of either list, cost_lo and cost_hi contain the
460 minimum cost over any paths found up to now, starting at `fromset'
461 or `fromset_expand', ending at that node. best_cost_lo and
462 best_cost_hi represent the minimum over the elements of the
465 if (fromset_expand
!= NULL
)
467 first
= NEW_STEP (fromset_expand
, 0, 0, NULL
, NULL
);
468 first
->next
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
469 lastp
= &first
->next
->next
;
473 first
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
474 lastp
= &first
->next
;
477 for (current
= first
; current
!= NULL
; current
= current
->next
)
479 /* Now match all the available module specifications against the
480 current charset name. If any of them matches check whether
481 we already have a derivation for this charset. If yes, use the
482 one with the lower costs. Otherwise add the new charset at the
485 The module database is organized in a tree form which allows
486 searching for prefixes. So we search for the first entry with a
487 matching prefix and any other matching entry can be found from
489 struct gconv_module
*node
;
491 /* Maybe it is not necessary anymore to look for a solution for
492 this entry since the cost is already as high (or higher) as
493 the cost for the best solution so far. */
494 if (current
->cost_hi
> best_cost_hi
495 || (current
->cost_hi
== best_cost_hi
496 && current
->cost_lo
>= best_cost_lo
))
499 node
= __gconv_modules_db
;
502 int cmpres
= strcmp (current
->result_set
, node
->from_string
);
505 /* Walk through the list of modules with this prefix and
506 try to match the name. */
507 struct gconv_module
*runp
;
509 /* Check all the modules with this prefix. */
513 const char *result_set
= (strcmp (runp
->to_string
, "-") == 0
514 ? (toset_expand
?: toset
)
516 int cost_hi
= runp
->cost_hi
+ current
->cost_hi
;
517 int cost_lo
= runp
->cost_lo
+ current
->cost_lo
;
518 struct derivation_step
*step
;
520 /* We managed to find a derivation. First see whether
521 we have reached one of the goal nodes. */
522 if (strcmp (result_set
, toset
) == 0
523 || (toset_expand
!= NULL
524 && strcmp (result_set
, toset_expand
) == 0))
526 /* Append to the `solution' list if there
527 is no entry with this name. */
528 for (step
= solution
; step
!= NULL
; step
= step
->next
)
529 if (strcmp (result_set
, step
->result_set
) == 0)
534 step
= NEW_STEP (result_set
,
537 step
->next
= solution
;
540 else if (step
->cost_hi
> cost_hi
541 || (step
->cost_hi
== cost_hi
542 && step
->cost_lo
> cost_lo
))
544 /* A better path was found for the node,
545 on the `solution' list. */
547 step
->last
= current
;
548 step
->cost_hi
= cost_hi
;
549 step
->cost_lo
= cost_lo
;
552 /* Update best_cost accordingly. */
553 if (cost_hi
< best_cost_hi
554 || (cost_hi
== best_cost_hi
555 && cost_lo
< best_cost_lo
))
557 best_cost_hi
= cost_hi
;
558 best_cost_lo
= cost_lo
;
561 else if (cost_hi
< best_cost_hi
562 || (cost_hi
== best_cost_hi
563 && cost_lo
< best_cost_lo
))
565 /* Append at the end of the `first' list if there
566 is no entry with this name. */
567 for (step
= first
; step
!= NULL
; step
= step
->next
)
568 if (strcmp (result_set
, step
->result_set
) == 0)
573 *lastp
= NEW_STEP (result_set
,
576 lastp
= &(*lastp
)->next
;
578 else if (step
->cost_hi
> cost_hi
579 || (step
->cost_hi
== cost_hi
580 && step
->cost_lo
> cost_lo
))
582 /* A better path was found for the node,
583 on the `first' list. */
585 step
->last
= current
;
587 /* Update the cost for all steps. */
588 for (step
= first
; step
!= NULL
;
590 /* But don't update the start nodes. */
591 if (step
->code
!= NULL
)
593 struct derivation_step
*back
;
596 hi
= step
->code
->cost_hi
;
597 lo
= step
->code
->cost_lo
;
599 for (back
= step
->last
; back
->code
!= NULL
;
602 hi
+= back
->code
->cost_hi
;
603 lo
+= back
->code
->cost_lo
;
610 /* Likewise for the nodes on the solution list.
611 Also update best_cost accordingly. */
612 for (step
= solution
; step
!= NULL
;
615 step
->cost_hi
= (step
->code
->cost_hi
616 + step
->last
->cost_hi
);
617 step
->cost_lo
= (step
->code
->cost_lo
618 + step
->last
->cost_lo
);
620 if (step
->cost_hi
< best_cost_hi
621 || (step
->cost_hi
== best_cost_hi
622 && step
->cost_lo
< best_cost_lo
))
624 best_cost_hi
= step
->cost_hi
;
625 best_cost_lo
= step
->cost_lo
;
633 while (runp
!= NULL
);
644 if (solution
!= NULL
)
646 /* We really found a way to do the transformation. */
648 /* Choose the best solution. This is easy because we know that
649 the solution list has at most length 2 (one for every possible
651 if (solution
->next
!= NULL
)
653 struct derivation_step
*solution2
= solution
->next
;
655 if (solution2
->cost_hi
< solution
->cost_hi
656 || (solution2
->cost_hi
== solution
->cost_hi
657 && solution2
->cost_lo
< solution
->cost_lo
))
658 solution
= solution2
;
661 /* Now build a data structure describing the transformation steps. */
662 result
= gen_steps (solution
, toset_expand
?: toset
,
663 fromset_expand
?: fromset
, handle
, nsteps
);
667 /* We haven't found a transformation. Clear the result values. */
672 /* Add result in any case to list of known derivations. */
673 add_derivation (fromset_expand
?: fromset
, toset_expand
?: toset
,
680 /* Control of initialization. */
681 __libc_once_define (static, once
);
685 do_lookup_alias (const char *name
)
687 struct gconv_alias key
;
688 struct gconv_alias
**found
;
690 key
.fromname
= (char *) name
;
691 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
692 return found
!= NULL
? (*found
)->toname
: NULL
;
698 __gconv_compare_alias (const char *name1
, const char *name2
)
702 /* Ensure that the configuration data is read. */
703 __libc_once (once
, __gconv_read_conf
);
705 if (__gconv_compare_alias_cache (name1
, name2
, &result
) != 0)
706 result
= strcmp (do_lookup_alias (name1
) ?: name1
,
707 do_lookup_alias (name2
) ?: name2
);
715 __gconv_find_transform (const char *toset
, const char *fromset
,
716 struct __gconv_step
**handle
, size_t *nsteps
,
719 const char *fromset_expand
;
720 const char *toset_expand
;
723 /* Ensure that the configuration data is read. */
724 __libc_once (once
, __gconv_read_conf
);
726 /* Acquire the lock. */
727 __libc_lock_lock (__gconv_lock
);
729 result
= __gconv_lookup_cache (toset
, fromset
, handle
, nsteps
, flags
);
730 if (result
!= __GCONV_NODB
)
732 /* We have a cache and could resolve the request, successful or not. */
733 __libc_lock_unlock (__gconv_lock
);
737 /* If we don't have a module database return with an error. */
738 if (__gconv_modules_db
== NULL
)
740 __libc_lock_unlock (__gconv_lock
);
741 return __GCONV_NOCONV
;
744 /* See whether the names are aliases. */
745 fromset_expand
= do_lookup_alias (fromset
);
746 toset_expand
= do_lookup_alias (toset
);
748 if (__builtin_expect (flags
& GCONV_AVOID_NOCONV
, 0)
749 /* We are not supposed to create a pseudo transformation (means
750 copying) when the input and output character set are the same. */
751 && (strcmp (toset
, fromset
) == 0
752 || (toset_expand
!= NULL
&& strcmp (toset_expand
, fromset
) == 0)
753 || (fromset_expand
!= NULL
754 && (strcmp (toset
, fromset_expand
) == 0
755 || (toset_expand
!= NULL
756 && strcmp (toset_expand
, fromset_expand
) == 0)))))
758 /* Both character sets are the same. */
759 __libc_lock_unlock (__gconv_lock
);
760 return __GCONV_NULCONV
;
763 result
= find_derivation (toset
, toset_expand
, fromset
, fromset_expand
,
766 /* Release the lock. */
767 __libc_lock_unlock (__gconv_lock
);
769 /* The following code is necessary since `find_derivation' will return
770 GCONV_OK even when no derivation was found but the same request
771 was processed before. I.e., negative results will also be cached. */
772 return (result
== __GCONV_OK
773 ? (*handle
== NULL
? __GCONV_NOCONV
: __GCONV_OK
)
778 /* Release the entries of the modules list. */
781 __gconv_close_transform (struct __gconv_step
*steps
, size_t nsteps
)
783 int result
= __GCONV_OK
;
786 /* Acquire the lock. */
787 __libc_lock_lock (__gconv_lock
);
792 __gconv_release_step (&steps
[cnt
]);
795 /* If we use the cache we free a bit more since we don't keep any
796 transformation records around, they are cheap enough to
798 __gconv_release_cache (steps
, nsteps
);
800 /* Release the lock. */
801 __libc_lock_unlock (__gconv_lock
);
807 /* Free the modules mentioned. */
809 internal_function __libc_freeres_fn_section
810 free_modules_db (struct gconv_module
*node
)
812 if (node
->left
!= NULL
)
813 free_modules_db (node
->left
);
814 if (node
->right
!= NULL
)
815 free_modules_db (node
->right
);
818 struct gconv_module
*act
= node
;
820 if (act
->module_name
[0] == '/')
823 while (node
!= NULL
);
827 /* Free all resources if necessary. */
828 libc_freeres_fn (free_mem
)
830 /* First free locale memory. This needs to be done before freeing derivations,
831 as ctype cleanup functions dereference steps arrays which we free below. */
832 _nl_locale_subfreeres ();
834 /* finddomain.c has similar problem. */
835 extern void _nl_finddomain_subfreeres (void) attribute_hidden
;
836 _nl_finddomain_subfreeres ();
838 if (__gconv_alias_db
!= NULL
)
839 __tdestroy (__gconv_alias_db
, free
);
841 if (__gconv_modules_db
!= NULL
)
842 free_modules_db (__gconv_modules_db
);
844 if (known_derivations
!= NULL
)
845 __tdestroy (known_derivations
, free_derivation
);