1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
25 #include <sys/param.h>
26 #include <libc-lock.h>
27 #include <locale/localeinfo.h>
30 #include <gconv_int.h>
/* Root of the alias search tree.  Entries are `struct gconv_alias'
   records that map one name (`fromname') to another (`toname'); the
   tree is ordered by __gconv_alias_compare.  */
void *__gconv_alias_db;

/* Root of the database of available modules.  */
struct gconv_module *__gconv_modules_db;
41 /* We modify global data. */
42 __libc_lock_define_initialized (, __gconv_lock
)
45 /* Provide access to module database. */
47 __gconv_get_modules_db (void)
49 return __gconv_modules_db
;
53 __gconv_get_alias_db (void)
55 return __gconv_alias_db
;
59 /* Function for searching alias. */
61 __gconv_alias_compare (const void *p1
, const void *p2
)
63 const struct gconv_alias
*s1
= (const struct gconv_alias
*) p1
;
64 const struct gconv_alias
*s2
= (const struct gconv_alias
*) p2
;
65 return strcmp (s1
->fromname
, s2
->fromname
);
/* To search for a derivation we create a list of intermediate steps.
   Each element contains a pointer to the element which precedes it
   in the derivation order.  */
struct derivation_step
{
  /* Name of the character set reached by this step, and its length.  */
  const char *result_set;
  size_t result_set_len;
  /* Accumulated cost of the path ending in this step.  */
  int cost_lo;
  int cost_hi;
  /* Module used to reach RESULT_SET; NULL for the start nodes.  */
  struct gconv_module *code;
  /* Predecessor on the path, NULL for the start nodes.  */
  struct derivation_step *last;
  /* Next element of the list this step is linked into.  */
  struct derivation_step *next;
};

/* Create a new, unlinked step on the caller's stack frame (alloca), so
   the object must not outlive the calling function.  The value of the
   expression is the pointer to the new step.  RESULT is evaluated only
   once.  */
#define NEW_STEP(result, hi, lo, module, last_mod) \
  ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
     newp->result_set = (result);                                             \
     newp->result_set_len = strlen (newp->result_set);                        \
     newp->cost_hi = (hi);                                                    \
     newp->cost_lo = (lo);                                                    \
     newp->code = (module);                                                   \
     newp->last = (last_mod);                                                 \
     newp->next = NULL;                                                       \
     newp; })
/* If a specific transformation is used more than once we should not need
   to start looking for it again.  Instead cache each successful result.
   The member order matters: derivation_lookup builds its search key
   with a positional initializer.  */
struct known_derivation
{
  const char *from;
  const char *to;
  struct __gconv_step *steps;
  size_t nsteps;
};

/* Compare function for database of found derivations.  P1 and P2 point
   to `struct known_derivation' objects.  They are ordered by the `from'
   name first; the `to' name breaks ties.  The `steps' and `nsteps'
   members do not take part in the comparison.  */
static int
derivation_compare (const void *p1, const void *p2)
{
  const struct known_derivation *lhs = p1;
  const struct known_derivation *rhs = p2;
  int order = strcmp (lhs->from, rhs->from);

  if (order != 0)
    return order;

  return strcmp (lhs->to, rhs->to);
}
/* The search tree for known derivations.  Its nodes are
   `struct known_derivation' objects ordered by derivation_compare.  */
static void *known_derivations;
122 /* Look up whether given transformation was already requested before. */
124 derivation_lookup (const char *fromset
, const char *toset
,
125 struct __gconv_step
**handle
, size_t *nsteps
)
127 struct known_derivation key
= { fromset
, toset
, NULL
, 0 };
128 struct known_derivation
**result
;
130 result
= __tfind (&key
, &known_derivations
, derivation_compare
);
133 return __GCONV_NOCONV
;
135 *handle
= (*result
)->steps
;
136 *nsteps
= (*result
)->nsteps
;
138 /* Please note that we return GCONV_OK even if the last search for
139 this transformation was unsuccessful. */
143 /* Add new derivation to list of known ones. */
145 add_derivation (const char *fromset
, const char *toset
,
146 struct __gconv_step
*handle
, size_t nsteps
)
148 struct known_derivation
*new_deriv
;
149 size_t fromset_len
= strlen (fromset
) + 1;
150 size_t toset_len
= strlen (toset
) + 1;
152 new_deriv
= (struct known_derivation
*)
153 malloc (sizeof (struct known_derivation
) + fromset_len
+ toset_len
);
154 if (new_deriv
!= NULL
)
156 new_deriv
->from
= (char *) (new_deriv
+ 1);
157 new_deriv
->to
= memcpy (__mempcpy (new_deriv
+ 1, fromset
, fromset_len
),
160 new_deriv
->steps
= handle
;
161 new_deriv
->nsteps
= nsteps
;
163 if (__tsearch (new_deriv
, &known_derivations
, derivation_compare
)
165 /* There is some kind of memory allocation problem. */
168 /* Please note that we don't complain if the allocation failed. This
169 is not tragically but in case we use the memory debugging facilities
170 not all memory will be freed. */
173 static void __libc_freeres_fn_section
174 free_derivation (void *p
)
176 struct known_derivation
*deriv
= (struct known_derivation
*) p
;
179 for (cnt
= 0; cnt
< deriv
->nsteps
; ++cnt
)
180 if (deriv
->steps
[cnt
].__counter
> 0
181 && deriv
->steps
[cnt
].__shlib_handle
!= NULL
)
183 __gconv_end_fct end_fct
= deriv
->steps
[cnt
].__end_fct
;
185 PTR_DEMANGLE (end_fct
);
188 DL_CALL_FCT (end_fct
, (&deriv
->steps
[cnt
]));
191 /* Free the name strings. */
192 if (deriv
->steps
!= NULL
)
194 free ((char *) deriv
->steps
[0].__from_name
);
195 free ((char *) deriv
->steps
[deriv
->nsteps
- 1].__to_name
);
196 free ((struct __gconv_step
*) deriv
->steps
);
203 /* Decrement the reference count for a single step in a steps array. */
205 __gconv_release_step (struct __gconv_step
*step
)
207 /* Skip builtin modules; they are not reference counted. */
208 if (step
->__shlib_handle
!= NULL
&& --step
->__counter
== 0)
210 /* Call the destructor. */
211 __gconv_end_fct end_fct
= step
->__end_fct
;
213 PTR_DEMANGLE (end_fct
);
216 DL_CALL_FCT (end_fct
, (step
));
219 /* Release the loaded module. */
220 __gconv_release_shlib (step
->__shlib_handle
);
221 step
->__shlib_handle
= NULL
;
224 else if (step
->__shlib_handle
== NULL
)
225 /* Builtin modules should not have end functions. */
226 assert (step
->__end_fct
== NULL
);
230 gen_steps (struct derivation_step
*best
, const char *toset
,
231 const char *fromset
, struct __gconv_step
**handle
, size_t *nsteps
)
234 struct __gconv_step
*result
;
235 struct derivation_step
*current
;
236 int status
= __GCONV_NOMEM
;
237 char *from_name
= NULL
;
238 char *to_name
= NULL
;
240 /* First determine number of steps. */
241 for (current
= best
; current
->last
!= NULL
; current
= current
->last
)
244 result
= (struct __gconv_step
*) malloc (sizeof (struct __gconv_step
)
253 while (step_cnt
-- > 0)
257 result
[step_cnt
].__from_name
= from_name
= __strdup (fromset
);
258 if (from_name
== NULL
)
265 result
[step_cnt
].__from_name
= (char *)current
->last
->result_set
;
267 if (step_cnt
+ 1 == *nsteps
)
269 result
[step_cnt
].__to_name
= to_name
270 = __strdup (current
->result_set
);
278 result
[step_cnt
].__to_name
= result
[step_cnt
+ 1].__from_name
;
280 result
[step_cnt
].__counter
= 1;
281 result
[step_cnt
].__data
= NULL
;
284 if (current
->code
->module_name
[0] == '/')
286 /* Load the module, return handle for it. */
287 struct __gconv_loaded_object
*shlib_handle
=
288 __gconv_find_shlib (current
->code
->module_name
);
290 if (shlib_handle
== NULL
)
296 result
[step_cnt
].__shlib_handle
= shlib_handle
;
297 result
[step_cnt
].__modname
= shlib_handle
->name
;
298 result
[step_cnt
].__fct
= shlib_handle
->fct
;
299 result
[step_cnt
].__init_fct
= shlib_handle
->init_fct
;
300 result
[step_cnt
].__end_fct
= shlib_handle
->end_fct
;
302 /* These settings can be overridden by the init function. */
303 result
[step_cnt
].__btowc_fct
= NULL
;
305 /* Call the init function. */
306 __gconv_init_fct init_fct
= result
[step_cnt
].__init_fct
;
308 PTR_DEMANGLE (init_fct
);
310 if (init_fct
!= NULL
)
312 status
= DL_CALL_FCT (init_fct
, (&result
[step_cnt
]));
314 if (__builtin_expect (status
, __GCONV_OK
) != __GCONV_OK
)
317 /* Do not call the end function because the init
318 function has failed. */
319 result
[step_cnt
].__end_fct
= NULL
;
321 PTR_MANGLE (result
[step_cnt
].__end_fct
);
323 /* Make sure we unload this module. */
329 PTR_MANGLE (result
[step_cnt
].__btowc_fct
);
334 /* It's a builtin transformation. */
335 __gconv_get_builtin_trans (current
->code
->module_name
,
338 current
= current
->last
;
341 if (__builtin_expect (failed
, 0) != 0)
343 /* Something went wrong while initializing the modules. */
344 while (++step_cnt
< *nsteps
)
345 __gconv_release_step (&result
[step_cnt
]);
351 if (status
== __GCONV_OK
)
352 status
= __GCONV_NOCONV
;
369 increment_counter (struct __gconv_step
*steps
, size_t nsteps
)
371 /* Increment the user counter. */
373 int result
= __GCONV_OK
;
377 struct __gconv_step
*step
= &steps
[cnt
];
379 if (step
->__counter
++ == 0)
381 /* Skip builtin modules. */
382 if (step
->__modname
!= NULL
)
384 /* Reopen a previously used module. */
385 step
->__shlib_handle
= __gconv_find_shlib (step
->__modname
);
386 if (step
->__shlib_handle
== NULL
)
388 /* Oops, this is the second time we use this module
389 (after unloading) and this time loading failed!? */
391 while (++cnt
< nsteps
)
392 __gconv_release_step (&steps
[cnt
]);
393 result
= __GCONV_NOCONV
;
397 /* The function addresses defined by the module may
399 step
->__fct
= step
->__shlib_handle
->fct
;
400 step
->__init_fct
= step
->__shlib_handle
->init_fct
;
401 step
->__end_fct
= step
->__shlib_handle
->end_fct
;
403 /* These settings can be overridden by the init function. */
404 step
->__btowc_fct
= NULL
;
406 /* Call the init function. */
407 __gconv_init_fct init_fct
= step
->__init_fct
;
409 PTR_DEMANGLE (init_fct
);
411 if (init_fct
!= NULL
)
412 DL_CALL_FCT (init_fct
, (step
));
415 PTR_MANGLE (step
->__btowc_fct
);
425 /* The main function: find a possible derivation from the `fromset' (either
426 the given name or the alias) to the `toset' (again with alias). */
428 find_derivation (const char *toset
, const char *toset_expand
,
429 const char *fromset
, const char *fromset_expand
,
430 struct __gconv_step
**handle
, size_t *nsteps
)
432 struct derivation_step
*first
, *current
, **lastp
, *solution
= NULL
;
433 int best_cost_hi
= INT_MAX
;
434 int best_cost_lo
= INT_MAX
;
437 /* Look whether an earlier call to `find_derivation' has already
438 computed a possible derivation. If so, return it immediately. */
439 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
441 if (result
== __GCONV_OK
)
444 result
= increment_counter (*handle
, *nsteps
);
449 /* The task is to find a sequence of transformations, backed by the
450 existing modules - whether builtin or dynamically loadable -,
451 starting at `fromset' (or `fromset_expand') and ending at `toset'
452 (or `toset_expand'), and with minimal cost.
454 For computer scientists, this is a shortest path search in the
455 graph where the nodes are all possible charsets and the edges are
456 the transformations listed in __gconv_modules_db.
458 For now we use a simple algorithm with quadratic runtime behaviour.
459 A breadth-first search, starting at `fromset' and `fromset_expand'.
460 The list starting at `first' contains all nodes that have been
461 visited up to now, in the order in which they have been visited --
462 excluding the goal nodes `toset' and `toset_expand' which get
463 managed in the list starting at `solution'.
464 `current' walks through the list starting at `first' and looks
465 which nodes are reachable from the current node, adding them to
466 the end of the list [`first' or `solution' respectively] (if
467 they are visited the first time) or updating them in place (if
468 they have already been visited).
469 In each node of either list, cost_lo and cost_hi contain the
470 minimum cost over any paths found up to now, starting at `fromset'
471 or `fromset_expand', ending at that node. best_cost_lo and
472 best_cost_hi represent the minimum over the elements of the
475 if (fromset_expand
!= NULL
)
477 first
= NEW_STEP (fromset_expand
, 0, 0, NULL
, NULL
);
478 first
->next
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
479 lastp
= &first
->next
->next
;
483 first
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
484 lastp
= &first
->next
;
487 for (current
= first
; current
!= NULL
; current
= current
->next
)
489 /* Now match all the available module specifications against the
490 current charset name. If any of them matches check whether
491 we already have a derivation for this charset. If yes, use the
492 one with the lower costs. Otherwise add the new charset at the
495 The module database is organized in a tree form which allows
496 searching for prefixes. So we search for the first entry with a
497 matching prefix and any other matching entry can be found from
499 struct gconv_module
*node
;
501 /* Maybe it is not necessary anymore to look for a solution for
502 this entry since the cost is already as high (or higher) as
503 the cost for the best solution so far. */
504 if (current
->cost_hi
> best_cost_hi
505 || (current
->cost_hi
== best_cost_hi
506 && current
->cost_lo
>= best_cost_lo
))
509 node
= __gconv_modules_db
;
512 int cmpres
= strcmp (current
->result_set
, node
->from_string
);
515 /* Walk through the list of modules with this prefix and
516 try to match the name. */
517 struct gconv_module
*runp
;
519 /* Check all the modules with this prefix. */
523 const char *result_set
= (strcmp (runp
->to_string
, "-") == 0
524 ? (toset_expand
?: toset
)
526 int cost_hi
= runp
->cost_hi
+ current
->cost_hi
;
527 int cost_lo
= runp
->cost_lo
+ current
->cost_lo
;
528 struct derivation_step
*step
;
530 /* We managed to find a derivation. First see whether
531 we have reached one of the goal nodes. */
532 if (strcmp (result_set
, toset
) == 0
533 || (toset_expand
!= NULL
534 && strcmp (result_set
, toset_expand
) == 0))
536 /* Append to the `solution' list if there
537 is no entry with this name. */
538 for (step
= solution
; step
!= NULL
; step
= step
->next
)
539 if (strcmp (result_set
, step
->result_set
) == 0)
544 step
= NEW_STEP (result_set
,
547 step
->next
= solution
;
550 else if (step
->cost_hi
> cost_hi
551 || (step
->cost_hi
== cost_hi
552 && step
->cost_lo
> cost_lo
))
554 /* A better path was found for the node,
555 on the `solution' list. */
557 step
->last
= current
;
558 step
->cost_hi
= cost_hi
;
559 step
->cost_lo
= cost_lo
;
562 /* Update best_cost accordingly. */
563 if (cost_hi
< best_cost_hi
564 || (cost_hi
== best_cost_hi
565 && cost_lo
< best_cost_lo
))
567 best_cost_hi
= cost_hi
;
568 best_cost_lo
= cost_lo
;
571 else if (cost_hi
< best_cost_hi
572 || (cost_hi
== best_cost_hi
573 && cost_lo
< best_cost_lo
))
575 /* Append at the end of the `first' list if there
576 is no entry with this name. */
577 for (step
= first
; step
!= NULL
; step
= step
->next
)
578 if (strcmp (result_set
, step
->result_set
) == 0)
583 *lastp
= NEW_STEP (result_set
,
586 lastp
= &(*lastp
)->next
;
588 else if (step
->cost_hi
> cost_hi
589 || (step
->cost_hi
== cost_hi
590 && step
->cost_lo
> cost_lo
))
592 /* A better path was found for the node,
593 on the `first' list. */
595 step
->last
= current
;
597 /* Update the cost for all steps. */
598 for (step
= first
; step
!= NULL
;
600 /* But don't update the start nodes. */
601 if (step
->code
!= NULL
)
603 struct derivation_step
*back
;
606 hi
= step
->code
->cost_hi
;
607 lo
= step
->code
->cost_lo
;
609 for (back
= step
->last
; back
->code
!= NULL
;
612 hi
+= back
->code
->cost_hi
;
613 lo
+= back
->code
->cost_lo
;
620 /* Likewise for the nodes on the solution list.
621 Also update best_cost accordingly. */
622 for (step
= solution
; step
!= NULL
;
625 step
->cost_hi
= (step
->code
->cost_hi
626 + step
->last
->cost_hi
);
627 step
->cost_lo
= (step
->code
->cost_lo
628 + step
->last
->cost_lo
);
630 if (step
->cost_hi
< best_cost_hi
631 || (step
->cost_hi
== best_cost_hi
632 && step
->cost_lo
< best_cost_lo
))
634 best_cost_hi
= step
->cost_hi
;
635 best_cost_lo
= step
->cost_lo
;
643 while (runp
!= NULL
);
654 if (solution
!= NULL
)
656 /* We really found a way to do the transformation. */
658 /* Choose the best solution. This is easy because we know that
659 the solution list has at most length 2 (one for every possible
661 if (solution
->next
!= NULL
)
663 struct derivation_step
*solution2
= solution
->next
;
665 if (solution2
->cost_hi
< solution
->cost_hi
666 || (solution2
->cost_hi
== solution
->cost_hi
667 && solution2
->cost_lo
< solution
->cost_lo
))
668 solution
= solution2
;
671 /* Now build a data structure describing the transformation steps. */
672 result
= gen_steps (solution
, toset_expand
?: toset
,
673 fromset_expand
?: fromset
, handle
, nsteps
);
677 /* We haven't found a transformation. Clear the result values. */
682 /* Add result in any case to list of known derivations. */
683 add_derivation (fromset_expand
?: fromset
, toset_expand
?: toset
,
690 /* Control of initialization. */
691 __libc_once_define (static, once
);
695 do_lookup_alias (const char *name
)
697 struct gconv_alias key
;
698 struct gconv_alias
**found
;
700 key
.fromname
= (char *) name
;
701 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
702 return found
!= NULL
? (*found
)->toname
: NULL
;
707 __gconv_compare_alias (const char *name1
, const char *name2
)
711 /* Ensure that the configuration data is read. */
712 __libc_once (once
, __gconv_read_conf
);
714 if (__gconv_compare_alias_cache (name1
, name2
, &result
) != 0)
715 result
= strcmp (do_lookup_alias (name1
) ?: name1
,
716 do_lookup_alias (name2
) ?: name2
);
723 __gconv_find_transform (const char *toset
, const char *fromset
,
724 struct __gconv_step
**handle
, size_t *nsteps
,
727 const char *fromset_expand
;
728 const char *toset_expand
;
731 /* Ensure that the configuration data is read. */
732 __libc_once (once
, __gconv_read_conf
);
734 /* Acquire the lock. */
735 __libc_lock_lock (__gconv_lock
);
737 result
= __gconv_lookup_cache (toset
, fromset
, handle
, nsteps
, flags
);
738 if (result
!= __GCONV_NODB
)
740 /* We have a cache and could resolve the request, successful or not. */
741 __libc_lock_unlock (__gconv_lock
);
745 /* If we don't have a module database return with an error. */
746 if (__gconv_modules_db
== NULL
)
748 __libc_lock_unlock (__gconv_lock
);
749 return __GCONV_NOCONV
;
752 /* See whether the names are aliases. */
753 fromset_expand
= do_lookup_alias (fromset
);
754 toset_expand
= do_lookup_alias (toset
);
756 if (__builtin_expect (flags
& GCONV_AVOID_NOCONV
, 0)
757 /* We are not supposed to create a pseudo transformation (means
758 copying) when the input and output character set are the same. */
759 && (strcmp (toset
, fromset
) == 0
760 || (toset_expand
!= NULL
&& strcmp (toset_expand
, fromset
) == 0)
761 || (fromset_expand
!= NULL
762 && (strcmp (toset
, fromset_expand
) == 0
763 || (toset_expand
!= NULL
764 && strcmp (toset_expand
, fromset_expand
) == 0)))))
766 /* Both character sets are the same. */
767 __libc_lock_unlock (__gconv_lock
);
768 return __GCONV_NULCONV
;
771 result
= find_derivation (toset
, toset_expand
, fromset
, fromset_expand
,
774 /* Release the lock. */
775 __libc_lock_unlock (__gconv_lock
);
777 /* The following code is necessary since `find_derivation' will return
778 GCONV_OK even when no derivation was found but the same request
779 was processed before. I.e., negative results will also be cached. */
780 return (result
== __GCONV_OK
781 ? (*handle
== NULL
? __GCONV_NOCONV
: __GCONV_OK
)
786 /* Release the entries of the modules list. */
788 __gconv_close_transform (struct __gconv_step
*steps
, size_t nsteps
)
790 int result
= __GCONV_OK
;
793 /* Acquire the lock. */
794 __libc_lock_lock (__gconv_lock
);
799 __gconv_release_step (&steps
[cnt
]);
802 /* If we use the cache we free a bit more since we don't keep any
803 transformation records around, they are cheap enough to
805 __gconv_release_cache (steps
, nsteps
);
807 /* Release the lock. */
808 __libc_lock_unlock (__gconv_lock
);
814 /* Free the modules mentioned. */
816 __libc_freeres_fn_section
817 free_modules_db (struct gconv_module
*node
)
819 if (node
->left
!= NULL
)
820 free_modules_db (node
->left
);
821 if (node
->right
!= NULL
)
822 free_modules_db (node
->right
);
825 struct gconv_module
*act
= node
;
827 if (act
->module_name
[0] == '/')
830 while (node
!= NULL
);
834 /* Free all resources if necessary. */
835 libc_freeres_fn (free_mem
)
837 /* First free locale memory. This needs to be done before freeing
838 derivations, as ctype cleanup functions dereference steps arrays which we
840 _nl_locale_subfreeres ();
842 /* finddomain.c has similar problem. */
843 extern void _nl_finddomain_subfreeres (void) attribute_hidden
;
844 _nl_finddomain_subfreeres ();
846 if (__gconv_alias_db
!= NULL
)
847 __tdestroy (__gconv_alias_db
, free
);
849 if (__gconv_modules_db
!= NULL
)
850 free_modules_db (__gconv_modules_db
);
852 if (known_derivations
!= NULL
)
853 __tdestroy (known_derivations
, free_derivation
);