Define ISO 639-3 "tok" [BZ #28950]
[glibc.git] / iconv / gconv_db.c
blobbf385ac7b1c336ff425ade83e5a5a60e05c9d252
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <assert.h>
20 #include <limits.h>
21 #include <search.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/param.h>
25 #include <libc-lock.h>
26 #include <locale/localeinfo.h>
28 #include <dlfcn.h>
29 #include <gconv_int.h>
30 #include <sysdep.h>
33 /* Simple data structure for alias mapping. We have two names, `from'
34 and `to'. */
35 void *__gconv_alias_db;
37 /* Array with available modules. */
38 struct gconv_module *__gconv_modules_db;
40 /* We modify global data. */
41 __libc_lock_define_initialized (, __gconv_lock)
44 /* Provide access to module database. */
45 struct gconv_module *
46 __gconv_get_modules_db (void)
48 return __gconv_modules_db;
51 void *
52 __gconv_get_alias_db (void)
54 return __gconv_alias_db;
58 /* Function for searching alias. */
59 int
60 __gconv_alias_compare (const void *p1, const void *p2)
62 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
63 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
64 return strcmp (s1->fromname, s2->fromname);
68 /* To search for a derivation we create a list of intermediate steps.
69 Each element contains a pointer to the element which precedes it
70 in the derivation order. */
71 struct derivation_step
73 const char *result_set;
74 size_t result_set_len;
75 int cost_lo;
76 int cost_hi;
77 struct gconv_module *code;
78 struct derivation_step *last;
79 struct derivation_step *next;
82 #define NEW_STEP(result, hi, lo, module, last_mod) \
83 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
84 newp->result_set = result; \
85 newp->result_set_len = strlen (result); \
86 newp->cost_hi = hi; \
87 newp->cost_lo = lo; \
88 newp->code = module; \
89 newp->last = last_mod; \
90 newp->next = NULL; \
91 newp; })
94 /* If a specific transformation is used more than once we should not need
95 to start looking for it again. Instead cache each successful result. */
96 struct known_derivation
98 const char *from;
99 const char *to;
100 struct __gconv_step *steps;
101 size_t nsteps;
104 /* Compare function for database of found derivations. */
105 static int
106 derivation_compare (const void *p1, const void *p2)
108 const struct known_derivation *s1 = (const struct known_derivation *) p1;
109 const struct known_derivation *s2 = (const struct known_derivation *) p2;
110 int result;
112 result = strcmp (s1->from, s2->from);
113 if (result == 0)
114 result = strcmp (s1->to, s2->to);
115 return result;
118 /* The search tree for known derivations. */
119 static void *known_derivations;
121 /* Look up whether given transformation was already requested before. */
122 static int
123 derivation_lookup (const char *fromset, const char *toset,
124 struct __gconv_step **handle, size_t *nsteps)
126 struct known_derivation key = { fromset, toset, NULL, 0 };
127 struct known_derivation **result;
129 result = __tfind (&key, &known_derivations, derivation_compare);
131 if (result == NULL)
132 return __GCONV_NOCONV;
134 *handle = (*result)->steps;
135 *nsteps = (*result)->nsteps;
137 /* Please note that we return GCONV_OK even if the last search for
138 this transformation was unsuccessful. */
139 return __GCONV_OK;
142 /* Add new derivation to list of known ones. */
143 static void
144 add_derivation (const char *fromset, const char *toset,
145 struct __gconv_step *handle, size_t nsteps)
147 struct known_derivation *new_deriv;
148 size_t fromset_len = strlen (fromset) + 1;
149 size_t toset_len = strlen (toset) + 1;
151 new_deriv = (struct known_derivation *)
152 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
153 if (new_deriv != NULL)
155 new_deriv->from = (char *) (new_deriv + 1);
156 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
157 toset, toset_len);
159 new_deriv->steps = handle;
160 new_deriv->nsteps = nsteps;
162 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
163 == NULL)
164 /* There is some kind of memory allocation problem. */
165 free (new_deriv);
167 /* Please note that we don't complain if the allocation failed. This
168 is not tragically but in case we use the memory debugging facilities
169 not all memory will be freed. */
172 static void __libc_freeres_fn_section
173 free_derivation (void *p)
175 struct known_derivation *deriv = (struct known_derivation *) p;
176 size_t cnt;
178 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
179 if (deriv->steps[cnt].__counter > 0
180 && deriv->steps[cnt].__shlib_handle != NULL)
182 __gconv_end_fct end_fct = deriv->steps[cnt].__end_fct;
183 #ifdef PTR_DEMANGLE
184 PTR_DEMANGLE (end_fct);
185 #endif
186 if (end_fct != NULL)
187 DL_CALL_FCT (end_fct, (&deriv->steps[cnt]));
190 /* Free the name strings. */
191 if (deriv->steps != NULL)
193 free ((char *) deriv->steps[0].__from_name);
194 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
195 free ((struct __gconv_step *) deriv->steps);
198 free (deriv);
202 /* Decrement the reference count for a single step in a steps array. */
203 void
204 __gconv_release_step (struct __gconv_step *step)
206 /* Skip builtin modules; they are not reference counted. */
207 if (step->__shlib_handle != NULL && --step->__counter == 0)
209 /* Call the destructor. */
210 __gconv_end_fct end_fct = step->__end_fct;
211 #ifdef PTR_DEMANGLE
212 PTR_DEMANGLE (end_fct);
213 #endif
214 if (end_fct != NULL)
215 DL_CALL_FCT (end_fct, (step));
217 #ifndef STATIC_GCONV
218 /* Release the loaded module. */
219 __gconv_release_shlib (step->__shlib_handle);
220 step->__shlib_handle = NULL;
221 #endif
223 else if (step->__shlib_handle == NULL)
224 /* Builtin modules should not have end functions. */
225 assert (step->__end_fct == NULL);
228 static int
229 gen_steps (struct derivation_step *best, const char *toset,
230 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
232 size_t step_cnt = 0;
233 struct __gconv_step *result;
234 struct derivation_step *current;
235 int status = __GCONV_NOMEM;
236 char *from_name = NULL;
237 char *to_name = NULL;
239 /* First determine number of steps. */
240 for (current = best; current->last != NULL; current = current->last)
241 ++step_cnt;
243 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
244 * step_cnt);
245 if (result != NULL)
247 int failed = 0;
249 status = __GCONV_OK;
250 *nsteps = step_cnt;
251 current = best;
252 while (step_cnt-- > 0)
254 if (step_cnt == 0)
256 result[step_cnt].__from_name = from_name = __strdup (fromset);
257 if (from_name == NULL)
259 failed = 1;
260 break;
263 else
264 result[step_cnt].__from_name = (char *)current->last->result_set;
266 if (step_cnt + 1 == *nsteps)
268 result[step_cnt].__to_name = to_name
269 = __strdup (current->result_set);
270 if (to_name == NULL)
272 failed = 1;
273 break;
276 else
277 result[step_cnt].__to_name = result[step_cnt + 1].__from_name;
279 result[step_cnt].__counter = 1;
280 result[step_cnt].__data = NULL;
282 #ifndef STATIC_GCONV
283 if (current->code->module_name[0] == '/')
285 /* Load the module, return handle for it. */
286 struct __gconv_loaded_object *shlib_handle =
287 __gconv_find_shlib (current->code->module_name);
289 if (shlib_handle == NULL)
291 failed = 1;
292 break;
295 result[step_cnt].__shlib_handle = shlib_handle;
296 result[step_cnt].__modname = shlib_handle->name;
297 result[step_cnt].__fct = shlib_handle->fct;
298 result[step_cnt].__init_fct = shlib_handle->init_fct;
299 result[step_cnt].__end_fct = shlib_handle->end_fct;
301 /* These settings can be overridden by the init function. */
302 result[step_cnt].__btowc_fct = NULL;
304 /* Call the init function. */
305 __gconv_init_fct init_fct = result[step_cnt].__init_fct;
306 # ifdef PTR_DEMANGLE
307 PTR_DEMANGLE (init_fct);
308 # endif
309 if (init_fct != NULL)
311 status = DL_CALL_FCT (init_fct, (&result[step_cnt]));
313 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
315 failed = 1;
316 /* Do not call the end function because the init
317 function has failed. */
318 result[step_cnt].__end_fct = NULL;
319 # ifdef PTR_MANGLE
320 PTR_MANGLE (result[step_cnt].__end_fct);
321 # endif
322 /* Make sure we unload this module. */
323 --step_cnt;
324 break;
327 # ifdef PTR_MANGLE
328 PTR_MANGLE (result[step_cnt].__btowc_fct);
329 # endif
331 else
332 #endif
333 /* It's a builtin transformation. */
334 __gconv_get_builtin_trans (current->code->module_name,
335 &result[step_cnt]);
337 current = current->last;
340 if (__builtin_expect (failed, 0) != 0)
342 /* Something went wrong while initializing the modules. */
343 while (++step_cnt < *nsteps)
344 __gconv_release_step (&result[step_cnt]);
345 free (result);
346 free (from_name);
347 free (to_name);
348 *nsteps = 0;
349 *handle = NULL;
350 if (status == __GCONV_OK)
351 status = __GCONV_NOCONV;
353 else
354 *handle = result;
356 else
358 *nsteps = 0;
359 *handle = NULL;
362 return status;
366 #ifndef STATIC_GCONV
367 static int
368 increment_counter (struct __gconv_step *steps, size_t nsteps)
370 /* Increment the user counter. */
371 size_t cnt = nsteps;
372 int result = __GCONV_OK;
374 while (cnt-- > 0)
376 struct __gconv_step *step = &steps[cnt];
378 if (step->__counter++ == 0)
380 /* Skip builtin modules. */
381 if (step->__modname != NULL)
383 /* Reopen a previously used module. */
384 step->__shlib_handle = __gconv_find_shlib (step->__modname);
385 if (step->__shlib_handle == NULL)
387 /* Oops, this is the second time we use this module
388 (after unloading) and this time loading failed!? */
389 --step->__counter;
390 while (++cnt < nsteps)
391 __gconv_release_step (&steps[cnt]);
392 result = __GCONV_NOCONV;
393 break;
396 /* The function addresses defined by the module may
397 have changed. */
398 step->__fct = step->__shlib_handle->fct;
399 step->__init_fct = step->__shlib_handle->init_fct;
400 step->__end_fct = step->__shlib_handle->end_fct;
402 /* These settings can be overridden by the init function. */
403 step->__btowc_fct = NULL;
405 /* Call the init function. */
406 __gconv_init_fct init_fct = step->__init_fct;
407 #ifdef PTR_DEMANGLE
408 PTR_DEMANGLE (init_fct);
409 #endif
410 if (init_fct != NULL)
411 DL_CALL_FCT (init_fct, (step));
413 #ifdef PTR_MANGLE
414 PTR_MANGLE (step->__btowc_fct);
415 #endif
419 return result;
421 #endif
424 /* The main function: find a possible derivation from the `fromset' (either
425 the given name or the alias) to the `toset' (again with alias). */
426 static int
427 find_derivation (const char *toset, const char *toset_expand,
428 const char *fromset, const char *fromset_expand,
429 struct __gconv_step **handle, size_t *nsteps)
431 struct derivation_step *first, *current, **lastp, *solution = NULL;
432 int best_cost_hi = INT_MAX;
433 int best_cost_lo = INT_MAX;
434 int result;
436 /* Look whether an earlier call to `find_derivation' has already
437 computed a possible derivation. If so, return it immediately. */
438 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
439 handle, nsteps);
440 if (result == __GCONV_OK)
442 #ifndef STATIC_GCONV
443 result = increment_counter (*handle, *nsteps);
444 #endif
445 return result;
448 /* The task is to find a sequence of transformations, backed by the
449 existing modules - whether builtin or dynamically loadable -,
450 starting at `fromset' (or `fromset_expand') and ending at `toset'
451 (or `toset_expand'), and with minimal cost.
453 For computer scientists, this is a shortest path search in the
454 graph where the nodes are all possible charsets and the edges are
455 the transformations listed in __gconv_modules_db.
457 For now we use a simple algorithm with quadratic runtime behaviour.
458 A breadth-first search, starting at `fromset' and `fromset_expand'.
459 The list starting at `first' contains all nodes that have been
460 visited up to now, in the order in which they have been visited --
461 excluding the goal nodes `toset' and `toset_expand' which get
462 managed in the list starting at `solution'.
463 `current' walks through the list starting at `first' and looks
464 which nodes are reachable from the current node, adding them to
465 the end of the list [`first' or `solution' respectively] (if
466 they are visited the first time) or updating them in place (if
467 they have have already been visited).
468 In each node of either list, cost_lo and cost_hi contain the
469 minimum cost over any paths found up to now, starting at `fromset'
470 or `fromset_expand', ending at that node. best_cost_lo and
471 best_cost_hi represent the minimum over the elements of the
472 `solution' list. */
474 if (fromset_expand != NULL)
476 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
477 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
478 lastp = &first->next->next;
480 else
482 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
483 lastp = &first->next;
486 for (current = first; current != NULL; current = current->next)
488 /* Now match all the available module specifications against the
489 current charset name. If any of them matches check whether
490 we already have a derivation for this charset. If yes, use the
491 one with the lower costs. Otherwise add the new charset at the
492 end.
494 The module database is organized in a tree form which allows
495 searching for prefixes. So we search for the first entry with a
496 matching prefix and any other matching entry can be found from
497 this place. */
498 struct gconv_module *node;
500 /* Maybe it is not necessary anymore to look for a solution for
501 this entry since the cost is already as high (or higher) as
502 the cost for the best solution so far. */
503 if (current->cost_hi > best_cost_hi
504 || (current->cost_hi == best_cost_hi
505 && current->cost_lo >= best_cost_lo))
506 continue;
508 node = __gconv_modules_db;
509 while (node != NULL)
511 int cmpres = strcmp (current->result_set, node->from_string);
512 if (cmpres == 0)
514 /* Walk through the list of modules with this prefix and
515 try to match the name. */
516 struct gconv_module *runp;
518 /* Check all the modules with this prefix. */
519 runp = node;
522 const char *result_set = (strcmp (runp->to_string, "-") == 0
523 ? (toset_expand ?: toset)
524 : runp->to_string);
525 int cost_hi = runp->cost_hi + current->cost_hi;
526 int cost_lo = runp->cost_lo + current->cost_lo;
527 struct derivation_step *step;
529 /* We managed to find a derivation. First see whether
530 we have reached one of the goal nodes. */
531 if (strcmp (result_set, toset) == 0
532 || (toset_expand != NULL
533 && strcmp (result_set, toset_expand) == 0))
535 /* Append to the `solution' list if there
536 is no entry with this name. */
537 for (step = solution; step != NULL; step = step->next)
538 if (strcmp (result_set, step->result_set) == 0)
539 break;
541 if (step == NULL)
543 step = NEW_STEP (result_set,
544 cost_hi, cost_lo,
545 runp, current);
546 step->next = solution;
547 solution = step;
549 else if (step->cost_hi > cost_hi
550 || (step->cost_hi == cost_hi
551 && step->cost_lo > cost_lo))
553 /* A better path was found for the node,
554 on the `solution' list. */
555 step->code = runp;
556 step->last = current;
557 step->cost_hi = cost_hi;
558 step->cost_lo = cost_lo;
561 /* Update best_cost accordingly. */
562 if (cost_hi < best_cost_hi
563 || (cost_hi == best_cost_hi
564 && cost_lo < best_cost_lo))
566 best_cost_hi = cost_hi;
567 best_cost_lo = cost_lo;
570 else if (cost_hi < best_cost_hi
571 || (cost_hi == best_cost_hi
572 && cost_lo < best_cost_lo))
574 /* Append at the end of the `first' list if there
575 is no entry with this name. */
576 for (step = first; step != NULL; step = step->next)
577 if (strcmp (result_set, step->result_set) == 0)
578 break;
580 if (step == NULL)
582 *lastp = NEW_STEP (result_set,
583 cost_hi, cost_lo,
584 runp, current);
585 lastp = &(*lastp)->next;
587 else if (step->cost_hi > cost_hi
588 || (step->cost_hi == cost_hi
589 && step->cost_lo > cost_lo))
591 /* A better path was found for the node,
592 on the `first' list. */
593 step->code = runp;
594 step->last = current;
596 /* Update the cost for all steps. */
597 for (step = first; step != NULL;
598 step = step->next)
599 /* But don't update the start nodes. */
600 if (step->code != NULL)
602 struct derivation_step *back;
603 int hi, lo;
605 hi = step->code->cost_hi;
606 lo = step->code->cost_lo;
608 for (back = step->last; back->code != NULL;
609 back = back->last)
611 hi += back->code->cost_hi;
612 lo += back->code->cost_lo;
615 step->cost_hi = hi;
616 step->cost_lo = lo;
619 /* Likewise for the nodes on the solution list.
620 Also update best_cost accordingly. */
621 for (step = solution; step != NULL;
622 step = step->next)
624 step->cost_hi = (step->code->cost_hi
625 + step->last->cost_hi);
626 step->cost_lo = (step->code->cost_lo
627 + step->last->cost_lo);
629 if (step->cost_hi < best_cost_hi
630 || (step->cost_hi == best_cost_hi
631 && step->cost_lo < best_cost_lo))
633 best_cost_hi = step->cost_hi;
634 best_cost_lo = step->cost_lo;
640 runp = runp->same;
642 while (runp != NULL);
644 break;
646 else if (cmpres < 0)
647 node = node->left;
648 else
649 node = node->right;
653 if (solution != NULL)
655 /* We really found a way to do the transformation. */
657 /* Choose the best solution. This is easy because we know that
658 the solution list has at most length 2 (one for every possible
659 goal node). */
660 if (solution->next != NULL)
662 struct derivation_step *solution2 = solution->next;
664 if (solution2->cost_hi < solution->cost_hi
665 || (solution2->cost_hi == solution->cost_hi
666 && solution2->cost_lo < solution->cost_lo))
667 solution = solution2;
670 /* Now build a data structure describing the transformation steps. */
671 result = gen_steps (solution, toset_expand ?: toset,
672 fromset_expand ?: fromset, handle, nsteps);
674 else
676 /* We haven't found a transformation. Clear the result values. */
677 *handle = NULL;
678 *nsteps = 0;
681 /* Add result in any case to list of known derivations. */
682 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
683 *handle, *nsteps);
685 return result;
689 static const char *
690 do_lookup_alias (const char *name)
692 struct gconv_alias key;
693 struct gconv_alias **found;
695 key.fromname = (char *) name;
696 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
697 return found != NULL ? (*found)->toname : NULL;
702 __gconv_compare_alias (const char *name1, const char *name2)
704 int result;
706 /* Ensure that the configuration data is read. */
707 __gconv_load_conf ();
709 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
710 result = strcmp (do_lookup_alias (name1) ?: name1,
711 do_lookup_alias (name2) ?: name2);
713 return result;
718 __gconv_find_transform (const char *toset, const char *fromset,
719 struct __gconv_step **handle, size_t *nsteps,
720 int flags)
722 const char *fromset_expand;
723 const char *toset_expand;
724 int result;
726 /* Ensure that the configuration data is read. */
727 __gconv_load_conf ();
729 /* Acquire the lock. */
730 __libc_lock_lock (__gconv_lock);
732 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
733 if (result != __GCONV_NODB)
735 /* We have a cache and could resolve the request, successful or not. */
736 __libc_lock_unlock (__gconv_lock);
737 return result;
740 /* If we don't have a module database return with an error. */
741 if (__gconv_modules_db == NULL)
743 __libc_lock_unlock (__gconv_lock);
744 return __GCONV_NOCONV;
747 /* See whether the names are aliases. */
748 fromset_expand = do_lookup_alias (fromset);
749 toset_expand = do_lookup_alias (toset);
751 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
752 /* We are not supposed to create a pseudo transformation (means
753 copying) when the input and output character set are the same. */
754 && (strcmp (toset, fromset) == 0
755 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
756 || (fromset_expand != NULL
757 && (strcmp (toset, fromset_expand) == 0
758 || (toset_expand != NULL
759 && strcmp (toset_expand, fromset_expand) == 0)))))
761 /* Both character sets are the same. */
762 __libc_lock_unlock (__gconv_lock);
763 return __GCONV_NULCONV;
766 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
767 handle, nsteps);
769 /* Release the lock. */
770 __libc_lock_unlock (__gconv_lock);
772 /* The following code is necessary since `find_derivation' will return
773 GCONV_OK even when no derivation was found but the same request
774 was processed before. I.e., negative results will also be cached. */
775 return (result == __GCONV_OK
776 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
777 : result);
781 /* Release the entries of the modules list. */
783 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
785 int result = __GCONV_OK;
786 size_t cnt;
788 /* Acquire the lock. */
789 __libc_lock_lock (__gconv_lock);
791 #ifndef STATIC_GCONV
792 cnt = nsteps;
793 while (cnt-- > 0)
794 __gconv_release_step (&steps[cnt]);
795 #endif
797 /* If we use the cache we free a bit more since we don't keep any
798 transformation records around, they are cheap enough to
799 recreate. */
800 __gconv_release_cache (steps, nsteps);
802 /* Release the lock. */
803 __libc_lock_unlock (__gconv_lock);
805 return result;
809 /* Free the modules mentioned. */
810 static void
811 __libc_freeres_fn_section
812 free_modules_db (struct gconv_module *node)
814 if (node->left != NULL)
815 free_modules_db (node->left);
816 if (node->right != NULL)
817 free_modules_db (node->right);
820 struct gconv_module *act = node;
821 node = node->same;
822 if (act->module_name[0] == '/')
823 free (act);
825 while (node != NULL);
829 /* Free all resources if necessary. */
830 libc_freeres_fn (free_mem)
832 /* First free locale memory. This needs to be done before freeing
833 derivations, as ctype cleanup functions dereference steps arrays which we
834 free below. */
835 _nl_locale_subfreeres ();
837 /* finddomain.c has similar problem. */
838 extern void _nl_finddomain_subfreeres (void) attribute_hidden;
839 _nl_finddomain_subfreeres ();
841 if (__gconv_alias_db != NULL)
842 __tdestroy (__gconv_alias_db, free);
844 if (__gconv_modules_db != NULL)
845 free_modules_db (__gconv_modules_db);
847 if (known_derivations != NULL)
848 __tdestroy (known_derivations, free_derivation);