<sys/platform/x86.h>: Add AVX-VNNI-INT8 support
[glibc.git] / iconv / gconv_db.c
blob0ee807628727a21f47bfb7d14a5e5b7b52d8b9f3
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <assert.h>
20 #include <limits.h>
21 #include <search.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/param.h>
25 #include <libc-lock.h>
26 #include <locale/localeinfo.h>
28 #include <dlfcn.h>
29 #include <gconv_int.h>
30 #include <pointer_guard.h>
33 /* Simple data structure for alias mapping. We have two names, `from'
34 and `to'. */
35 void *__gconv_alias_db;
37 /* Array with available modules. */
38 struct gconv_module *__gconv_modules_db;
40 /* We modify global data. */
41 __libc_lock_define_initialized (, __gconv_lock)
44 /* Provide access to module database. */
45 struct gconv_module *
46 __gconv_get_modules_db (void)
48 return __gconv_modules_db;
51 void *
52 __gconv_get_alias_db (void)
54 return __gconv_alias_db;
58 /* Function for searching alias. */
59 int
60 __gconv_alias_compare (const void *p1, const void *p2)
62 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
63 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
64 return strcmp (s1->fromname, s2->fromname);
68 /* To search for a derivation we create a list of intermediate steps.
69 Each element contains a pointer to the element which precedes it
70 in the derivation order. */
71 struct derivation_step
73 const char *result_set;
74 size_t result_set_len;
75 int cost_lo;
76 int cost_hi;
77 struct gconv_module *code;
78 struct derivation_step *last;
79 struct derivation_step *next;
82 #define NEW_STEP(result, hi, lo, module, last_mod) \
83 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
84 newp->result_set = result; \
85 newp->result_set_len = strlen (result); \
86 newp->cost_hi = hi; \
87 newp->cost_lo = lo; \
88 newp->code = module; \
89 newp->last = last_mod; \
90 newp->next = NULL; \
91 newp; })
94 /* If a specific transformation is used more than once we should not need
95 to start looking for it again. Instead cache each successful result. */
96 struct known_derivation
98 const char *from;
99 const char *to;
100 struct __gconv_step *steps;
101 size_t nsteps;
104 /* Compare function for database of found derivations. */
105 static int
106 derivation_compare (const void *p1, const void *p2)
108 const struct known_derivation *s1 = (const struct known_derivation *) p1;
109 const struct known_derivation *s2 = (const struct known_derivation *) p2;
110 int result;
112 result = strcmp (s1->from, s2->from);
113 if (result == 0)
114 result = strcmp (s1->to, s2->to);
115 return result;
118 /* The search tree for known derivations. */
119 static void *known_derivations;
121 /* Look up whether given transformation was already requested before. */
122 static int
123 derivation_lookup (const char *fromset, const char *toset,
124 struct __gconv_step **handle, size_t *nsteps)
126 struct known_derivation key = { fromset, toset, NULL, 0 };
127 struct known_derivation **result;
129 result = __tfind (&key, &known_derivations, derivation_compare);
131 if (result == NULL)
132 return __GCONV_NOCONV;
134 *handle = (*result)->steps;
135 *nsteps = (*result)->nsteps;
137 /* Please note that we return GCONV_OK even if the last search for
138 this transformation was unsuccessful. */
139 return __GCONV_OK;
142 /* Add new derivation to list of known ones. */
143 static void
144 add_derivation (const char *fromset, const char *toset,
145 struct __gconv_step *handle, size_t nsteps)
147 struct known_derivation *new_deriv;
148 size_t fromset_len = strlen (fromset) + 1;
149 size_t toset_len = strlen (toset) + 1;
151 new_deriv = (struct known_derivation *)
152 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
153 if (new_deriv != NULL)
155 new_deriv->from = (char *) (new_deriv + 1);
156 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
157 toset, toset_len);
159 new_deriv->steps = handle;
160 new_deriv->nsteps = nsteps;
162 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
163 == NULL)
164 /* There is some kind of memory allocation problem. */
165 free (new_deriv);
167 /* Please note that we don't complain if the allocation failed. This
168 is not tragically but in case we use the memory debugging facilities
169 not all memory will be freed. */
172 static void
173 free_derivation (void *p)
175 struct known_derivation *deriv = (struct known_derivation *) p;
176 size_t cnt;
178 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
179 if (deriv->steps[cnt].__counter > 0
180 && deriv->steps[cnt].__shlib_handle != NULL)
182 __gconv_end_fct end_fct = deriv->steps[cnt].__end_fct;
183 PTR_DEMANGLE (end_fct);
184 if (end_fct != NULL)
185 DL_CALL_FCT (end_fct, (&deriv->steps[cnt]));
188 /* Free the name strings. */
189 if (deriv->steps != NULL)
191 free ((char *) deriv->steps[0].__from_name);
192 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
193 free ((struct __gconv_step *) deriv->steps);
196 free (deriv);
200 /* Decrement the reference count for a single step in a steps array. */
201 void
202 __gconv_release_step (struct __gconv_step *step)
204 /* Skip builtin modules; they are not reference counted. */
205 if (step->__shlib_handle != NULL && --step->__counter == 0)
207 /* Call the destructor. */
208 __gconv_end_fct end_fct = step->__end_fct;
209 PTR_DEMANGLE (end_fct);
210 if (end_fct != NULL)
211 DL_CALL_FCT (end_fct, (step));
213 #ifndef STATIC_GCONV
214 /* Release the loaded module. */
215 __gconv_release_shlib (step->__shlib_handle);
216 step->__shlib_handle = NULL;
217 #endif
219 else if (step->__shlib_handle == NULL)
220 /* Builtin modules should not have end functions. */
221 assert (step->__end_fct == NULL);
224 static int
225 gen_steps (struct derivation_step *best, const char *toset,
226 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
228 size_t step_cnt = 0;
229 struct __gconv_step *result;
230 struct derivation_step *current;
231 int status = __GCONV_NOMEM;
232 char *from_name = NULL;
233 char *to_name = NULL;
235 /* First determine number of steps. */
236 for (current = best; current->last != NULL; current = current->last)
237 ++step_cnt;
239 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
240 * step_cnt);
241 if (result != NULL)
243 int failed = 0;
245 status = __GCONV_OK;
246 *nsteps = step_cnt;
247 current = best;
248 while (step_cnt-- > 0)
250 if (step_cnt == 0)
252 result[step_cnt].__from_name = from_name = __strdup (fromset);
253 if (from_name == NULL)
255 failed = 1;
256 break;
259 else
260 result[step_cnt].__from_name = (char *)current->last->result_set;
262 if (step_cnt + 1 == *nsteps)
264 result[step_cnt].__to_name = to_name
265 = __strdup (current->result_set);
266 if (to_name == NULL)
268 failed = 1;
269 break;
272 else
273 result[step_cnt].__to_name = result[step_cnt + 1].__from_name;
275 result[step_cnt].__counter = 1;
276 result[step_cnt].__data = NULL;
278 #ifndef STATIC_GCONV
279 if (current->code->module_name[0] == '/')
281 /* Load the module, return handle for it. */
282 struct __gconv_loaded_object *shlib_handle =
283 __gconv_find_shlib (current->code->module_name);
285 if (shlib_handle == NULL)
287 failed = 1;
288 break;
291 result[step_cnt].__shlib_handle = shlib_handle;
292 result[step_cnt].__modname = shlib_handle->name;
293 result[step_cnt].__fct = shlib_handle->fct;
294 result[step_cnt].__init_fct = shlib_handle->init_fct;
295 result[step_cnt].__end_fct = shlib_handle->end_fct;
297 /* These settings can be overridden by the init function. */
298 result[step_cnt].__btowc_fct = NULL;
300 /* Call the init function. */
301 __gconv_init_fct init_fct = result[step_cnt].__init_fct;
302 PTR_DEMANGLE (init_fct);
303 if (init_fct != NULL)
305 status = DL_CALL_FCT (init_fct, (&result[step_cnt]));
307 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
309 failed = 1;
310 /* Do not call the end function because the init
311 function has failed. */
312 result[step_cnt].__end_fct = NULL;
313 PTR_MANGLE (result[step_cnt].__end_fct);
314 /* Make sure we unload this module. */
315 --step_cnt;
316 break;
319 PTR_MANGLE (result[step_cnt].__btowc_fct);
321 else
322 #endif
323 /* It's a builtin transformation. */
324 __gconv_get_builtin_trans (current->code->module_name,
325 &result[step_cnt]);
327 current = current->last;
330 if (__builtin_expect (failed, 0) != 0)
332 /* Something went wrong while initializing the modules. */
333 while (++step_cnt < *nsteps)
334 __gconv_release_step (&result[step_cnt]);
335 free (result);
336 free (from_name);
337 free (to_name);
338 *nsteps = 0;
339 *handle = NULL;
340 if (status == __GCONV_OK)
341 status = __GCONV_NOCONV;
343 else
344 *handle = result;
346 else
348 *nsteps = 0;
349 *handle = NULL;
352 return status;
356 #ifndef STATIC_GCONV
357 static int
358 increment_counter (struct __gconv_step *steps, size_t nsteps)
360 /* Increment the user counter. */
361 size_t cnt = nsteps;
362 int result = __GCONV_OK;
364 while (cnt-- > 0)
366 struct __gconv_step *step = &steps[cnt];
368 if (step->__counter++ == 0)
370 /* Skip builtin modules. */
371 if (step->__modname != NULL)
373 /* Reopen a previously used module. */
374 step->__shlib_handle = __gconv_find_shlib (step->__modname);
375 if (step->__shlib_handle == NULL)
377 /* Oops, this is the second time we use this module
378 (after unloading) and this time loading failed!? */
379 --step->__counter;
380 while (++cnt < nsteps)
381 __gconv_release_step (&steps[cnt]);
382 result = __GCONV_NOCONV;
383 break;
386 /* The function addresses defined by the module may
387 have changed. */
388 step->__fct = step->__shlib_handle->fct;
389 step->__init_fct = step->__shlib_handle->init_fct;
390 step->__end_fct = step->__shlib_handle->end_fct;
392 /* These settings can be overridden by the init function. */
393 step->__btowc_fct = NULL;
395 /* Call the init function. */
396 __gconv_init_fct init_fct = step->__init_fct;
397 PTR_DEMANGLE (init_fct);
398 if (init_fct != NULL)
399 DL_CALL_FCT (init_fct, (step));
400 PTR_MANGLE (step->__btowc_fct);
404 return result;
406 #endif
409 /* The main function: find a possible derivation from the `fromset' (either
410 the given name or the alias) to the `toset' (again with alias). */
411 static int
412 find_derivation (const char *toset, const char *toset_expand,
413 const char *fromset, const char *fromset_expand,
414 struct __gconv_step **handle, size_t *nsteps)
416 struct derivation_step *first, *current, **lastp, *solution = NULL;
417 int best_cost_hi = INT_MAX;
418 int best_cost_lo = INT_MAX;
419 int result;
421 /* Look whether an earlier call to `find_derivation' has already
422 computed a possible derivation. If so, return it immediately. */
423 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
424 handle, nsteps);
425 if (result == __GCONV_OK)
427 #ifndef STATIC_GCONV
428 result = increment_counter (*handle, *nsteps);
429 #endif
430 return result;
433 /* The task is to find a sequence of transformations, backed by the
434 existing modules - whether builtin or dynamically loadable -,
435 starting at `fromset' (or `fromset_expand') and ending at `toset'
436 (or `toset_expand'), and with minimal cost.
438 For computer scientists, this is a shortest path search in the
439 graph where the nodes are all possible charsets and the edges are
440 the transformations listed in __gconv_modules_db.
442 For now we use a simple algorithm with quadratic runtime behaviour.
443 A breadth-first search, starting at `fromset' and `fromset_expand'.
444 The list starting at `first' contains all nodes that have been
445 visited up to now, in the order in which they have been visited --
446 excluding the goal nodes `toset' and `toset_expand' which get
447 managed in the list starting at `solution'.
448 `current' walks through the list starting at `first' and looks
449 which nodes are reachable from the current node, adding them to
450 the end of the list [`first' or `solution' respectively] (if
451 they are visited the first time) or updating them in place (if
452 they have have already been visited).
453 In each node of either list, cost_lo and cost_hi contain the
454 minimum cost over any paths found up to now, starting at `fromset'
455 or `fromset_expand', ending at that node. best_cost_lo and
456 best_cost_hi represent the minimum over the elements of the
457 `solution' list. */
459 if (fromset_expand != NULL)
461 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
462 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
463 lastp = &first->next->next;
465 else
467 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
468 lastp = &first->next;
471 for (current = first; current != NULL; current = current->next)
473 /* Now match all the available module specifications against the
474 current charset name. If any of them matches check whether
475 we already have a derivation for this charset. If yes, use the
476 one with the lower costs. Otherwise add the new charset at the
477 end.
479 The module database is organized in a tree form which allows
480 searching for prefixes. So we search for the first entry with a
481 matching prefix and any other matching entry can be found from
482 this place. */
483 struct gconv_module *node;
485 /* Maybe it is not necessary anymore to look for a solution for
486 this entry since the cost is already as high (or higher) as
487 the cost for the best solution so far. */
488 if (current->cost_hi > best_cost_hi
489 || (current->cost_hi == best_cost_hi
490 && current->cost_lo >= best_cost_lo))
491 continue;
493 node = __gconv_modules_db;
494 while (node != NULL)
496 int cmpres = strcmp (current->result_set, node->from_string);
497 if (cmpres == 0)
499 /* Walk through the list of modules with this prefix and
500 try to match the name. */
501 struct gconv_module *runp;
503 /* Check all the modules with this prefix. */
504 runp = node;
507 const char *result_set = (strcmp (runp->to_string, "-") == 0
508 ? (toset_expand ?: toset)
509 : runp->to_string);
510 int cost_hi = runp->cost_hi + current->cost_hi;
511 int cost_lo = runp->cost_lo + current->cost_lo;
512 struct derivation_step *step;
514 /* We managed to find a derivation. First see whether
515 we have reached one of the goal nodes. */
516 if (strcmp (result_set, toset) == 0
517 || (toset_expand != NULL
518 && strcmp (result_set, toset_expand) == 0))
520 /* Append to the `solution' list if there
521 is no entry with this name. */
522 for (step = solution; step != NULL; step = step->next)
523 if (strcmp (result_set, step->result_set) == 0)
524 break;
526 if (step == NULL)
528 step = NEW_STEP (result_set,
529 cost_hi, cost_lo,
530 runp, current);
531 step->next = solution;
532 solution = step;
534 else if (step->cost_hi > cost_hi
535 || (step->cost_hi == cost_hi
536 && step->cost_lo > cost_lo))
538 /* A better path was found for the node,
539 on the `solution' list. */
540 step->code = runp;
541 step->last = current;
542 step->cost_hi = cost_hi;
543 step->cost_lo = cost_lo;
546 /* Update best_cost accordingly. */
547 if (cost_hi < best_cost_hi
548 || (cost_hi == best_cost_hi
549 && cost_lo < best_cost_lo))
551 best_cost_hi = cost_hi;
552 best_cost_lo = cost_lo;
555 else if (cost_hi < best_cost_hi
556 || (cost_hi == best_cost_hi
557 && cost_lo < best_cost_lo))
559 /* Append at the end of the `first' list if there
560 is no entry with this name. */
561 for (step = first; step != NULL; step = step->next)
562 if (strcmp (result_set, step->result_set) == 0)
563 break;
565 if (step == NULL)
567 *lastp = NEW_STEP (result_set,
568 cost_hi, cost_lo,
569 runp, current);
570 lastp = &(*lastp)->next;
572 else if (step->cost_hi > cost_hi
573 || (step->cost_hi == cost_hi
574 && step->cost_lo > cost_lo))
576 /* A better path was found for the node,
577 on the `first' list. */
578 step->code = runp;
579 step->last = current;
581 /* Update the cost for all steps. */
582 for (step = first; step != NULL;
583 step = step->next)
584 /* But don't update the start nodes. */
585 if (step->code != NULL)
587 struct derivation_step *back;
588 int hi, lo;
590 hi = step->code->cost_hi;
591 lo = step->code->cost_lo;
593 for (back = step->last; back->code != NULL;
594 back = back->last)
596 hi += back->code->cost_hi;
597 lo += back->code->cost_lo;
600 step->cost_hi = hi;
601 step->cost_lo = lo;
604 /* Likewise for the nodes on the solution list.
605 Also update best_cost accordingly. */
606 for (step = solution; step != NULL;
607 step = step->next)
609 step->cost_hi = (step->code->cost_hi
610 + step->last->cost_hi);
611 step->cost_lo = (step->code->cost_lo
612 + step->last->cost_lo);
614 if (step->cost_hi < best_cost_hi
615 || (step->cost_hi == best_cost_hi
616 && step->cost_lo < best_cost_lo))
618 best_cost_hi = step->cost_hi;
619 best_cost_lo = step->cost_lo;
625 runp = runp->same;
627 while (runp != NULL);
629 break;
631 else if (cmpres < 0)
632 node = node->left;
633 else
634 node = node->right;
638 if (solution != NULL)
640 /* We really found a way to do the transformation. */
642 /* Choose the best solution. This is easy because we know that
643 the solution list has at most length 2 (one for every possible
644 goal node). */
645 if (solution->next != NULL)
647 struct derivation_step *solution2 = solution->next;
649 if (solution2->cost_hi < solution->cost_hi
650 || (solution2->cost_hi == solution->cost_hi
651 && solution2->cost_lo < solution->cost_lo))
652 solution = solution2;
655 /* Now build a data structure describing the transformation steps. */
656 result = gen_steps (solution, toset_expand ?: toset,
657 fromset_expand ?: fromset, handle, nsteps);
659 else
661 /* We haven't found a transformation. Clear the result values. */
662 *handle = NULL;
663 *nsteps = 0;
666 /* Add result in any case to list of known derivations. */
667 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
668 *handle, *nsteps);
670 return result;
674 static const char *
675 do_lookup_alias (const char *name)
677 struct gconv_alias key;
678 struct gconv_alias **found;
680 key.fromname = (char *) name;
681 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
682 return found != NULL ? (*found)->toname : NULL;
687 __gconv_compare_alias (const char *name1, const char *name2)
689 int result;
691 /* Ensure that the configuration data is read. */
692 __gconv_load_conf ();
694 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
695 result = strcmp (do_lookup_alias (name1) ?: name1,
696 do_lookup_alias (name2) ?: name2);
698 return result;
703 __gconv_find_transform (const char *toset, const char *fromset,
704 struct __gconv_step **handle, size_t *nsteps,
705 int flags)
707 const char *fromset_expand;
708 const char *toset_expand;
709 int result;
711 /* Ensure that the configuration data is read. */
712 __gconv_load_conf ();
714 /* Acquire the lock. */
715 __libc_lock_lock (__gconv_lock);
717 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
718 if (result != __GCONV_NODB)
720 /* We have a cache and could resolve the request, successful or not. */
721 __libc_lock_unlock (__gconv_lock);
722 return result;
725 /* If we don't have a module database return with an error. */
726 if (__gconv_modules_db == NULL)
728 __libc_lock_unlock (__gconv_lock);
729 return __GCONV_NOCONV;
732 /* See whether the names are aliases. */
733 fromset_expand = do_lookup_alias (fromset);
734 toset_expand = do_lookup_alias (toset);
736 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
737 /* We are not supposed to create a pseudo transformation (means
738 copying) when the input and output character set are the same. */
739 && (strcmp (toset, fromset) == 0
740 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
741 || (fromset_expand != NULL
742 && (strcmp (toset, fromset_expand) == 0
743 || (toset_expand != NULL
744 && strcmp (toset_expand, fromset_expand) == 0)))))
746 /* Both character sets are the same. */
747 __libc_lock_unlock (__gconv_lock);
748 return __GCONV_NULCONV;
751 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
752 handle, nsteps);
754 /* Release the lock. */
755 __libc_lock_unlock (__gconv_lock);
757 /* The following code is necessary since `find_derivation' will return
758 GCONV_OK even when no derivation was found but the same request
759 was processed before. I.e., negative results will also be cached. */
760 return (result == __GCONV_OK
761 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
762 : result);
766 /* Release the entries of the modules list. */
768 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
770 int result = __GCONV_OK;
771 size_t cnt;
773 /* Acquire the lock. */
774 __libc_lock_lock (__gconv_lock);
776 #ifndef STATIC_GCONV
777 cnt = nsteps;
778 while (cnt-- > 0)
779 __gconv_release_step (&steps[cnt]);
780 #endif
782 /* If we use the cache we free a bit more since we don't keep any
783 transformation records around, they are cheap enough to
784 recreate. */
785 __gconv_release_cache (steps, nsteps);
787 /* Release the lock. */
788 __libc_lock_unlock (__gconv_lock);
790 return result;
794 /* Free the modules mentioned. */
795 static void
796 free_modules_db (struct gconv_module *node)
798 if (node->left != NULL)
799 free_modules_db (node->left);
800 if (node->right != NULL)
801 free_modules_db (node->right);
804 struct gconv_module *act = node;
805 node = node->same;
806 if (act->module_name[0] == '/')
807 free (act);
809 while (node != NULL);
813 /* Free all resources if necessary. */
814 void
815 __gconv_db_freemem (void)
817 /* First free locale memory. This needs to be done before freeing
818 derivations, as ctype cleanup functions dereference steps arrays which we
819 free below. */
820 _nl_locale_subfreeres ();
822 /* finddomain.c has similar problem. */
823 extern void _nl_finddomain_subfreeres (void) attribute_hidden;
824 _nl_finddomain_subfreeres ();
826 if (__gconv_alias_db != NULL)
827 __tdestroy (__gconv_alias_db, free);
829 if (__gconv_modules_db != NULL)
830 free_modules_db (__gconv_modules_db);
832 if (known_derivations != NULL)
833 __tdestroy (known_derivations, free_derivation);