Update.
[glibc.git] / iconv / gconv_db.c
blobf4a86e0cd060899986dce68454418464bc6c60f1
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
28 #include <dlfcn.h>
29 #include <gconv_int.h>
32 /* Simple data structure for alias mapping. We have two names, `from'
33 and `to'. */
34 void *__gconv_alias_db;
36 /* Array with available modules. */
37 struct gconv_module *__gconv_modules_db;
39 /* We modify global data. */
40 __libc_lock_define_initialized (static, lock)
43 /* Function for searching alias. */
44 int
45 __gconv_alias_compare (const void *p1, const void *p2)
47 struct gconv_alias *s1 = (struct gconv_alias *) p1;
48 struct gconv_alias *s2 = (struct gconv_alias *) p2;
49 return strcmp (s1->fromname, s2->fromname);
53 /* To search for a derivation we create a list of intermediate steps.
54 Each element contains a pointer to the element which precedes it
55 in the derivation order. */
56 struct derivation_step
58 const char *result_set;
59 size_t result_set_len;
60 int cost_lo;
61 int cost_hi;
62 struct gconv_module *code;
63 struct derivation_step *last;
64 struct derivation_step *next;
67 #define NEW_STEP(result, hi, lo, module, last_mod) \
68 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
69 newp->result_set = result; \
70 newp->result_set_len = strlen (result); \
71 newp->cost_hi = hi; \
72 newp->cost_lo = lo; \
73 newp->code = module; \
74 newp->last = last_mod; \
75 newp->next = NULL; \
76 newp; })
79 /* If a specific transformation is used more than once we should not need
80 to start looking for it again. Instead cache each successful result. */
81 struct known_derivation
83 const char *from;
84 const char *to;
85 struct __gconv_step *steps;
86 size_t nsteps;
89 /* Compare function for database of found derivations. */
90 static int
91 derivation_compare (const void *p1, const void *p2)
93 struct known_derivation *s1 = (struct known_derivation *) p1;
94 struct known_derivation *s2 = (struct known_derivation *) p2;
95 int result;
97 result = strcmp (s1->from, s2->from);
98 if (result == 0)
99 result = strcmp (s1->to, s2->to);
100 return result;
103 /* The search tree for known derivations. */
104 static void *known_derivations;
106 /* Look up whether given transformation was already requested before. */
107 static int
108 internal_function
109 derivation_lookup (const char *fromset, const char *toset,
110 struct __gconv_step **handle, size_t *nsteps)
112 struct known_derivation key = { fromset, toset, NULL, 0 };
113 struct known_derivation **result;
115 result = __tfind (&key, &known_derivations, derivation_compare);
117 if (result == NULL)
118 return __GCONV_NOCONV;
120 *handle = (*result)->steps;
121 *nsteps = (*result)->nsteps;
123 /* Please note that we return GCONV_OK even if the last search for
124 this transformation was unsuccessful. */
125 return __GCONV_OK;
128 /* Add new derivation to list of known ones. */
129 static void
130 internal_function
131 add_derivation (const char *fromset, const char *toset,
132 struct __gconv_step *handle, size_t nsteps)
134 struct known_derivation *new_deriv;
135 size_t fromset_len = strlen (fromset) + 1;
136 size_t toset_len = strlen (toset) + 1;
138 new_deriv = (struct known_derivation *)
139 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
140 if (new_deriv != NULL)
142 new_deriv->from = (char *) (new_deriv + 1);
143 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
144 toset, toset_len);
146 new_deriv->steps = handle;
147 new_deriv->nsteps = nsteps;
149 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
150 == NULL)
151 /* There is some kind of memory allocation problem. */
152 free (new_deriv);
154 /* Please note that we don't complain if the allocation failed. This
155 is not tragically but in case we use the memory debugging facilities
156 not all memory will be freed. */
159 static void
160 free_derivation (void *p)
162 struct known_derivation *deriv = (struct known_derivation *) p;
163 size_t cnt;
165 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
166 if (deriv->steps[cnt].__end_fct)
167 DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt]));
169 /* Free the name strings. */
170 free ((char *) deriv->steps[0].__from_name);
171 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
173 free ((struct __gconv_step *) deriv->steps);
174 free (deriv);
178 static int
179 internal_function
180 gen_steps (struct derivation_step *best, const char *toset,
181 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
183 size_t step_cnt = 0;
184 struct __gconv_step *result;
185 struct derivation_step *current;
186 int status = __GCONV_NOMEM;
188 /* First determine number of steps. */
189 for (current = best; current->last != NULL; current = current->last)
190 ++step_cnt;
192 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
193 * step_cnt);
194 if (result != NULL)
196 int failed = 0;
198 status = __GCONV_OK;
199 *nsteps = step_cnt;
200 current = best;
201 while (step_cnt-- > 0)
203 result[step_cnt].__from_name = (step_cnt == 0
204 ? __strdup (fromset)
205 : current->last->result_set);
206 result[step_cnt].__to_name = (step_cnt + 1 == *nsteps
207 ? __strdup (current->result_set)
208 : result[step_cnt + 1].__from_name);
210 #ifndef STATIC_GCONV
211 if (current->code->module_name[0] == '/')
213 /* Load the module, return handle for it. */
214 struct __gconv_loaded_object *shlib_handle =
215 __gconv_find_shlib (current->code->module_name);
217 if (shlib_handle == NULL)
219 failed = 1;
220 break;
223 result[step_cnt].__shlib_handle = shlib_handle;
224 result[step_cnt].__modname = shlib_handle->name;
225 result[step_cnt].__counter = 1;
226 result[step_cnt].__fct = shlib_handle->fct;
227 result[step_cnt].__init_fct = shlib_handle->init_fct;
228 result[step_cnt].__end_fct = shlib_handle->end_fct;
230 else
231 #endif
232 /* It's a builtin transformation. */
233 __gconv_get_builtin_trans (current->code->module_name,
234 &result[step_cnt]);
236 /* Call the init function. */
237 if (result[step_cnt].__init_fct != NULL)
239 status = DL_CALL_FCT (result[step_cnt].__init_fct,
240 (&result[step_cnt]));
242 if (status != __GCONV_OK)
244 failed = 1;
245 /* Make sure we unload this modules. */
246 --step_cnt;
247 break;
251 current = current->last;
254 if (failed != 0)
256 /* Something went wrong while initializing the modules. */
257 while (++step_cnt < *nsteps)
259 if (result[step_cnt].__end_fct != NULL)
260 DL_CALL_FCT (result[step_cnt].__end_fct, (&result[step_cnt]));
261 #ifndef STATIC_GCONV
262 __gconv_release_shlib (result[step_cnt].__shlib_handle);
263 #endif
265 free (result);
266 *nsteps = 0;
267 *handle = NULL;
268 if (status == __GCONV_OK)
269 status = __GCONV_NOCONV;
271 else
272 *handle = result;
274 else
276 *nsteps = 0;
277 *handle = NULL;
280 return status;
284 #ifndef STATIC_GCONV
285 static int
286 internal_function
287 increment_counter (struct __gconv_step *steps, size_t nsteps)
289 /* Increment the user counter. */
290 size_t cnt = nsteps;
291 int result = __GCONV_OK;
293 while (cnt-- > 0)
294 if (steps[cnt].__counter++ == 0)
296 steps[cnt].__shlib_handle =
297 __gconv_find_shlib (steps[cnt].__modname);
298 if (steps[cnt].__shlib_handle == NULL)
300 /* Oops, this is the second time we use this module (after
301 unloading) and this time loading failed!? */
302 while (++cnt < nsteps)
303 __gconv_release_shlib (steps[cnt].__shlib_handle);
304 result = __GCONV_NOCONV;
305 break;
308 return result;
310 #endif
313 /* The main function: find a possible derivation from the `fromset' (either
314 the given name or the alias) to the `toset' (again with alias). */
315 static int
316 internal_function
317 find_derivation (const char *toset, const char *toset_expand,
318 const char *fromset, const char *fromset_expand,
319 struct __gconv_step **handle, size_t *nsteps)
321 struct derivation_step *first, *current, **lastp, *solution = NULL;
322 int best_cost_hi = INT_MAX;
323 int best_cost_lo = INT_MAX;
324 int result;
326 /* There is a small chance that this derivation is meanwhile found. This
327 can happen if in `find_derivation' we look for this derivation, didn't
328 find it but at the same time another thread looked for this derivation. */
329 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
330 handle, nsteps);
331 if (result == __GCONV_OK)
333 #ifndef STATIC_GCONV
334 result = increment_counter (*handle, *nsteps);
335 #endif
336 return result;
339 /* For now we use a simple algorithm with quadratic runtime behaviour.
340 The task is to match the `toset' with any of the available rules,
341 starting from FROMSET. */
342 if (fromset_expand != NULL)
344 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
345 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
346 lastp = &first->next->next;
348 else
350 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
351 lastp = &first->next;
354 for (current = first; current != NULL; current = current->next)
356 /* Now match all the available module specifications against the
357 current charset name. If any of them matches check whether
358 we already have a derivation for this charset. If yes, use the
359 one with the lower costs. Otherwise add the new charset at the
360 end.
362 The module database is organized in a tree form which allows
363 searching for prefixes. So we search for the first entry with a
364 matching prefix and any other matching entry can be found from
365 this place. */
366 struct gconv_module *node = __gconv_modules_db;
368 /* Maybe it is not necessary anymore to look for a solution for
369 this entry since the cost is already as high (or heigher) as
370 the cost for the best solution so far. */
371 if (current->cost_hi > best_cost_hi
372 || (current->cost_hi == best_cost_hi
373 && current->cost_lo >= best_cost_lo))
374 continue;
376 while (node != NULL)
378 int cmpres = strncmp (current->result_set, node->from_constpfx,
379 MIN (current->result_set_len,
380 node->from_constpfx_len));
382 if (cmpres == 0)
384 /* Walk through the list of modules with this prefix and
385 try to match the name. */
386 struct gconv_module *runp;
388 if (current->result_set_len < node->from_constpfx_len)
389 /* Cannot possibly match. */
390 break;
392 /* Check all the modules with this prefix. */
393 runp = node;
396 const char *result_set = NULL;
398 if (runp->from_pattern == NULL)
400 /* This is a simple entry and therefore we have a
401 found an matching entry if the strings are really
402 equal. */
403 if (current->result_set_len == runp->from_constpfx_len)
405 if (strcmp (runp->to_string, "-") == 0)
406 result_set = toset_expand ?: toset;
407 else
408 result_set = runp->to_string;
411 else
413 /* Compile the regular expression if necessary. */
414 if (runp->from_regex == NULL)
416 if (__regcomp (&runp->from_regex_mem,
417 runp->from_pattern,
418 REG_EXTENDED | REG_ICASE) != 0)
419 /* Something is wrong. Remember this. */
420 runp->from_regex = (regex_t *) -1L;
421 else
422 runp->from_regex = &runp->from_regex_mem;
425 if (runp->from_regex != (regex_t *) -1L)
427 regmatch_t match[4];
429 /* Try to match the regular expression. */
430 if (__regexec (runp->from_regex, current->result_set,
431 4, match, 0) == 0
432 && match[0].rm_so == 0
433 && current->result_set[match[0].rm_eo] == '\0')
435 /* At least the whole <from> string is matched.
436 We must now match sed-like possible
437 subexpressions from the match to the
438 toset expression. */
439 #define ENSURE_LEN(LEN) \
440 if (wp + (LEN) >= constr + len - 1) \
442 char *newp = alloca (len += 128); \
443 wp = __mempcpy (newp, constr, wp - constr); \
444 constr = newp; \
446 size_t len = 128;
447 char *constr = alloca (len);
448 char *wp = constr;
449 const char *cp = runp->to_string;
451 while (*cp != '\0')
453 if (*cp != '\\')
455 ENSURE_LEN (1);
456 *wp++ = *cp++;
458 else if (cp[1] == '\0')
459 /* Backslash at end of string. */
460 break;
461 else
463 ++cp;
464 if (*cp == '\\')
466 *wp++ = *cp++;
467 ENSURE_LEN (1);
469 else if (*cp < '1' || *cp > '3')
470 break;
471 else
473 int idx = *cp - '0';
474 if (match[idx].rm_so == -1)
475 /* No match. */
476 break;
478 ENSURE_LEN (match[idx].rm_eo
479 - match[idx].rm_so);
480 wp = __mempcpy (wp,
481 &current->result_set[match[idx].rm_so],
482 match[idx].rm_eo
483 - match[idx].rm_so);
484 ++cp;
488 if (*cp == '\0' && wp != constr)
490 /* Terminate the constructed string. */
491 *wp = '\0';
492 result_set = constr;
498 if (result_set != NULL)
500 int cost_hi = runp->cost_hi + current->cost_hi;
501 int cost_lo = runp->cost_lo + current->cost_lo;
502 struct derivation_step *step;
504 /* We managed to find a derivation. First see whether
505 this is what we are looking for. */
506 if (strcmp (result_set, toset) == 0
507 || (toset_expand != NULL
508 && strcmp (result_set, toset_expand) == 0))
510 if (solution == NULL || cost_hi < best_cost_hi
511 || (cost_hi == best_cost_hi
512 && cost_lo < best_cost_lo))
514 best_cost_hi = cost_hi;
515 best_cost_lo = cost_lo;
518 /* Append this solution to list. */
519 if (solution == NULL)
520 solution = NEW_STEP (result_set, 0, 0, runp,
521 current);
522 else
524 while (solution->next != NULL)
525 solution = solution->next;
527 solution->next = NEW_STEP (result_set, 0, 0,
528 runp, current);
531 else if (cost_hi < best_cost_hi
532 || (cost_hi == best_cost_hi
533 && cost_lo < best_cost_lo))
535 /* Append at the end if there is no entry with
536 this name. */
537 for (step = first; step != NULL; step = step->next)
538 if (strcmp (result_set, step->result_set) == 0)
539 break;
541 if (step == NULL)
543 *lastp = NEW_STEP (result_set,
544 cost_hi, cost_lo,
545 runp, current);
546 lastp = &(*lastp)->next;
548 else if (step->cost_hi > cost_hi
549 || (step->cost_hi == cost_hi
550 && step->cost_lo > cost_lo))
552 step->code = runp;
553 step->last = current;
555 /* Update the cost for all steps. */
556 for (step = first; step != NULL;
557 step = step->next)
559 struct derivation_step *back;
561 if (step->code == NULL)
562 /* This is one of the entries we started
563 from. */
564 continue;
566 step->cost_hi = step->code->cost_hi;
567 step->cost_lo = step->code->cost_lo;
569 for (back = step->last; back->code != NULL;
570 back = back->last)
572 step->cost_hi += back->code->cost_hi;
573 step->cost_lo += back->code->cost_lo;
577 for (step = solution; step != NULL;
578 step = step->next)
580 step->cost_hi = (step->code->cost_hi
581 + step->last->cost_hi);
582 step->cost_lo = (step->code->cost_lo
583 + step->last->cost_lo);
585 if (step->cost_hi < best_cost_hi
586 || (step->cost_hi == best_cost_hi
587 && step->cost_lo < best_cost_lo))
589 solution = step;
590 best_cost_hi = step->cost_hi;
591 best_cost_lo = step->cost_lo;
598 runp = runp->same;
600 while (runp != NULL);
602 if (current->result_set_len == node->from_constpfx_len)
603 break;
605 node = node->matching;
607 else if (cmpres < 0)
608 node = node->left;
609 else
610 node = node->right;
614 if (solution != NULL)
615 /* We really found a way to do the transformation. Now build a data
616 structure describing the transformation steps.*/
617 result = gen_steps (solution, toset_expand ?: toset,
618 fromset_expand ?: fromset, handle, nsteps);
619 else
621 /* We haven't found a transformation. Clear the result values. */
622 *handle = NULL;
623 *nsteps = 0;
626 /* Add result in any case to list of known derivations. */
627 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
628 *handle, *nsteps);
630 return result;
635 internal_function
636 __gconv_find_transform (const char *toset, const char *fromset,
637 struct __gconv_step **handle, size_t *nsteps,
638 int flags)
640 __libc_once_define (static, once);
641 const char *fromset_expand = NULL;
642 const char *toset_expand = NULL;
643 int result;
645 /* Ensure that the configuration data is read. */
646 __libc_once (once, __gconv_read_conf);
648 /* Acquire the lock. */
649 __libc_lock_lock (lock);
651 /* If we don't have a module database return with an error. */
652 if (__gconv_modules_db == NULL)
654 __libc_lock_unlock (lock);
655 return __GCONV_NOCONV;
658 /* See whether the names are aliases. */
659 if (__gconv_alias_db != NULL)
661 struct gconv_alias key;
662 struct gconv_alias **found;
664 key.fromname = fromset;
665 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
666 fromset_expand = found != NULL ? (*found)->toname : NULL;
668 key.fromname = toset;
669 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
670 toset_expand = found != NULL ? (*found)->toname : NULL;
673 if ((flags & GCONV_AVOID_NOCONV)
674 /* We are not supposed to create a pseudo transformation (means
675 copying) when the input and output character set are the same. */
676 && (strcmp (toset, fromset) == 0
677 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
678 || (fromset_expand != NULL
679 && (strcmp (toset, fromset_expand) == 0
680 || (toset_expand != NULL
681 && strcmp (toset_expand, fromset_expand) == 0)))))
683 /* Both character sets are the same. */
684 __libc_lock_unlock (lock);
685 return __GCONV_NOCONV;
688 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
689 handle, nsteps);
691 /* Release the lock. */
692 __libc_lock_unlock (lock);
694 /* The following code is necessary since `find_derivation' will return
695 GCONV_OK even when no derivation was found but the same request
696 was processed before. I.e., negative results will also be cached. */
697 return (result == __GCONV_OK
698 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
699 : result);
703 /* Release the entries of the modules list. */
705 internal_function
706 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
708 int result = __GCONV_OK;
710 #ifndef STATIC_GCONV
711 /* Acquire the lock. */
712 __libc_lock_lock (lock);
714 while (nsteps-- > 0)
715 if (steps[nsteps].__shlib_handle != NULL
716 && --steps[nsteps].__counter == 0)
718 result = __gconv_release_shlib (steps[nsteps].__shlib_handle);
719 if (result != __GCONV_OK)
720 break;
721 steps[nsteps].__shlib_handle = NULL;
724 /* Release the lock. */
725 __libc_lock_unlock (lock);
726 #endif
728 return result;
732 /* Free the modules mentioned. */
733 static void
734 internal_function
735 free_modules_db (struct gconv_module *node)
737 if (node->left != NULL)
738 free_modules_db (node->left);
739 if (node->right != NULL)
740 free_modules_db (node->right);
741 if (node->same != NULL)
742 free_modules_db (node->same);
745 struct gconv_module *act = node;
746 node = node->matching;
747 if (act->module_name[0] == '/')
748 free (act);
750 while (node != NULL);
754 /* Free all resources if necessary. */
755 static void __attribute__ ((unused))
756 free_mem (void)
758 if (__gconv_alias_db != NULL)
759 __tdestroy (__gconv_alias_db, free);
761 if (__gconv_modules_db != NULL)
762 free_modules_db (__gconv_modules_db);
764 if (known_derivations != NULL)
765 __tdestroy (known_derivations, free_derivation);
768 text_set_element (__libc_subfreeres, free_mem);