Update.
[glibc.git] / iconv / gconv_db.c
blob4abc1ae48b631caadb359fd66da4b47a13b55d36
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <limits.h>
22 #include <search.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
28 #include <ldsodefs.h>
29 #include <gconv_int.h>
32 /* Simple data structure for alias mapping. We have two names, `from'
33 and `to'. */
34 void *__gconv_alias_db;
36 /* Array with available modules. */
37 struct gconv_module *__gconv_modules_db;
39 /* We modify global data. */
40 __libc_lock_define_initialized (static, lock)
43 /* Function for searching alias. */
44 int
45 __gconv_alias_compare (const void *p1, const void *p2)
47 struct gconv_alias *s1 = (struct gconv_alias *) p1;
48 struct gconv_alias *s2 = (struct gconv_alias *) p2;
49 return strcmp (s1->fromname, s2->fromname);
53 /* To search for a derivation we create a list of intermediate steps.
54 Each element contains a pointer to the element which precedes it
55 in the derivation order. */
56 struct derivation_step
58 const char *result_set;
59 size_t result_set_len;
60 int cost_lo;
61 int cost_hi;
62 struct gconv_module *code;
63 struct derivation_step *last;
64 struct derivation_step *next;
67 #define NEW_STEP(result, hi, lo, module, last_mod) \
68 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
69 newp->result_set = result; \
70 newp->result_set_len = strlen (result); \
71 newp->cost_hi = hi; \
72 newp->cost_lo = lo; \
73 newp->code = module; \
74 newp->last = last_mod; \
75 newp->next = NULL; \
76 newp; })
79 /* If a specific transformation is used more than once we should not need
80 to start looking for it again. Instead cache each successful result. */
81 struct known_derivation
83 const char *from;
84 const char *to;
85 struct gconv_step *steps;
86 size_t nsteps;
89 /* Compare function for database of found derivations. */
90 static int
91 derivation_compare (const void *p1, const void *p2)
93 struct known_derivation *s1 = (struct known_derivation *) p1;
94 struct known_derivation *s2 = (struct known_derivation *) p2;
95 int result;
97 result = strcmp (s1->from, s2->from);
98 if (result == 0)
99 result = strcmp (s1->to, s2->to);
100 return result;
103 /* The search tree for known derivations. */
104 static void *known_derivations;
106 /* Look up whether given transformation was already requested before. */
107 static int
108 internal_function
109 derivation_lookup (const char *fromset, const char *toset,
110 struct gconv_step **handle, size_t *nsteps)
112 struct known_derivation key = { fromset, toset, NULL, 0 };
113 struct known_derivation **result;
115 result = __tfind (&key, &known_derivations, derivation_compare);
117 if (result == NULL)
118 return GCONV_NOCONV;
120 *handle = (*result)->steps;
121 *nsteps = (*result)->nsteps;
123 /* Please note that we return GCONV_OK even if the last search for
124 this transformation was unsuccessful. */
125 return GCONV_OK;
128 /* Add new derivation to list of known ones. */
129 static void
130 internal_function
131 add_derivation (const char *fromset, const char *toset,
132 struct gconv_step *handle, size_t nsteps)
134 struct known_derivation *new_deriv;
135 size_t fromset_len = strlen (fromset) + 1;
136 size_t toset_len = strlen (toset) + 1;
138 new_deriv = (struct known_derivation *)
139 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
140 if (new_deriv != NULL)
142 new_deriv->from = (char *) (new_deriv + 1);
143 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
144 toset, toset_len);
146 new_deriv->steps = handle;
147 new_deriv->nsteps = nsteps;
149 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
150 == NULL)
151 /* There is some kind of memory allocation problem. */
152 free (new_deriv);
154 /* Please note that we don't complain if the allocation failed. This
155 is not tragically but in case we use the memory debugging facilities
156 not all memory will be freed. */
159 static void
160 free_derivation (void *p)
162 struct known_derivation *deriv = (struct known_derivation *) p;
163 size_t cnt;
165 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
166 if (deriv->steps[cnt].end_fct)
167 _CALL_DL_FCT (deriv->steps[cnt].end_fct, (&deriv->steps[cnt]));
169 /* Free the name strings. */
170 free ((char *) deriv->steps[0].from_name);
171 free ((char *) deriv->steps[deriv->nsteps - 1].to_name);
173 free ((struct gconv_step *) deriv->steps);
174 free (deriv);
178 static int
179 internal_function
180 gen_steps (struct derivation_step *best, const char *toset,
181 const char *fromset, struct gconv_step **handle, size_t *nsteps)
183 size_t step_cnt = 0;
184 struct gconv_step *result;
185 struct derivation_step *current;
186 int status = GCONV_NOMEM;
188 /* First determine number of steps. */
189 for (current = best; current->last != NULL; current = current->last)
190 ++step_cnt;
192 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
193 * step_cnt);
194 if (result != NULL)
196 int failed = 0;
198 status = GCONV_OK;
199 *nsteps = step_cnt;
200 current = best;
201 while (step_cnt-- > 0)
203 result[step_cnt].from_name = (step_cnt == 0
204 ? __strdup (fromset)
205 : current->last->result_set);
206 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
207 ? __strdup (current->result_set)
208 : result[step_cnt + 1].from_name);
210 #ifndef STATIC_GCONV
211 if (current->code->module_name[0] == '/')
213 /* Load the module, return handle for it. */
214 struct gconv_loaded_object *shlib_handle =
215 __gconv_find_shlib (current->code->module_name);
217 if (shlib_handle == NULL)
219 failed = 1;
220 break;
223 result[step_cnt].shlib_handle = shlib_handle;
224 result[step_cnt].modname = shlib_handle->name;
225 result[step_cnt].counter = 0;
226 result[step_cnt].fct = shlib_handle->fct;
227 result[step_cnt].init_fct = shlib_handle->init_fct;
228 result[step_cnt].end_fct = shlib_handle->end_fct;
230 else
231 #endif
232 /* It's a builtin transformation. */
233 __gconv_get_builtin_trans (current->code->module_name,
234 &result[step_cnt]);
236 /* Call the init function. */
237 if (result[step_cnt].init_fct != NULL)
239 status = _CALL_DL_FCT (result[step_cnt].init_fct,
240 (&result[step_cnt]));
242 if (status != GCONV_OK)
244 failed = 1;
245 /* Make sure we unload this modules. */
246 --step_cnt;
247 break;
251 current = current->last;
254 if (failed != 0)
256 /* Something went wrong while initializing the modules. */
257 while (++step_cnt < *nsteps)
259 if (result[step_cnt].end_fct != NULL)
260 _CALL_DL_FCT (result[step_cnt].end_fct, (&result[step_cnt]));
261 #ifndef STATIC_GCONV
262 __gconv_release_shlib (result[step_cnt].shlib_handle);
263 #endif
265 free (result);
266 *nsteps = 0;
267 *handle = NULL;
268 if (status == GCONV_OK)
269 status = GCONV_NOCONV;
271 else
272 *handle = result;
274 else
276 *nsteps = 0;
277 *handle = NULL;
280 return status;
284 /* The main function: find a possible derivation from the `fromset' (either
285 the given name or the alias) to the `toset' (again with alias). */
286 static int
287 internal_function
288 find_derivation (const char *toset, const char *toset_expand,
289 const char *fromset, const char *fromset_expand,
290 struct gconv_step **handle, size_t *nsteps)
292 __libc_lock_define_initialized (static, lock)
293 struct derivation_step *first, *current, **lastp, *solution = NULL;
294 int best_cost_hi = INT_MAX;
295 int best_cost_lo = INT_MAX;
296 int result;
298 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
299 handle, nsteps);
300 if (result == GCONV_OK)
301 return result;
303 __libc_lock_lock (lock);
305 /* There is a small chance that this derivation is meanwhile found. This
306 can happen if in `find_derivation' we look for this derivation, didn't
307 find it but at the same time another thread looked for this derivation. */
308 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
309 handle, nsteps);
310 if (result == GCONV_OK)
312 __libc_lock_unlock (lock);
313 return result;
316 /* For now we use a simple algorithm with quadratic runtime behaviour.
317 The task is to match the `toset' with any of the available rules,
318 starting from FROMSET. */
319 if (fromset_expand != NULL)
321 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
322 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
323 lastp = &first->next->next;
325 else
327 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
328 lastp = &first->next;
331 for (current = first; current != NULL; current = current->next)
333 /* Now match all the available module specifications against the
334 current charset name. If any of them matches check whether
335 we already have a derivation for this charset. If yes, use the
336 one with the lower costs. Otherwise add the new charset at the
337 end.
339 The module database is organized in a tree form which allows to
340 search for prefixes. So we search for the first entry with a
341 matching prefix and any other matching entry can be found from
342 this place. */
343 struct gconv_module *node = __gconv_modules_db;
345 /* Maybe it is not necessary anymore to look for a solution for
346 this entry since the cost is already as high (or heigher) as
347 the cost for the best solution so far. */
348 if (current->cost_hi > best_cost_hi
349 || (current->cost_hi == best_cost_hi
350 && current->cost_lo >= best_cost_lo))
351 continue;
353 while (node != NULL)
355 int cmpres = strncmp (current->result_set, node->from_constpfx,
356 MIN (current->result_set_len,
357 node->from_constpfx_len));
359 if (cmpres == 0)
361 /* Walk through the list of modules with this prefix and
362 try to match the name. */
363 struct gconv_module *runp;
365 if (current->result_set_len < node->from_constpfx_len)
366 /* Cannot possibly match. */
367 break;
369 /* Check all the modules with this prefix. */
370 runp = node;
373 const char *result_set = NULL;
375 if (runp->from_pattern == NULL)
377 /* This is a simple entry and therefore we have a
378 found an matching entry if the strings are really
379 equal. */
380 if (current->result_set_len == runp->from_constpfx_len)
382 if (strcmp (runp->to_string, "-") == 0)
383 result_set = toset_expand ?: toset;
384 else
385 result_set = runp->to_string;
388 else
390 /* Compile the regular expression if necessary. */
391 if (runp->from_regex == NULL)
393 if (__regcomp (&runp->from_regex_mem,
394 runp->from_pattern,
395 REG_EXTENDED | REG_ICASE) != 0)
396 /* Something is wrong. Remember this. */
397 runp->from_regex = (regex_t *) -1L;
398 else
399 runp->from_regex = &runp->from_regex_mem;
402 if (runp->from_regex != (regex_t *) -1L)
404 regmatch_t match[4];
406 /* Try to match the regular expression. */
407 if (__regexec (runp->from_regex, current->result_set,
408 4, match, 0) == 0
409 && match[0].rm_so == 0
410 && current->result_set[match[0].rm_eo] == '\0')
412 /* At least the whole <from> string is matched.
413 We must now match sed-like possible
414 subexpressions from the match to the
415 toset expression. */
416 #define ENSURE_LEN(LEN) \
417 if (wp + (LEN) >= constr + len - 1) \
419 char *newp = alloca (len += 128); \
420 wp = __mempcpy (newp, constr, wp - constr); \
421 constr = newp; \
423 size_t len = 128;
424 char *constr = alloca (len);
425 char *wp = constr;
426 const char *cp = runp->to_string;
428 while (*cp != '\0')
430 if (*cp != '\\')
432 ENSURE_LEN (1);
433 *wp++ = *cp++;
435 else if (cp[1] == '\0')
436 /* Backslash at end of string. */
437 break;
438 else
440 ++cp;
441 if (*cp == '\\')
443 *wp++ = *cp++;
444 ENSURE_LEN (1);
446 else if (*cp < '1' || *cp > '3')
447 break;
448 else
450 int idx = *cp - '0';
451 if (match[idx].rm_so == -1)
452 /* No match. */
453 break;
455 ENSURE_LEN (match[idx].rm_eo
456 - match[idx].rm_so);
457 wp = __mempcpy (wp,
458 &current->result_set[match[idx].rm_so],
459 match[idx].rm_eo
460 - match[idx].rm_so);
461 ++cp;
465 if (*cp == '\0' && wp != constr)
467 /* Terminate the constructed string. */
468 *wp = '\0';
469 result_set = constr;
475 if (result_set != NULL)
477 int cost_hi = runp->cost_hi + current->cost_hi;
478 int cost_lo = runp->cost_lo + current->cost_lo;
479 struct derivation_step *step;
481 /* We managed to find a derivation. First see whether
482 this is what we are looking for. */
483 if (strcmp (result_set, toset) == 0
484 || (toset_expand != NULL
485 && strcmp (result_set, toset_expand) == 0))
487 if (solution == NULL || cost_hi < best_cost_hi
488 || (cost_hi == best_cost_hi
489 && cost_lo < best_cost_lo))
491 best_cost_hi = cost_hi;
492 best_cost_lo = cost_lo;
495 /* Append this solution to list. */
496 if (solution == NULL)
497 solution = NEW_STEP (result_set, 0, 0, runp,
498 current);
499 else
501 while (solution->next != NULL)
502 solution = solution->next;
504 solution->next = NEW_STEP (result_set, 0, 0,
505 runp, current);
508 else if (cost_hi < best_cost_hi
509 || (cost_hi == best_cost_hi
510 && cost_lo < best_cost_lo))
512 /* Append at the end if there is no entry with
513 this name. */
514 for (step = first; step != NULL; step = step->next)
515 if (strcmp (result_set, step->result_set) == 0)
516 break;
518 if (step == NULL)
520 *lastp = NEW_STEP (result_set,
521 cost_hi, cost_lo,
522 runp, current);
523 lastp = &(*lastp)->next;
525 else if (step->cost_hi > cost_hi
526 || (step->cost_hi == cost_hi
527 && step->cost_lo > cost_lo))
529 step->code = runp;
530 step->last = current;
532 /* Update the cost for all steps. */
533 for (step = first; step != NULL;
534 step = step->next)
536 struct derivation_step *back;
538 if (step->code == NULL)
539 /* This is one of the entries we started
540 from. */
541 continue;
543 step->cost_hi = step->code->cost_hi;
544 step->cost_lo = step->code->cost_lo;
546 for (back = step->last; back->code != NULL;
547 back = back->last)
549 step->cost_hi += back->code->cost_hi;
550 step->cost_lo += back->code->cost_lo;
554 for (step = solution; step != NULL;
555 step = step->next)
557 step->cost_hi = (step->code->cost_hi
558 + step->last->cost_hi);
559 step->cost_lo = (step->code->cost_lo
560 + step->last->cost_lo);
562 if (step->cost_hi < best_cost_hi
563 || (step->cost_hi == best_cost_hi
564 && step->cost_lo < best_cost_lo))
566 solution = step;
567 best_cost_hi = step->cost_hi;
568 best_cost_lo = step->cost_lo;
575 runp = runp->same;
577 while (runp != NULL);
579 if (current->result_set_len == node->from_constpfx_len)
580 break;
582 node = node->matching;
584 else if (cmpres < 0)
585 node = node->left;
586 else
587 node = node->right;
591 if (solution != NULL)
592 /* We really found a way to do the transformation. Now build a data
593 structure describing the transformation steps.*/
594 result = gen_steps (solution, toset_expand ?: toset,
595 fromset_expand ?: fromset, handle, nsteps);
596 else
598 /* We haven't found a transformation. Clear the result values. */
599 *handle = NULL;
600 *nsteps = 0;
603 /* Add result in any case to list of known derivations. */
604 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
605 *handle, *nsteps);
607 __libc_lock_unlock (lock);
609 return result;
614 internal_function
615 __gconv_find_transform (const char *toset, const char *fromset,
616 struct gconv_step **handle, size_t *nsteps)
618 __libc_once_define (static, once);
619 const char *fromset_expand = NULL;
620 const char *toset_expand = NULL;
621 int result;
623 /* Ensure that the configuration data is read. */
624 __libc_once (once, __gconv_read_conf);
626 /* Acquire the lock. */
627 __libc_lock_lock (lock);
629 /* If we don't have a module database return with an error. */
630 if (__gconv_modules_db == NULL)
632 __libc_lock_unlock (lock);
633 return GCONV_NOCONV;
636 /* See whether the names are aliases. */
637 if (__gconv_alias_db != NULL)
639 struct gconv_alias key;
640 struct gconv_alias **found;
642 key.fromname = fromset;
643 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
644 fromset_expand = found != NULL ? (*found)->toname : NULL;
646 key.fromname = toset;
647 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
648 toset_expand = found != NULL ? (*found)->toname : NULL;
651 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
652 handle, nsteps);
654 #ifndef STATIC_GCONV
655 /* Increment the user counter. */
656 if (result == GCONV_OK)
658 size_t cnt = *nsteps;
659 struct gconv_step *steps = *handle;
661 while (cnt > 0)
662 if (steps[--cnt].counter++ == 0)
664 steps[cnt].shlib_handle =
665 __gconv_find_shlib (steps[cnt].modname);
666 if (steps[cnt].shlib_handle == NULL)
668 /* Oops, this is the second time we use this module (after
669 unloading) and this time loading failed!? */
670 while (++cnt < *nsteps)
671 __gconv_release_shlib (steps[cnt].shlib_handle);
672 result = GCONV_NOCONV;
673 break;
677 #endif
679 /* Release the lock. */
680 __libc_lock_unlock (lock);
682 /* The following code is necessary since `find_derivation' will return
683 GCONV_OK even when no derivation was found but the same request
684 was processed before. I.e., negative results will also be cached. */
685 return (result == GCONV_OK
686 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
687 : result);
691 /* Release the entries of the modules list. */
693 internal_function
694 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
696 int result = GCONV_OK;
698 #ifndef STATIC_GCONV
699 /* Acquire the lock. */
700 __libc_lock_lock (lock);
702 while (nsteps-- > 0)
703 if (steps[nsteps].shlib_handle != NULL
704 && --steps[nsteps].counter == 0)
706 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
707 if (result != GCONV_OK)
708 break;
709 steps[nsteps].shlib_handle = NULL;
712 /* Release the lock. */
713 __libc_lock_unlock (lock);
714 #endif
716 return result;
720 /* Free the modules mentioned. */
721 static void
722 internal_function
723 free_modules_db (struct gconv_module *node)
725 if (node->left != NULL)
726 free_modules_db (node->left);
727 if (node->right != NULL)
728 free_modules_db (node->right);
729 if (node->same != NULL)
730 free_modules_db (node->same);
733 struct gconv_module *act = node;
734 node = node->matching;
735 if (act->module_name[0] == '/')
736 free (act);
738 while (node != NULL);
742 /* Free all resources if necessary. */
743 static void __attribute__ ((unused))
744 free_mem (void)
746 if (__gconv_alias_db != NULL)
747 __tdestroy (__gconv_alias_db, free);
749 if (__gconv_modules_db != NULL)
750 free_modules_db (__gconv_modules_db);
752 if (known_derivations != NULL)
753 __tdestroy (known_derivations, free_derivation);
756 text_set_element (__libc_subfreeres, free_mem);