Update.
[glibc.git] / iconv / gconv_db.c
blobe6253b838053bdaf86c1a195745fbafa4915cabc
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <search.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <bits/libc-lock.h>
26 #include <ldsodefs.h>
27 #include <gconv_int.h>
30 /* Simple data structure for alias mapping. We have two names, `from'
31 and `to'. */
32 void *__gconv_alias_db;
34 /* Array with available modules. */
35 size_t __gconv_nmodules;
36 struct gconv_module **__gconv_modules_db;
38 /* We modify global data. */
39 __libc_lock_define_initialized (static, lock)
42 /* Function for searching alias. */
43 int
44 __gconv_alias_compare (const void *p1, const void *p2)
46 struct gconv_alias *s1 = (struct gconv_alias *) p1;
47 struct gconv_alias *s2 = (struct gconv_alias *) p2;
48 return __strcasecmp (s1->fromname, s2->fromname);
/* To search for a derivation we create a list of intermediate steps.
   Each element contains a pointer to the element which precedes it
   in the derivation order.  */
struct derivation_step
{
  /* Name of the character set reached by this step.  */
  const char *result_set;
  /* Module which produces RESULT_SET; NULL for the starting element(s).  */
  struct gconv_module *code;
  /* Element this one was derived from; NULL for the starting element(s).  */
  struct derivation_step *last;
  /* Next element in the list of steps still to be examined.  */
  struct derivation_step *next;
};
/* Create a new `struct derivation_step' with the given result set name,
   module and predecessor; its `next' member is cleared.  The object is
   obtained with `alloca', i.e. it lives in the stack frame of the
   function using the macro and must not be referenced after that
   function returned.  Evaluates to a pointer to the new element.  */
#define NEW_STEP(result, module, last_mod) \
  ({ struct derivation_step *newp = alloca (sizeof (*newp));                  \
     newp->result_set = (result);                                             \
     newp->code = (module);                                                   \
     newp->last = (last_mod);                                                 \
     newp->next = NULL;                                                       \
     newp; })
/* If a specific transformation is used more than once we should not need
   to start looking for it again.  Instead cache each successful result.
   The member order matters: `derivation_lookup' initializes a key of
   this type positionally.  */
struct known_derivation
{
  /* Name of the source character set.  */
  const char *from;
  /* Name of the destination character set.  */
  const char *to;
  /* Array of conversion steps; NULL if no derivation was found.  */
  struct gconv_step *steps;
  /* Number of elements in STEPS.  */
  size_t nsteps;
};
82 /* Compare function for database of found derivations. */
83 static int
84 derivation_compare (const void *p1, const void *p2)
86 struct known_derivation *s1 = (struct known_derivation *) p1;
87 struct known_derivation *s2 = (struct known_derivation *) p2;
88 int result;
90 result = strcmp (s1->from, s2->from);
91 if (result == 0)
92 result = strcmp (s1->to, s2->to);
93 return result;
/* The search tree for known derivations.  Its entries are
   `struct known_derivation' objects ordered by `derivation_compare'.  */
static void *known_derivations;
99 /* Look up whether given transformation was already requested before. */
100 static int
101 internal_function
102 derivation_lookup (const char *fromset, const char *toset,
103 struct gconv_step **handle, size_t *nsteps)
105 struct known_derivation key = { fromset, toset, NULL, 0 };
106 struct known_derivation **result;
108 result = __tfind (&key, &known_derivations, derivation_compare);
110 if (result == NULL)
111 return GCONV_NOCONV;
113 *handle = (*result)->steps;
114 *nsteps = (*result)->nsteps;
116 /* Please note that we return GCONV_OK even if the last search for
117 this transformation was unsuccessful. */
118 return GCONV_OK;
121 /* Add new derivation to list of known ones. */
122 static void
123 internal_function
124 add_derivation (const char *fromset, const char *toset,
125 struct gconv_step *handle, size_t nsteps)
127 struct known_derivation *new_deriv;
128 size_t fromset_len = strlen (fromset) + 1;
129 size_t toset_len = strlen (toset) + 1;
131 new_deriv = (struct known_derivation *)
132 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
133 if (new_deriv != NULL)
135 new_deriv->from = memcpy (new_deriv + 1, fromset, fromset_len);
136 new_deriv->to = memcpy ((char *) new_deriv->from + fromset_len,
137 toset, toset_len);
139 new_deriv->steps = handle;
140 new_deriv->nsteps = nsteps;
142 __tsearch (new_deriv, &known_derivations, derivation_compare);
144 /* Please note that we don't complain if the allocation failed. This
145 is not tragically but in case we use the memory debugging facilities
146 not all memory will be freed. */
149 static void
150 free_derivation (void *p)
152 struct known_derivation *deriv = (struct known_derivation *) p;
153 size_t cnt;
155 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
156 if (deriv->steps[cnt].end_fct)
157 _CALL_DL_FCT (deriv->steps[cnt].end_fct, (&deriv->steps[cnt]));
159 free ((struct gconv_step *) deriv->steps);
160 free (deriv);
164 static int
165 internal_function
166 gen_steps (struct derivation_step *best, const char *toset,
167 const char *fromset, struct gconv_step **handle, size_t *nsteps)
169 size_t step_cnt = 0;
170 struct gconv_step *result;
171 struct derivation_step *current;
172 int status = GCONV_NOMEM;
174 /* First determine number of steps. */
175 for (current = best; current->last != NULL; current = current->last)
176 ++step_cnt;
178 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
179 * step_cnt);
180 if (result != NULL)
182 int failed = 0;
184 status = GCONV_OK;
185 *nsteps = step_cnt;
186 current = best;
187 while (step_cnt-- > 0)
189 result[step_cnt].from_name = (step_cnt == 0
190 ? __strdup (fromset)
191 : current->last->result_set);
192 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
193 ? __strdup (current->result_set)
194 : result[step_cnt + 1].from_name);
196 #ifndef STATIC_GCONV
197 if (current->code->module_name[0] == '/')
199 /* Load the module, return handle for it. */
200 struct gconv_loaded_object *shlib_handle =
201 __gconv_find_shlib (current->code->module_name);
203 if (shlib_handle == NULL)
205 failed = 1;
206 break;
209 result[step_cnt].shlib_handle = shlib_handle;
210 result[step_cnt].modname = shlib_handle->name;
211 result[step_cnt].counter = 0;
212 result[step_cnt].fct = shlib_handle->fct;
213 result[step_cnt].init_fct = shlib_handle->init_fct;
214 result[step_cnt].end_fct = shlib_handle->end_fct;
216 else
217 #endif
218 /* It's a builtin transformation. */
219 __gconv_get_builtin_trans (current->code->module_name,
220 &result[step_cnt]);
222 /* Call the init function. */
223 if (result[step_cnt].init_fct != NULL)
225 status = _CALL_DL_FCT (result[step_cnt].init_fct,
226 (&result[step_cnt]));
228 if (status != GCONV_OK)
230 failed = 1;
231 /* Make sure we unload this modules. */
232 --step_cnt;
233 break;
237 current = current->last;
240 if (failed != 0)
242 /* Something went wrong while initializing the modules. */
243 while (++step_cnt < *nsteps)
245 if (result[step_cnt].end_fct != NULL)
246 _CALL_DL_FCT (result[step_cnt].end_fct, (&result[step_cnt]));
247 #ifndef STATIC_GCONV
248 __gconv_release_shlib (result[step_cnt].shlib_handle);
249 #endif
251 free (result);
252 *nsteps = 0;
253 *handle = NULL;
254 if (status == GCONV_OK)
255 status = GCONV_NOCONV;
257 else
258 *handle = result;
260 else
262 *nsteps = 0;
263 *handle = NULL;
266 return status;
270 /* The main function: find a possible derivation from the `fromset' (either
271 the given name or the alias) to the `toset' (again with alias). */
272 static int
273 internal_function
274 find_derivation (const char *toset, const char *toset_expand,
275 const char *fromset, const char *fromset_expand,
276 struct gconv_step **handle, size_t *nsteps)
278 __libc_lock_define_initialized (static, lock)
279 struct derivation_step *first, *current, **lastp, *best = NULL;
280 int best_cost_hi = 0;
281 int best_cost_lo = 0;
282 int result;
284 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
285 handle, nsteps);
286 if (result == GCONV_OK)
287 return result;
289 __libc_lock_lock (lock);
291 /* There is a small chance that this derivation is meanwhile found. This
292 can happen if in `find_derivation' we look for this derivation, didn't
293 find it but at the same time another thread looked for this derivation. */
294 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
295 handle, nsteps);
296 if (result == GCONV_OK)
298 __libc_lock_unlock (lock);
299 return result;
302 /* ### TODO
303 For now we use a simple algorithm with quadratic runtime behaviour.
304 The task is to match the `toset' with any of the available rules,
305 starting from FROMSET. */
306 if (fromset_expand != NULL)
308 first = NEW_STEP (fromset_expand, NULL, NULL);
309 first->next = NEW_STEP (fromset, NULL, NULL);
310 lastp = &first->next->next;
312 else
314 first = NEW_STEP (fromset, NULL, NULL);
315 lastp = &first->next;
318 current = first;
319 while (current != NULL)
321 /* Now match all the available module specifications against the
322 current charset name. If any of them matches check whether
323 we already have a derivation for this charset. If yes, use the
324 one with the lower costs. Otherwise add the new charset at the
325 end. */
326 size_t cnt;
328 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
330 const char *result_set = NULL;
332 if (__gconv_modules_db[cnt]->from_pattern == NULL)
334 if (__strcasecmp (current->result_set,
335 __gconv_modules_db[cnt]->from_constpfx) == 0)
337 if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0)
338 result_set = toset_expand ?: toset;
339 else
340 result_set = __gconv_modules_db[cnt]->to_string;
343 else
344 /* We have a regular expression. First see if the prefix
345 matches. */
346 if (__strncasecmp (current->result_set,
347 __gconv_modules_db[cnt]->from_constpfx,
348 __gconv_modules_db[cnt]->from_constpfx_len)
349 == 0)
351 /* First compile the regex if not already done. */
352 if (__gconv_modules_db[cnt]->from_regex == NULL)
354 if (__regcomp (&__gconv_modules_db[cnt]->from_regex_mem,
355 __gconv_modules_db[cnt]->from_pattern,
356 REG_EXTENDED | REG_ICASE) != 0)
357 /* Something is wrong. Remember this. */
358 __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L;
359 else
360 __gconv_modules_db[cnt]->from_regex
361 = &__gconv_modules_db[cnt]->from_regex_mem;
364 if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L)
366 /* Try to match the from name. */
367 regmatch_t match[4];
369 if (__regexec (__gconv_modules_db[cnt]->from_regex,
370 current->result_set, 4, match, 0) == 0
371 && match[0].rm_so == 0
372 && current->result_set[match[0].rm_eo] == '\0')
374 /* At least the whole <from> string is matched.
375 We must now match sed-like possible
376 subexpressions from the match to the
377 toset expression. */
378 #define ENSURE_LEN(LEN) \
379 if (wp + (LEN) >= constr + len - 1) \
381 char *newp = alloca (len += 128); \
382 memcpy (newp, constr, wp - constr); \
383 wp = newp + (wp - constr); \
384 constr = newp; \
386 size_t len = 128;
387 char *constr = alloca (len);
388 char *wp = constr;
389 const char *cp = __gconv_modules_db[cnt]->to_string;
391 while (*cp != '\0')
393 if (*cp != '\\')
395 ENSURE_LEN (1);
396 *wp++ = *cp++;
398 else if (cp[1] == '\0')
399 /* Backslash at end of string. */
400 break;
401 else
403 ++cp;
404 if (*cp == '\\')
406 *wp++ = *cp++;
407 ENSURE_LEN (1);
409 else if (*cp < '1' || *cp > '3')
410 break;
411 else
413 int idx = *cp - '0';
414 if (match[idx].rm_so == -1)
415 /* No match. */
416 break;
418 ENSURE_LEN (match[idx].rm_eo
419 - match[idx].rm_so);
420 wp = __mempcpy (wp,
421 &current->result_set[match[idx].rm_so],
422 match[idx].rm_eo
423 - match[idx].rm_so);
424 ++cp;
428 if (*cp == '\0' && wp != constr)
430 /* Terminate the constructed string. */
431 *wp = '\0';
432 result_set = constr;
438 if (result_set != NULL)
440 /* We managed to find a derivation. First see whether
441 this is what we are looking for. */
442 if (__strcasecmp (result_set, toset) == 0
443 || (toset_expand != NULL
444 && __strcasecmp (result_set, toset_expand) == 0))
446 /* Determine the costs. If they are lower than the
447 previous solution (or this is the first solution)
448 remember this solution. */
449 int cost_hi = __gconv_modules_db[cnt]->cost_hi;
450 int cost_lo = __gconv_modules_db[cnt]->cost_lo;
451 struct derivation_step *runp = current;
452 while (runp->code != NULL)
454 cost_hi += runp->code->cost_hi;
455 cost_lo += runp->code->cost_lo;
456 runp = runp->last;
458 if (best == NULL || cost_hi < best_cost_hi
459 || (cost_hi == best_cost_hi && cost_lo < best_cost_lo))
461 best = NEW_STEP (result_set, __gconv_modules_db[cnt],
462 current);
463 best_cost_hi = cost_hi;
464 best_cost_lo = cost_lo;
467 else
469 /* Append at the end if there is no entry with this name. */
470 struct derivation_step *runp = first;
472 while (runp != NULL)
474 if (__strcasecmp (result_set, runp->result_set) == 0)
475 break;
476 runp = runp->next;
479 if (runp == NULL)
481 *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt],
482 current);
483 lastp = &(*lastp)->next;
489 /* Go on with the next entry. */
490 current = current->next;
493 if (best != NULL)
494 /* We really found a way to do the transformation. Now build a data
495 structure describing the transformation steps.*/
496 result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset,
497 handle, nsteps);
498 else
500 /* We haven't found a transformation. Clear the result values. */
501 *handle = NULL;
502 *nsteps = 0;
505 /* Add result in any case to list of known derivations. */
506 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
507 *handle, *nsteps);
509 __libc_lock_unlock (lock);
511 return result;
516 internal_function
517 __gconv_find_transform (const char *toset, const char *fromset,
518 struct gconv_step **handle, size_t *nsteps)
520 __libc_once_define (static, once);
521 const char *fromset_expand = NULL;
522 const char *toset_expand = NULL;
523 int result;
525 /* Ensure that the configuration data is read. */
526 __libc_once (once, __gconv_read_conf);
528 /* Acquire the lock. */
529 __libc_lock_lock (lock);
531 /* If we don't have a module database return with an error. */
532 if (__gconv_modules_db == NULL)
534 __libc_lock_unlock (lock);
535 return GCONV_NOCONV;
538 /* See whether the names are aliases. */
539 if (__gconv_alias_db != NULL)
541 struct gconv_alias key;
542 struct gconv_alias **found;
544 key.fromname = fromset;
545 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
546 fromset_expand = found != NULL ? (*found)->toname : NULL;
548 key.fromname = toset;
549 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
550 toset_expand = found != NULL ? (*found)->toname : NULL;
553 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
554 handle, nsteps);
556 #ifndef STATIC_GCONV
557 /* Increment the user counter. */
558 if (result == GCONV_OK)
560 size_t cnt = *nsteps;
561 struct gconv_step *steps = *handle;
564 if (steps[--cnt].counter++ == 0)
566 steps[cnt].shlib_handle =
567 __gconv_find_shlib (steps[cnt].modname);
568 if (steps[cnt].shlib_handle == NULL)
570 /* Oops, this is the second time we use this module (after
571 unloading) and this time loading failed!? */
572 while (++cnt < *nsteps)
573 __gconv_release_shlib (steps[cnt].shlib_handle);
574 result = GCONV_NOCONV;
575 break;
578 while (cnt > 0);
580 #endif
582 /* Release the lock. */
583 __libc_lock_unlock (lock);
585 /* The following code is necessary since `find_derivation' will return
586 GCONV_OK even when no derivation was found but the same request
587 was processed before. I.e., negative results will also be cached. */
588 return (result == GCONV_OK
589 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
590 : result);
594 /* Release the entries of the modules list. */
596 internal_function
597 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
599 int result = GCONV_OK;
601 #ifndef STATIC_GCONV
602 /* Acquire the lock. */
603 __libc_lock_lock (lock);
605 while (nsteps-- > 0)
606 if (steps[nsteps].shlib_handle != NULL
607 && --steps[nsteps].counter == 0)
609 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
610 if (result != GCONV_OK)
611 break;
612 steps[nsteps].shlib_handle = NULL;
615 /* Release the lock. */
616 __libc_lock_unlock (lock);
617 #endif
619 return result;
623 /* Free all resources if necessary. */
624 static void __attribute__ ((unused))
625 free_mem (void)
627 size_t cnt;
629 if (__gconv_alias_db != NULL)
630 __tdestroy (__gconv_alias_db, free);
632 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
633 /* Modules which names do not start with a slash are builtin
634 transformations and the memory is not allocated dynamically. */
635 if (__gconv_modules_db[cnt]->module_name[0] == '/')
636 free (__gconv_modules_db[cnt]);
638 if (known_derivations != NULL)
639 __tdestroy (known_derivations, free_derivation);
642 text_set_element (__libc_subfreeres, free_mem);