Update.
[glibc.git] / iconv / gconv_db.c
blobbe2b7fa233c0526147edbd0af08c8f764816c143
1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <search.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <bits/libc-lock.h>
26 #include <ldsodefs.h>
27 #include <gconv_int.h>
30 /* Simple data structure for alias mapping. We have two names, `from'
31 and `to'. */
32 void *__gconv_alias_db;
34 /* Array with available modules. */
35 size_t __gconv_nmodules;
36 struct gconv_module **__gconv_modules_db;
38 /* We modify global data. */
39 __libc_lock_define_initialized (static, lock)
42 /* Function for searching alias. */
43 int
44 __gconv_alias_compare (const void *p1, const void *p2)
46 struct gconv_alias *s1 = (struct gconv_alias *) p1;
47 struct gconv_alias *s2 = (struct gconv_alias *) p2;
48 return __strcasecmp (s1->fromname, s2->fromname);
/* To search for a derivation we create a list of intermediate steps.
   Each element contains a pointer to the element which precedes it
   in the derivation order.

   RESULT_SET  name of the charset reached by this step.
   CODE        module which produces RESULT_SET, NULL for a start node.
   LAST        step this one was derived from, NULL for a start node.
   NEXT        next element of the work list, NULL at its end.  */
struct derivation_step
{
  const char *result_set;
  struct gconv_module *code;
  struct derivation_step *last;
  struct derivation_step *next;
};

/* Create a derivation step on the stack of the calling function (the
   storage comes from `alloca', so it must not be used after the caller
   returns).  The arguments are parenthesized and the temporary carries a
   name which is unlikely to clash with identifiers at the call site.  */
#define NEW_STEP(result, module, last_mod) \
  ({ struct derivation_step *new_step_ \
       = alloca (sizeof (struct derivation_step)); \
     new_step_->result_set = (result); \
     new_step_->code = (module); \
     new_step_->last = (last_mod); \
     new_step_->next = NULL; \
     new_step_; })
/* A transformation which is requested more than once should not be
   searched for again, so every looked-up (FROM, TO) pair is remembered
   together with the step array found for it and the number of steps.  */
struct known_derivation
{
  const char *from;
  const char *to;
  struct gconv_step *steps;
  size_t nsteps;
};

/* Ordering function for the tree of known derivations: records are
   ordered by their `from' name and, for equal `from' names, by their
   `to' name.  Both comparisons are case-sensitive.  */
static int
derivation_compare (const void *p1, const void *p2)
{
  const struct known_derivation *lhs = (const struct known_derivation *) p1;
  const struct known_derivation *rhs = (const struct known_derivation *) p2;
  int order = strcmp (lhs->from, rhs->from);

  return order != 0 ? order : strcmp (lhs->to, rhs->to);
}
96 /* The search tree for known derivations. */
97 static void *known_derivations;
99 /* Look up whether given transformation was already requested before. */
100 static int
101 internal_function
102 derivation_lookup (const char *fromset, const char *toset,
103 struct gconv_step **handle, size_t *nsteps)
105 struct known_derivation key = { fromset, toset, NULL, 0 };
106 struct known_derivation **result;
108 result = __tfind (&key, &known_derivations, derivation_compare);
110 if (result == NULL)
111 return GCONV_NOCONV;
113 *handle = (*result)->steps;
114 *nsteps = (*result)->nsteps;
116 /* Please note that we return GCONV_OK even if the last search for
117 this transformation was unsuccessful. */
118 return GCONV_OK;
121 /* Add new derivation to list of known ones. */
122 static void
123 internal_function
124 add_derivation (const char *fromset, const char *toset,
125 struct gconv_step *handle, size_t nsteps)
127 struct known_derivation *new_deriv;
128 size_t fromset_len = strlen (fromset) + 1;
129 size_t toset_len = strlen (toset) + 1;
131 new_deriv = (struct known_derivation *)
132 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
133 if (new_deriv != NULL)
135 new_deriv->from = memcpy (new_deriv + 1, fromset, fromset_len);
136 new_deriv->to = memcpy ((char *) new_deriv->from + fromset_len,
137 toset, toset_len);
139 new_deriv->steps = handle;
140 new_deriv->nsteps = nsteps;
142 __tsearch (new_deriv, &known_derivations, derivation_compare);
144 /* Please note that we don't complain if the allocation failed. This
145 is not tragically but in case we use the memory debugging facilities
146 not all memory will be freed. */
149 static void
150 free_derivation (void *p)
152 struct known_derivation *deriv = (struct known_derivation *) p;
153 size_t cnt;
155 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
156 if (deriv->steps[cnt].end_fct)
157 _CALL_DL_FCT (deriv->steps[cnt].end_fct, (&deriv->steps[cnt]));
159 free ((struct gconv_step *) deriv->steps);
160 free (deriv);
164 static int
165 internal_function
166 gen_steps (struct derivation_step *best, const char *toset,
167 const char *fromset, struct gconv_step **handle, size_t *nsteps)
169 size_t step_cnt = 0;
170 struct gconv_step *result;
171 struct derivation_step *current;
172 int status = GCONV_NOMEM;
174 /* First determine number of steps. */
175 for (current = best; current->last != NULL; current = current->last)
176 ++step_cnt;
178 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
179 * step_cnt);
180 if (result != NULL)
182 int failed = 0;
184 *nsteps = step_cnt;
185 current = best;
186 while (step_cnt-- > 0)
188 result[step_cnt].from_name = (step_cnt == 0
189 ? __strdup (fromset)
190 : current->last->result_set);
191 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
192 ? __strdup (current->result_set)
193 : result[step_cnt + 1].from_name);
195 #ifndef STATIC_GCONV
196 if (current->code->module_name[0] == '/')
198 /* Load the module, return handle for it. */
199 struct gconv_loaded_object *shlib_handle =
200 __gconv_find_shlib (current->code->module_name);
202 if (shlib_handle == NULL)
204 failed = 1;
205 break;
208 result[step_cnt].shlib_handle = shlib_handle;
209 result[step_cnt].modname = shlib_handle->name;
210 result[step_cnt].counter = 0;
211 result[step_cnt].fct = shlib_handle->fct;
212 result[step_cnt].init_fct = shlib_handle->init_fct;
213 result[step_cnt].end_fct = shlib_handle->end_fct;
215 else
216 #endif
217 /* It's a builtin transformation. */
218 __gconv_get_builtin_trans (current->code->module_name,
219 &result[step_cnt]);
221 /* Call the init function. */
222 if (result[step_cnt].init_fct != NULL)
223 _CALL_DL_FCT (result[step_cnt].init_fct, (&result[step_cnt]));
225 current = current->last;
228 if (failed != 0)
230 /* Something went wrong while initializing the modules. */
231 while (++step_cnt < *nsteps)
233 if (result[step_cnt].end_fct != NULL)
234 _CALL_DL_FCT (result[step_cnt].end_fct, (&result[step_cnt]));
235 #ifndef STATIC_GCONV
236 __gconv_release_shlib (result[step_cnt].shlib_handle);
237 #endif
239 free (result);
240 *nsteps = 0;
241 *handle = NULL;
242 status = GCONV_NOCONV;
244 else
246 *handle = result;
247 status = GCONV_OK;
250 else
252 *nsteps = 0;
253 *handle = NULL;
256 return status;
260 /* The main function: find a possible derivation from the `fromset' (either
261 the given name or the alias) to the `toset' (again with alias). */
262 static int
263 internal_function
264 find_derivation (const char *toset, const char *toset_expand,
265 const char *fromset, const char *fromset_expand,
266 struct gconv_step **handle, size_t *nsteps)
268 __libc_lock_define_initialized (static, lock)
269 struct derivation_step *first, *current, **lastp, *best = NULL;
270 int best_cost_hi = 0;
271 int best_cost_lo = 0;
272 int result;
274 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
275 handle, nsteps);
276 if (result == GCONV_OK)
277 return result;
279 __libc_lock_lock (lock);
281 /* There is a small chance that this derivation is meanwhile found. This
282 can happen if in `find_derivation' we look for this derivation, didn't
283 find it but at the same time another thread looked for this derivation. */
284 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
285 handle, nsteps);
286 if (result == GCONV_OK)
287 return result;
289 /* ### TODO
290 For now we use a simple algorithm with quadratic runtime behaviour.
291 The task is to match the `toset' with any of the available rules,
292 starting from FROMSET. */
293 if (fromset_expand != NULL)
295 first = NEW_STEP (fromset_expand, NULL, NULL);
296 first->next = NEW_STEP (fromset, NULL, NULL);
297 lastp = &first->next->next;
299 else
301 first = NEW_STEP (fromset, NULL, NULL);
302 lastp = &first->next;
305 current = first;
306 while (current != NULL)
308 /* Now match all the available module specifications against the
309 current charset name. If any of them matches check whether
310 we already have a derivation for this charset. If yes, use the
311 one with the lower costs. Otherwise add the new charset at the
312 end. */
313 size_t cnt;
315 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
317 const char *result_set = NULL;
319 if (__gconv_modules_db[cnt]->from_pattern == NULL)
321 if (__strcasecmp (current->result_set,
322 __gconv_modules_db[cnt]->from_constpfx) == 0)
324 if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0)
325 result_set = toset_expand ?: toset;
326 else
327 result_set = __gconv_modules_db[cnt]->to_string;
330 else
331 /* We have a regular expression. First see if the prefix
332 matches. */
333 if (__strncasecmp (current->result_set,
334 __gconv_modules_db[cnt]->from_constpfx,
335 __gconv_modules_db[cnt]->from_constpfx_len)
336 == 0)
338 /* First compile the regex if not already done. */
339 if (__gconv_modules_db[cnt]->from_regex == NULL)
341 regex_t *newp = (regex_t *) malloc (sizeof (regex_t));
343 if (__regcomp (newp, __gconv_modules_db[cnt]->from_pattern,
344 REG_EXTENDED | REG_ICASE) != 0)
346 /* Something is wrong. Remember this. */
347 free (newp);
348 __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L;
350 else
351 __gconv_modules_db[cnt]->from_regex = newp;
354 if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L)
356 /* Try to match the from name. */
357 regmatch_t match[4];
359 if (__regexec (__gconv_modules_db[cnt]->from_regex,
360 current->result_set, 4, match, 0) == 0
361 && match[0].rm_so == 0
362 && current->result_set[match[0].rm_eo] == '\0')
364 /* At least the whole <from> string is matched.
365 We must now match sed-like possible
366 subexpressions from the match to the
367 toset expression. */
368 #define ENSURE_LEN(LEN) \
369 if (wp + (LEN) >= constr + len - 1) \
371 char *newp = alloca (len += 128); \
372 memcpy (newp, constr, wp - constr); \
373 wp = newp + (wp - constr); \
374 constr = newp; \
376 size_t len = 128;
377 char *constr = alloca (len);
378 char *wp = constr;
379 const char *cp = __gconv_modules_db[cnt]->to_string;
381 while (*cp != '\0')
383 if (*cp != '\\')
385 ENSURE_LEN (1);
386 *wp++ = *cp++;
388 else if (cp[1] == '\0')
389 /* Backslash at end of string. */
390 break;
391 else
393 ++cp;
394 if (*cp == '\\')
396 *wp++ = *cp++;
397 ENSURE_LEN (1);
399 else if (*cp < '1' || *cp > '3')
400 break;
401 else
403 int idx = *cp - '0';
404 if (match[idx].rm_so == -1)
405 /* No match. */
406 break;
408 ENSURE_LEN (match[idx].rm_eo
409 - match[idx].rm_so);
410 wp = __mempcpy (wp,
411 &current->result_set[match[idx].rm_so],
412 match[idx].rm_eo
413 - match[idx].rm_so);
414 ++cp;
418 if (*cp == '\0' && wp != constr)
420 /* Terminate the constructed string. */
421 *wp = '\0';
422 result_set = constr;
428 if (result_set != NULL)
430 /* We managed to find a derivation. First see whether
431 this is what we are looking for. */
432 if (__strcasecmp (result_set, toset) == 0
433 || (toset_expand != NULL
434 && __strcasecmp (result_set, toset_expand) == 0))
436 /* Determine the costs. If they are lower than the
437 previous solution (or this is the first solution)
438 remember this solution. */
439 int cost_hi = __gconv_modules_db[cnt]->cost_hi;
440 int cost_lo = __gconv_modules_db[cnt]->cost_lo;
441 struct derivation_step *runp = current;
442 while (runp->code != NULL)
444 cost_hi += runp->code->cost_hi;
445 cost_lo += runp->code->cost_lo;
446 runp = runp->last;
448 if (best == NULL || cost_hi < best_cost_hi
449 || (cost_hi == best_cost_hi && cost_lo < best_cost_lo))
451 best = NEW_STEP (result_set, __gconv_modules_db[cnt],
452 current);
453 best_cost_hi = cost_hi;
454 best_cost_lo = cost_lo;
457 else
459 /* Append at the end if there is no entry with this name. */
460 struct derivation_step *runp = first;
462 while (runp != NULL)
464 if (__strcasecmp (result_set, runp->result_set) == 0)
465 break;
466 runp = runp->next;
469 if (runp == NULL)
471 *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt],
472 current);
473 lastp = &(*lastp)->next;
479 /* Go on with the next entry. */
480 current = current->next;
483 if (best != NULL)
484 /* We really found a way to do the transformation. Now build a data
485 structure describing the transformation steps.*/
486 result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset,
487 handle, nsteps);
488 else
490 /* We haven't found a transformation. Clear the result values. */
491 *handle = NULL;
492 *nsteps = 0;
495 /* Add result in any case to list of known derivations. */
496 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
497 *handle, *nsteps);
499 __libc_lock_unlock (lock);
501 return result;
506 internal_function
507 __gconv_find_transform (const char *toset, const char *fromset,
508 struct gconv_step **handle, size_t *nsteps)
510 __libc_once_define (static, once);
511 const char *fromset_expand = NULL;
512 const char *toset_expand = NULL;
513 int result;
515 /* Ensure that the configuration data is read. */
516 __libc_once (once, __gconv_read_conf);
518 /* Acquire the lock. */
519 __libc_lock_lock (lock);
521 /* If we don't have a module database return with an error. */
522 if (__gconv_modules_db == NULL)
523 return GCONV_NOCONV;
525 /* See whether the names are aliases. */
526 if (__gconv_alias_db != NULL)
528 struct gconv_alias key;
529 struct gconv_alias **found;
531 key.fromname = fromset;
532 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
533 fromset_expand = found != NULL ? (*found)->toname : NULL;
535 key.fromname = toset;
536 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
537 toset_expand = found != NULL ? (*found)->toname : NULL;
540 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
541 handle, nsteps);
543 #ifndef STATIC_GCONV
544 /* Increment the user counter. */
545 if (result == GCONV_OK)
547 size_t cnt = *nsteps;
548 struct gconv_step *steps = *handle;
551 if (steps[--cnt].counter++ == 0)
553 steps[cnt].shlib_handle =
554 __gconv_find_shlib (steps[cnt].modname);
555 if (steps[cnt].shlib_handle == NULL)
557 /* Oops, this is the second time we use this module (after
558 unloading) and this time loading failed!? */
559 while (++cnt < *nsteps)
560 __gconv_release_shlib (steps[cnt].shlib_handle);
561 result = GCONV_NOCONV;
562 break;
565 while (cnt > 0);
567 #endif
569 /* Release the lock. */
570 __libc_lock_unlock (lock);
572 /* The following code is necessary since `find_derivation' will return
573 GCONV_OK even when no derivation was found but the same request
574 was processed before. I.e., negative results will also be cached. */
575 return (result == GCONV_OK
576 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
577 : result);
581 /* Release the entries of the modules list. */
583 internal_function
584 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
586 int result = GCONV_OK;
588 #ifndef STATIC_GCONV
589 /* Acquire the lock. */
590 __libc_lock_lock (lock);
592 while (nsteps-- > 0)
593 if (steps[nsteps].shlib_handle != NULL
594 && --steps[nsteps].counter == 0)
596 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
597 if (result != GCONV_OK)
598 break;
599 steps[nsteps].shlib_handle = NULL;
602 /* Release the lock. */
603 __libc_lock_unlock (lock);
604 #endif
606 return result;
610 /* Free all resources if necessary. */
611 static void __attribute__ ((unused))
612 free_mem (void)
614 size_t cnt;
616 if (__gconv_alias_db != NULL)
617 __tdestroy (__gconv_alias_db, free);
619 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
621 if (__gconv_modules_db[cnt]->from_regex != NULL)
622 __regfree ((regex_t *) __gconv_modules_db[cnt]->from_regex);
624 /* Modules which names do not start with a slash are builtin
625 transformations and the memory is not allocated dynamically. */
626 if (__gconv_modules_db[cnt]->module_name[0] == '/')
627 free (__gconv_modules_db[cnt]);
630 if (known_derivations != NULL)
631 __tdestroy (known_derivations, free_derivation);
634 text_set_element (__libc_subfreeres, free_mem);