1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
25 #include <sys/param.h>
26 #include <bits/libc-lock.h>
29 #include <gconv_int.h>
32 /* Simple data structure for alias mapping. We have two names, `from'
34 void *__gconv_alias_db
;
36 /* Array with available modules. */
37 struct gconv_module
*__gconv_modules_db
;
39 /* We modify global data. */
40 __libc_lock_define_initialized (static, lock
)
43 /* Function for searching alias. */
45 __gconv_alias_compare (const void *p1
, const void *p2
)
47 struct gconv_alias
*s1
= (struct gconv_alias
*) p1
;
48 struct gconv_alias
*s2
= (struct gconv_alias
*) p2
;
49 return __strcasecmp (s1
->fromname
, s2
->fromname
);
53 /* To search for a derivation we create a list of intermediate steps.
54 Each element contains a pointer to the element which precedes it
55 in the derivation order. */
56 struct derivation_step
58 const char *result_set
;
59 size_t result_set_len
;
62 struct gconv_module
*code
;
63 struct derivation_step
*last
;
64 struct derivation_step
*next
;
67 #define NEW_STEP(result, hi, lo, module, last_mod) \
68 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
69 newp->result_set = result; \
70 newp->result_set_len = strlen (result); \
73 newp->code = module; \
74 newp->last = last_mod; \
79 /* If a specific transformation is used more than once we should not need
80 to start looking for it again. Instead cache each successful result. */
81 struct known_derivation
85 struct gconv_step
*steps
;
89 /* Compare function for database of found derivations. */
91 derivation_compare (const void *p1
, const void *p2
)
93 struct known_derivation
*s1
= (struct known_derivation
*) p1
;
94 struct known_derivation
*s2
= (struct known_derivation
*) p2
;
97 result
= strcmp (s1
->from
, s2
->from
);
99 result
= strcmp (s1
->to
, s2
->to
);
103 /* The search tree for known derivations. */
104 static void *known_derivations
;
106 /* Look up whether given transformation was already requested before. */
109 derivation_lookup (const char *fromset
, const char *toset
,
110 struct gconv_step
**handle
, size_t *nsteps
)
112 struct known_derivation key
= { fromset
, toset
, NULL
, 0 };
113 struct known_derivation
**result
;
115 result
= __tfind (&key
, &known_derivations
, derivation_compare
);
120 *handle
= (*result
)->steps
;
121 *nsteps
= (*result
)->nsteps
;
123 /* Please note that we return GCONV_OK even if the last search for
124 this transformation was unsuccessful. */
128 /* Add new derivation to list of known ones. */
131 add_derivation (const char *fromset
, const char *toset
,
132 struct gconv_step
*handle
, size_t nsteps
)
134 struct known_derivation
*new_deriv
;
135 size_t fromset_len
= strlen (fromset
) + 1;
136 size_t toset_len
= strlen (toset
) + 1;
138 new_deriv
= (struct known_derivation
*)
139 malloc (sizeof (struct known_derivation
) + fromset_len
+ toset_len
);
140 if (new_deriv
!= NULL
)
142 new_deriv
->from
= memcpy (new_deriv
+ 1, fromset
, fromset_len
);
143 new_deriv
->to
= memcpy ((char *) new_deriv
->from
+ fromset_len
,
146 new_deriv
->steps
= handle
;
147 new_deriv
->nsteps
= nsteps
;
149 __tsearch (new_deriv
, &known_derivations
, derivation_compare
);
151 /* Please note that we don't complain if the allocation failed. This
152 is not tragically but in case we use the memory debugging facilities
153 not all memory will be freed. */
157 free_derivation (void *p
)
159 struct known_derivation
*deriv
= (struct known_derivation
*) p
;
162 for (cnt
= 0; cnt
< deriv
->nsteps
; ++cnt
)
163 if (deriv
->steps
[cnt
].end_fct
)
164 _CALL_DL_FCT (deriv
->steps
[cnt
].end_fct
, (&deriv
->steps
[cnt
]));
166 free ((struct gconv_step
*) deriv
->steps
);
173 gen_steps (struct derivation_step
*best
, const char *toset
,
174 const char *fromset
, struct gconv_step
**handle
, size_t *nsteps
)
177 struct gconv_step
*result
;
178 struct derivation_step
*current
;
179 int status
= GCONV_NOMEM
;
181 /* First determine number of steps. */
182 for (current
= best
; current
->last
!= NULL
; current
= current
->last
)
185 result
= (struct gconv_step
*) malloc (sizeof (struct gconv_step
)
194 while (step_cnt
-- > 0)
196 result
[step_cnt
].from_name
= (step_cnt
== 0
198 : current
->last
->result_set
);
199 result
[step_cnt
].to_name
= (step_cnt
+ 1 == *nsteps
200 ? __strdup (current
->result_set
)
201 : result
[step_cnt
+ 1].from_name
);
204 if (current
->code
->module_name
[0] == '/')
206 /* Load the module, return handle for it. */
207 struct gconv_loaded_object
*shlib_handle
=
208 __gconv_find_shlib (current
->code
->module_name
);
210 if (shlib_handle
== NULL
)
216 result
[step_cnt
].shlib_handle
= shlib_handle
;
217 result
[step_cnt
].modname
= shlib_handle
->name
;
218 result
[step_cnt
].counter
= 0;
219 result
[step_cnt
].fct
= shlib_handle
->fct
;
220 result
[step_cnt
].init_fct
= shlib_handle
->init_fct
;
221 result
[step_cnt
].end_fct
= shlib_handle
->end_fct
;
225 /* It's a builtin transformation. */
226 __gconv_get_builtin_trans (current
->code
->module_name
,
229 /* Call the init function. */
230 if (result
[step_cnt
].init_fct
!= NULL
)
232 status
= _CALL_DL_FCT (result
[step_cnt
].init_fct
,
233 (&result
[step_cnt
]));
235 if (status
!= GCONV_OK
)
238 /* Make sure we unload this modules. */
244 current
= current
->last
;
249 /* Something went wrong while initializing the modules. */
250 while (++step_cnt
< *nsteps
)
252 if (result
[step_cnt
].end_fct
!= NULL
)
253 _CALL_DL_FCT (result
[step_cnt
].end_fct
, (&result
[step_cnt
]));
255 __gconv_release_shlib (result
[step_cnt
].shlib_handle
);
261 if (status
== GCONV_OK
)
262 status
= GCONV_NOCONV
;
277 /* The main function: find a possible derivation from the `fromset' (either
278 the given name or the alias) to the `toset' (again with alias). */
281 find_derivation (const char *toset
, const char *toset_expand
,
282 const char *fromset
, const char *fromset_expand
,
283 struct gconv_step
**handle
, size_t *nsteps
)
285 __libc_lock_define_initialized (static, lock
)
286 struct derivation_step
*first
, *current
, **lastp
, *solution
= NULL
;
287 int best_cost_hi
= INT_MAX
;
288 int best_cost_lo
= INT_MAX
;
291 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
293 if (result
== GCONV_OK
)
296 __libc_lock_lock (lock
);
298 /* There is a small chance that this derivation is meanwhile found. This
299 can happen if in `find_derivation' we look for this derivation, didn't
300 find it but at the same time another thread looked for this derivation. */
301 result
= derivation_lookup (fromset_expand
?: fromset
, toset_expand
?: toset
,
303 if (result
== GCONV_OK
)
305 __libc_lock_unlock (lock
);
309 /* For now we use a simple algorithm with quadratic runtime behaviour.
310 The task is to match the `toset' with any of the available rules,
311 starting from FROMSET. */
312 if (fromset_expand
!= NULL
)
314 first
= NEW_STEP (fromset_expand
, 0, 0, NULL
, NULL
);
315 first
->next
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
316 lastp
= &first
->next
->next
;
320 first
= NEW_STEP (fromset
, 0, 0, NULL
, NULL
);
321 lastp
= &first
->next
;
324 for (current
= first
; current
!= NULL
; current
= current
->next
)
326 /* Now match all the available module specifications against the
327 current charset name. If any of them matches check whether
328 we already have a derivation for this charset. If yes, use the
329 one with the lower costs. Otherwise add the new charset at the
332 The module database is organized in a tree form which allows to
333 search for prefixes. So we search for the first entry with a
334 matching prefix and any other matching entry can be found from
336 struct gconv_module
*node
= __gconv_modules_db
;
338 /* Maybe it is not necessary anymore to look for a solution for
339 this entry since the cost is already as high (or heigher) as
340 the cost for the best solution so far. */
341 if (current
->cost_hi
> best_cost_hi
342 || (current
->cost_hi
== best_cost_hi
343 && current
->cost_lo
>= best_cost_lo
))
348 int cmpres
= strncmp (current
->result_set
, node
->from_constpfx
,
349 MIN (current
->result_set_len
,
350 node
->from_constpfx_len
));
354 /* Walk through the list of modules with this prefix and
355 try to match the name. */
356 struct gconv_module
*runp
;
358 if (current
->result_set_len
< node
->from_constpfx_len
)
359 /* Cannot possibly match. */
362 /* Check all the modules with this prefix. */
366 const char *result_set
= NULL
;
368 if (runp
->from_pattern
== NULL
)
370 /* This is a simple entry and therefore we have a
371 found an matching entry if the strings are really
373 if (current
->result_set_len
== runp
->from_constpfx_len
)
375 if (strcmp (runp
->to_string
, "-") == 0)
376 result_set
= toset_expand
?: toset
;
378 result_set
= runp
->to_string
;
383 /* Compile the regular expression if necessary. */
384 if (runp
->from_regex
== NULL
)
386 if (__regcomp (&runp
->from_regex_mem
,
388 REG_EXTENDED
| REG_ICASE
) != 0)
389 /* Something is wrong. Remember this. */
390 runp
->from_regex
= (regex_t
*) -1L;
392 runp
->from_regex
= &runp
->from_regex_mem
;
395 if (runp
->from_regex
!= (regex_t
*) -1L)
399 /* Try to match the regular expression. */
400 if (__regexec (runp
->from_regex
, current
->result_set
,
402 && match
[0].rm_so
== 0
403 && current
->result_set
[match
[0].rm_eo
] == '\0')
405 /* At least the whole <from> string is matched.
406 We must now match sed-like possible
407 subexpressions from the match to the
409 #define ENSURE_LEN(LEN) \
410 if (wp + (LEN) >= constr + len - 1) \
412 char *newp = alloca (len += 128); \
413 wp = __mempcpy (newp, constr, wp - constr); \
417 char *constr
= alloca (len
);
419 const char *cp
= runp
->to_string
;
428 else if (cp
[1] == '\0')
429 /* Backslash at end of string. */
439 else if (*cp
< '1' || *cp
> '3')
444 if (match
[idx
].rm_so
== -1)
448 ENSURE_LEN (match
[idx
].rm_eo
451 ¤t
->result_set
[match
[idx
].rm_so
],
458 if (*cp
== '\0' && wp
!= constr
)
460 /* Terminate the constructed string. */
468 if (result_set
!= NULL
)
470 int cost_hi
= runp
->cost_hi
+ current
->cost_hi
;
471 int cost_lo
= runp
->cost_lo
+ current
->cost_lo
;
472 struct derivation_step
*step
;
474 /* We managed to find a derivation. First see whether
475 this is what we are looking for. */
476 if (__strcasecmp (result_set
, toset
) == 0
477 || (toset_expand
!= NULL
478 && __strcasecmp (result_set
, toset_expand
) == 0))
480 if (solution
== NULL
|| cost_hi
< best_cost_hi
481 || (cost_hi
== best_cost_hi
482 && cost_lo
< best_cost_lo
))
484 best_cost_hi
= cost_hi
;
485 best_cost_lo
= cost_lo
;
488 /* Append this solution to list. */
489 if (solution
== NULL
)
490 solution
= NEW_STEP (result_set
, 0, 0, runp
,
494 while (solution
->next
!= NULL
)
495 solution
= solution
->next
;
497 solution
->next
= NEW_STEP (result_set
, 0, 0,
501 else if (cost_hi
< best_cost_hi
502 || (cost_hi
== best_cost_hi
503 && cost_lo
< best_cost_lo
))
505 /* Append at the end if there is no entry with
507 for (step
= first
; step
!= NULL
; step
= step
->next
)
508 if (__strcasecmp (result_set
, step
->result_set
)
514 *lastp
= NEW_STEP (result_set
,
517 lastp
= &(*lastp
)->next
;
519 else if (step
->cost_hi
> cost_hi
520 || (step
->cost_hi
== cost_hi
521 && step
->cost_lo
> cost_lo
))
524 step
->last
= current
;
526 /* Update the cost for all steps. */
527 for (step
= first
; step
!= NULL
;
530 struct derivation_step
*back
;
532 if (step
->code
== NULL
)
533 /* This is one of the entries we started
537 step
->cost_hi
= step
->code
->cost_hi
;
538 step
->cost_lo
= step
->code
->cost_lo
;
540 for (back
= step
->last
; back
->code
!= NULL
;
543 step
->cost_hi
+= back
->code
->cost_hi
;
544 step
->cost_lo
+= back
->code
->cost_lo
;
548 for (step
= solution
; step
!= NULL
;
551 step
->cost_hi
= (step
->code
->cost_hi
552 + step
->last
->cost_hi
);
553 step
->cost_lo
= (step
->code
->cost_lo
554 + step
->last
->cost_lo
);
556 if (step
->cost_hi
< best_cost_hi
557 || (step
->cost_hi
== best_cost_hi
558 && step
->cost_lo
< best_cost_lo
))
561 best_cost_hi
= step
->cost_hi
;
562 best_cost_lo
= step
->cost_lo
;
571 while (runp
!= NULL
);
573 if (current
->result_set_len
== node
->from_constpfx_len
)
576 node
= node
->matching
;
585 if (solution
!= NULL
)
586 /* We really found a way to do the transformation. Now build a data
587 structure describing the transformation steps.*/
588 result
= gen_steps (solution
, toset_expand
?: toset
,
589 fromset_expand
?: fromset
, handle
, nsteps
);
592 /* We haven't found a transformation. Clear the result values. */
597 /* Add result in any case to list of known derivations. */
598 add_derivation (fromset_expand
?: fromset
, toset_expand
?: toset
,
601 __libc_lock_unlock (lock
);
609 __gconv_find_transform (const char *toset
, const char *fromset
,
610 struct gconv_step
**handle
, size_t *nsteps
)
612 __libc_once_define (static, once
);
613 const char *fromset_expand
= NULL
;
614 const char *toset_expand
= NULL
;
617 /* Ensure that the configuration data is read. */
618 __libc_once (once
, __gconv_read_conf
);
620 /* Acquire the lock. */
621 __libc_lock_lock (lock
);
623 /* If we don't have a module database return with an error. */
624 if (__gconv_modules_db
== NULL
)
626 __libc_lock_unlock (lock
);
630 /* See whether the names are aliases. */
631 if (__gconv_alias_db
!= NULL
)
633 struct gconv_alias key
;
634 struct gconv_alias
**found
;
636 key
.fromname
= fromset
;
637 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
638 fromset_expand
= found
!= NULL
? (*found
)->toname
: NULL
;
640 key
.fromname
= toset
;
641 found
= __tfind (&key
, &__gconv_alias_db
, __gconv_alias_compare
);
642 toset_expand
= found
!= NULL
? (*found
)->toname
: NULL
;
645 result
= find_derivation (toset
, toset_expand
, fromset
, fromset_expand
,
649 /* Increment the user counter. */
650 if (result
== GCONV_OK
)
652 size_t cnt
= *nsteps
;
653 struct gconv_step
*steps
= *handle
;
656 if (steps
[--cnt
].counter
++ == 0)
658 steps
[cnt
].shlib_handle
=
659 __gconv_find_shlib (steps
[cnt
].modname
);
660 if (steps
[cnt
].shlib_handle
== NULL
)
662 /* Oops, this is the second time we use this module (after
663 unloading) and this time loading failed!? */
664 while (++cnt
< *nsteps
)
665 __gconv_release_shlib (steps
[cnt
].shlib_handle
);
666 result
= GCONV_NOCONV
;
674 /* Release the lock. */
675 __libc_lock_unlock (lock
);
677 /* The following code is necessary since `find_derivation' will return
678 GCONV_OK even when no derivation was found but the same request
679 was processed before. I.e., negative results will also be cached. */
680 return (result
== GCONV_OK
681 ? (*handle
== NULL
? GCONV_NOCONV
: GCONV_OK
)
686 /* Release the entries of the modules list. */
689 __gconv_close_transform (struct gconv_step
*steps
, size_t nsteps
)
691 int result
= GCONV_OK
;
694 /* Acquire the lock. */
695 __libc_lock_lock (lock
);
698 if (steps
[nsteps
].shlib_handle
!= NULL
699 && --steps
[nsteps
].counter
== 0)
701 result
= __gconv_release_shlib (steps
[nsteps
].shlib_handle
);
702 if (result
!= GCONV_OK
)
704 steps
[nsteps
].shlib_handle
= NULL
;
707 /* Release the lock. */
708 __libc_lock_unlock (lock
);
715 /* Free the modules mentioned. */
718 free_modules_db (struct gconv_module
*node
)
720 if (node
->left
!= NULL
)
721 free_modules_db (node
->left
);
722 if (node
->right
!= NULL
)
723 free_modules_db (node
->right
);
724 if (node
->same
!= NULL
)
725 free_modules_db (node
->same
);
728 struct gconv_module
*act
= node
;
729 node
= node
->matching
;
732 while (node
!= NULL
);
736 /* Free all resources if necessary. */
737 static void __attribute__ ((unused
))
740 if (__gconv_alias_db
!= NULL
)
741 __tdestroy (__gconv_alias_db
, free
);
743 free_modules_db (__gconv_modules_db
);
745 if (known_derivations
!= NULL
)
746 __tdestroy (known_derivations
, free_derivation
);
749 text_set_element (__libc_subfreeres
, free_mem
);