Add some more ulps.
[glibc/pb-stable.git] / iconv / gconv_trans.c
blob4a42a35afd0801d2355d187242adb54829db2295
1 /* Transliteration using the locale's data.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <assert.h>
22 #include <dlfcn.h>
23 #include <search.h>
24 #include <stdint.h>
25 #include <string.h>
26 #include <stdlib.h>
28 #include <bits/libc-lock.h>
29 #include "gconv_int.h"
30 #include "../locale/localeinfo.h"
33 int
34 __gconv_transliterate (struct __gconv_step *step,
35 struct __gconv_step_data *step_data,
36 void *trans_data __attribute__ ((unused)),
37 const unsigned char *inbufstart,
38 const unsigned char **inbufp,
39 const unsigned char *inbufend,
40 unsigned char **outbufstart, size_t *irreversible)
42 /* Find out about the locale's transliteration. */
43 uint_fast32_t size;
44 uint32_t *from_idx;
45 uint32_t *from_tbl;
46 uint32_t *to_idx;
47 uint32_t *to_tbl;
48 uint32_t *winbuf;
49 uint32_t *winbufend;
50 uint_fast32_t low;
51 uint_fast32_t high;
53 /* The input buffer. There are actually 4-byte values. */
54 winbuf = (uint32_t *) *inbufp;
55 winbufend = (uint32_t *) inbufend;
57 /* If there is no transliteration information in the locale don't do
58 anything and return the error. */
59 size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
60 if (size == 0)
61 goto no_rules;
63 /* Get the rest of the values. */
64 from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
65 from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
66 to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
67 to_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
69 /* Test whether there is enough input. */
70 if (winbuf + 1 > winbufend)
71 return (winbuf == winbufend
72 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
74 /* The array starting at FROM_IDX contains indeces to the string table
75 in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we
76 are doing binary search. */
77 low = 0;
78 high = size;
79 while (low < high)
81 uint_fast32_t med = (low + high) / 2;
82 uint32_t idx;
83 int cnt;
85 /* Compare the string at this index with the string at the current
86 position in the input buffer. */
87 idx = from_idx[med];
88 cnt = 0;
91 if (from_tbl[idx + cnt] != winbuf[cnt])
92 /* Does not match. */
93 break;
94 ++cnt;
96 while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
98 if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
100 /* Found a matching input sequence. Now try to convert the
101 possible replacements. */
102 uint32_t idx2 = to_idx[med];
106 /* Determine length of replacement. */
107 uint_fast32_t len = 0;
108 int res;
109 const unsigned char *toinptr;
111 while (to_tbl[idx2 + len] != L'\0')
112 ++len;
114 /* Try this input text. */
115 toinptr = (const unsigned char *) &to_tbl[idx2];
116 res = DL_CALL_FCT (step->__fct,
117 (step, step_data, &toinptr,
118 (const unsigned char *) &to_tbl[idx2 + len],
119 (unsigned char **) outbufstart,
120 NULL, 0, 0));
121 if (res != __GCONV_ILLEGAL_INPUT)
123 /* If the conversion succeeds we have to increment the
124 input buffer. */
125 if (res == __GCONV_EMPTY_INPUT)
127 *inbufp += cnt * sizeof (uint32_t);
128 ++*irreversible;
129 res = __GCONV_OK;
132 return res;
135 /* Next replacement. */
136 idx2 += len + 1;
138 while (to_tbl[idx2] != L'\0');
140 /* Nothing found, continue searching. */
142 else if (cnt > 0)
143 /* This means that the input buffer contents matches a prefix of
144 an entry. Since we cannot match it unless we get more input,
145 we will tell the caller about it. */
146 return __GCONV_INCOMPLETE_INPUT;
148 if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
149 low = med + 1;
150 else
151 high = med;
154 no_rules:
155 /* Maybe the character is supposed to be ignored. */
156 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
158 int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
159 uint32_t *ranges = (uint32_t *) _NL_CURRENT (LC_CTYPE,
160 _NL_CTYPE_TRANSLIT_IGNORE);
161 uint32_t wc = *(uint32_t *) (*inbufp);
162 int i;
164 /* Test whether there is enough input. */
165 if (winbuf + 1 > winbufend)
166 return (winbuf == winbufend
167 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
169 for (i = 0; i < n; ranges += 3, ++i)
170 if (ranges[0] <= wc && wc <= ranges[1]
171 && (wc - ranges[0]) % ranges[2] == 0)
173 /* Matches the range. Ignore it. */
174 *inbufp += 4;
175 ++*irreversible;
176 return __GCONV_OK;
178 else if (wc < ranges[0])
179 /* There cannot be any other matching range since they are
180 sorted. */
181 break;
184 /* One last chance: use the default replacement. */
185 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
187 uint32_t *default_missing = (uint32_t *)
188 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
189 const unsigned char *toinptr = (const unsigned char *) default_missing;
190 uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
191 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
192 int res;
194 /* Test whether there is enough input. */
195 if (winbuf + 1 > winbufend)
196 return (winbuf == winbufend
197 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
199 res = DL_CALL_FCT (step->__fct,
200 (step, step_data, &toinptr,
201 (const unsigned char *) (default_missing + len),
202 (unsigned char **) outbufstart,
203 NULL, 0, 0));
205 if (res != __GCONV_ILLEGAL_INPUT)
207 /* If the conversion succeeds we have to increment the
208 input buffer. */
209 if (res == __GCONV_EMPTY_INPUT)
211 /* This worked but is not reversible. */
212 ++*irreversible;
213 *inbufp += 4;
214 res = __GCONV_OK;
217 return res;
221 /* Haven't found a match. */
222 return __GCONV_ILLEGAL_INPUT;
226 /* Structure to represent results of found (or not) transliteration
227 modules. */
228 struct known_trans
230 /* This structure must remain the first member. */
231 struct trans_struct info;
233 const char *fname;
234 void *handle;
235 int open_count;
239 /* Tree with results of previous calls to __gconv_translit_find. */
240 static void *search_tree;
242 /* We modify global data. */
243 __libc_lock_define_initialized (static, lock);
246 /* Compare two transliteration entries. */
247 static int
248 trans_compare (const void *p1, const void *p2)
250 struct known_trans *s1 = (struct known_trans *) p1;
251 struct known_trans *s2 = (struct known_trans *) p2;
253 return strcmp (s1->info.name, s2->info.name);
257 /* Open (maybe reopen) the module named in the struct. Get the function
258 and data structure pointers we need. */
259 static int
260 open_translit (struct known_trans *trans)
262 __gconv_trans_query_fct queryfct;
264 trans->handle = __libc_dlopen (trans->fname);
265 if (trans->handle == NULL)
266 /* Not available. */
267 return 1;
269 /* Find the required symbol. */
270 queryfct = __libc_dlsym (trans->handle, "gconv_trans_context");
271 if (queryfct == NULL)
273 /* We cannot live with that. */
274 close_and_out:
275 __libc_dlclose (trans->handle);
276 trans->handle = NULL;
277 return 1;
280 /* Get the context. */
281 if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames)
282 != 0)
283 goto close_and_out;
285 /* Of course we also have to have the actual function. */
286 trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans");
287 if (trans->info.trans_fct == NULL)
288 goto close_and_out;
290 /* Now the optional functions. */
291 trans->info.trans_init_fct =
292 __libc_dlsym (trans->handle, "gconv_trans_init");
293 trans->info.trans_context_fct =
294 __libc_dlsym (trans->handle, "gconv_trans_context");
295 trans->info.trans_end_fct =
296 __libc_dlsym (trans->handle, "gconv_trans_end");
298 trans->open_count = 1;
300 return 0;
305 internal_function
306 __gconv_translit_find (struct trans_struct *trans)
308 struct known_trans **found;
309 const struct path_elem *runp;
310 int res = 1;
312 /* We have to have a name. */
313 assert (trans->name != NULL);
315 /* Acquire the lock. */
316 __libc_lock_lock (lock);
318 /* See whether we know this module already. */
319 found = __tfind (trans, &search_tree, trans_compare);
320 if (found != NULL)
322 /* Is this module available? */
323 if ((*found)->handle != NULL)
325 /* Maybe we have to reopen the file. */
326 if ((*found)->handle != (void *) -1)
327 /* The object is not unloaded. */
328 res = 0;
329 else if (open_translit (*found) == 0)
331 /* Copy the data. */
332 *trans = (*found)->info;
333 (*found)->open_count++;
334 res = 0;
338 else
340 size_t name_len = strlen (trans->name) + 1;
341 int need_so = 0;
342 struct known_trans *newp;
344 /* We have to continue looking for the module. */
345 if (__gconv_path_elem == NULL)
346 __gconv_get_path ();
348 /* See whether we have to append .so. */
349 if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0)
350 need_so = 1;
352 /* Create a new entry. */
353 newp = (struct known_trans *) malloc (sizeof (struct known_trans)
354 + (__gconv_max_path_elem_len
355 + name_len + 3)
356 + name_len);
357 if (newp != NULL)
359 char *cp;
361 /* Clear the struct. */
362 memset (newp, '\0', sizeof (struct known_trans));
364 /* Store a copy of the module name. */
365 newp->info.name = (char *) (newp + 1);
366 cp = __mempcpy ((char *) newp->info.name, trans->name, name_len);
368 newp->fname = cp;
370 /* Search in all the directories. */
371 for (runp = __gconv_path_elem; runp->name != NULL; ++runp)
373 cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name),
374 trans->name, name_len);
375 if (need_so)
376 memcpy (cp, ".so", sizeof (".so"));
378 if (open_translit (newp) == 0)
380 /* We found a module. */
381 res = 0;
382 break;
386 if (res)
387 newp->fname = NULL;
389 /* In any case we'll add the entry to our search tree. */
390 if (__tsearch (newp, &search_tree, trans_compare) == NULL)
392 /* Yickes, this should not happen. Unload the object. */
393 res = 1;
394 /* XXX unload here. */
399 __libc_lock_unlock (lock);
401 return res;