Update copyright notices with scripts/update-copyrights
[glibc.git] / iconv / gconv_trans.c
blob1e25854ccfa84e4f612320f61e6cafa2f955a7d9
1 /* Transliteration using the locale's data.
2 Copyright (C) 2000-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 #include <assert.h>
21 #include <dlfcn.h>
22 #include <search.h>
23 #include <stdint.h>
24 #include <string.h>
25 #include <stdlib.h>
27 #include <bits/libc-lock.h>
28 #include "gconv_int.h"
29 #include "../locale/localeinfo.h"
32 int
33 __gconv_transliterate (struct __gconv_step *step,
34 struct __gconv_step_data *step_data,
35 void *trans_data __attribute__ ((unused)),
36 const unsigned char *inbufstart,
37 const unsigned char **inbufp,
38 const unsigned char *inbufend,
39 unsigned char **outbufstart, size_t *irreversible)
41 /* Find out about the locale's transliteration. */
42 uint_fast32_t size;
43 const uint32_t *from_idx;
44 const uint32_t *from_tbl;
45 const uint32_t *to_idx;
46 const uint32_t *to_tbl;
47 const uint32_t *winbuf;
48 const uint32_t *winbufend;
49 uint_fast32_t low;
50 uint_fast32_t high;
52 /* The input buffer. There are actually 4-byte values. */
53 winbuf = (const uint32_t *) *inbufp;
54 winbufend = (const uint32_t *) inbufend;
56 __gconv_fct fct = step->__fct;
57 #ifdef PTR_DEMANGLE
58 if (step->__shlib_handle != NULL)
59 PTR_DEMANGLE (fct);
60 #endif
62 /* If there is no transliteration information in the locale don't do
63 anything and return the error. */
64 size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
65 if (size == 0)
66 goto no_rules;
68 /* Get the rest of the values. */
69 from_idx =
70 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
71 from_tbl =
72 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
73 to_idx =
74 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
75 to_tbl =
76 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
78 /* Test whether there is enough input. */
79 if (winbuf + 1 > winbufend)
80 return (winbuf == winbufend
81 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
83 /* The array starting at FROM_IDX contains indeces to the string table
84 in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we
85 are doing binary search. */
86 low = 0;
87 high = size;
88 while (low < high)
90 uint_fast32_t med = (low + high) / 2;
91 uint32_t idx;
92 int cnt;
94 /* Compare the string at this index with the string at the current
95 position in the input buffer. */
96 idx = from_idx[med];
97 cnt = 0;
100 if (from_tbl[idx + cnt] != winbuf[cnt])
101 /* Does not match. */
102 break;
103 ++cnt;
105 while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
107 if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
109 /* Found a matching input sequence. Now try to convert the
110 possible replacements. */
111 uint32_t idx2 = to_idx[med];
115 /* Determine length of replacement. */
116 uint_fast32_t len = 0;
117 int res;
118 const unsigned char *toinptr;
119 unsigned char *outptr;
121 while (to_tbl[idx2 + len] != L'\0')
122 ++len;
124 /* Try this input text. */
125 toinptr = (const unsigned char *) &to_tbl[idx2];
126 outptr = *outbufstart;
127 res = DL_CALL_FCT (fct,
128 (step, step_data, &toinptr,
129 (const unsigned char *) &to_tbl[idx2 + len],
130 &outptr, NULL, 0, 0));
131 if (res != __GCONV_ILLEGAL_INPUT)
133 /* If the conversion succeeds we have to increment the
134 input buffer. */
135 if (res == __GCONV_EMPTY_INPUT)
137 *inbufp += cnt * sizeof (uint32_t);
138 ++*irreversible;
139 res = __GCONV_OK;
141 /* Do not increment the output pointer if we could not
142 store the entire output. */
143 if (res != __GCONV_FULL_OUTPUT)
144 *outbufstart = outptr;
146 return res;
149 /* Next replacement. */
150 idx2 += len + 1;
152 while (to_tbl[idx2] != L'\0');
154 /* Nothing found, continue searching. */
156 else if (cnt > 0)
157 /* This means that the input buffer contents matches a prefix of
158 an entry. Since we cannot match it unless we get more input,
159 we will tell the caller about it. */
160 return __GCONV_INCOMPLETE_INPUT;
162 if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
163 low = med + 1;
164 else
165 high = med;
168 no_rules:
169 /* Maybe the character is supposed to be ignored. */
170 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
172 int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
173 const uint32_t *ranges =
174 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
175 const uint32_t wc = *(const uint32_t *) (*inbufp);
176 int i;
178 /* Test whether there is enough input. */
179 if (winbuf + 1 > winbufend)
180 return (winbuf == winbufend
181 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
183 for (i = 0; i < n; ranges += 3, ++i)
184 if (ranges[0] <= wc && wc <= ranges[1]
185 && (wc - ranges[0]) % ranges[2] == 0)
187 /* Matches the range. Ignore it. */
188 *inbufp += 4;
189 ++*irreversible;
190 return __GCONV_OK;
192 else if (wc < ranges[0])
193 /* There cannot be any other matching range since they are
194 sorted. */
195 break;
198 /* One last chance: use the default replacement. */
199 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
201 const uint32_t *default_missing = (const uint32_t *)
202 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
203 const unsigned char *toinptr = (const unsigned char *) default_missing;
204 uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
205 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
206 unsigned char *outptr;
207 int res;
209 /* Test whether there is enough input. */
210 if (winbuf + 1 > winbufend)
211 return (winbuf == winbufend
212 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
214 outptr = *outbufstart;
215 res = DL_CALL_FCT (fct,
216 (step, step_data, &toinptr,
217 (const unsigned char *) (default_missing + len),
218 &outptr, NULL, 0, 0));
220 if (res != __GCONV_ILLEGAL_INPUT)
222 /* If the conversion succeeds we have to increment the
223 input buffer. */
224 if (res == __GCONV_EMPTY_INPUT)
226 /* This worked but is not reversible. */
227 ++*irreversible;
228 *inbufp += 4;
229 res = __GCONV_OK;
231 *outbufstart = outptr;
233 return res;
237 /* Haven't found a match. */
238 return __GCONV_ILLEGAL_INPUT;
242 /* Structure to represent results of found (or not) transliteration
243 modules. */
244 struct known_trans
246 /* This structure must remain the first member. */
247 struct trans_struct info;
249 char *fname;
250 void *handle;
251 int open_count;
255 /* Tree with results of previous calls to __gconv_translit_find. */
256 static void *search_tree;
258 /* We modify global data. */
259 __libc_lock_define_initialized (static, lock);
262 /* Compare two transliteration entries. */
263 static int
264 trans_compare (const void *p1, const void *p2)
266 const struct known_trans *s1 = (const struct known_trans *) p1;
267 const struct known_trans *s2 = (const struct known_trans *) p2;
269 return strcmp (s1->info.name, s2->info.name);
273 /* Open (maybe reopen) the module named in the struct. Get the function
274 and data structure pointers we need. */
275 static int
276 open_translit (struct known_trans *trans)
278 __gconv_trans_query_fct queryfct;
280 trans->handle = __libc_dlopen (trans->fname);
281 if (trans->handle == NULL)
282 /* Not available. */
283 return 1;
285 /* Find the required symbol. */
286 queryfct = __libc_dlsym (trans->handle, "gconv_trans_context");
287 if (queryfct == NULL)
289 /* We cannot live with that. */
290 close_and_out:
291 __libc_dlclose (trans->handle);
292 trans->handle = NULL;
293 return 1;
296 /* Get the context. */
297 if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames)
298 != 0)
299 goto close_and_out;
301 /* Of course we also have to have the actual function. */
302 trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans");
303 if (trans->info.trans_fct == NULL)
304 goto close_and_out;
306 /* Now the optional functions. */
307 trans->info.trans_init_fct =
308 __libc_dlsym (trans->handle, "gconv_trans_init");
309 trans->info.trans_context_fct =
310 __libc_dlsym (trans->handle, "gconv_trans_context");
311 trans->info.trans_end_fct =
312 __libc_dlsym (trans->handle, "gconv_trans_end");
314 trans->open_count = 1;
316 return 0;
321 internal_function
322 __gconv_translit_find (struct trans_struct *trans)
324 struct known_trans **found;
325 const struct path_elem *runp;
326 int res = 1;
328 /* We have to have a name. */
329 assert (trans->name != NULL);
331 /* Acquire the lock. */
332 __libc_lock_lock (lock);
334 /* See whether we know this module already. */
335 found = __tfind (trans, &search_tree, trans_compare);
336 if (found != NULL)
338 /* Is this module available? */
339 if ((*found)->handle != NULL)
341 /* Maybe we have to reopen the file. */
342 if ((*found)->handle != (void *) -1)
343 /* The object is not unloaded. */
344 res = 0;
345 else if (open_translit (*found) == 0)
347 /* Copy the data. */
348 *trans = (*found)->info;
349 (*found)->open_count++;
350 res = 0;
354 else
356 size_t name_len = strlen (trans->name) + 1;
357 int need_so = 0;
358 struct known_trans *newp;
360 /* We have to continue looking for the module. */
361 if (__gconv_path_elem == NULL)
362 __gconv_get_path ();
364 /* See whether we have to append .so. */
365 if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0)
366 need_so = 1;
368 /* Create a new entry. */
369 newp = (struct known_trans *) malloc (sizeof (struct known_trans)
370 + (__gconv_max_path_elem_len
371 + name_len + 3)
372 + name_len);
373 if (newp != NULL)
375 char *cp;
377 /* Clear the struct. */
378 memset (newp, '\0', sizeof (struct known_trans));
380 /* Store a copy of the module name. */
381 newp->info.name = cp = (char *) (newp + 1);
382 cp = __mempcpy (cp, trans->name, name_len);
384 newp->fname = cp;
386 /* Search in all the directories. */
387 for (runp = __gconv_path_elem; runp->name != NULL; ++runp)
389 cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name),
390 trans->name, name_len);
391 if (need_so)
392 memcpy (cp, ".so", sizeof (".so"));
394 if (open_translit (newp) == 0)
396 /* We found a module. */
397 res = 0;
398 break;
402 if (res)
403 newp->fname = NULL;
405 /* In any case we'll add the entry to our search tree. */
406 if (__tsearch (newp, &search_tree, trans_compare) == NULL)
408 /* Yickes, this should not happen. Unload the object. */
409 res = 1;
410 /* XXX unload here. */
415 __libc_lock_unlock (lock);
417 return res;