Properly recover from shorter read.
[glibc.git] / iconv / gconv_trans.c
blob1f1dd01b19e55180b23ab6cb35dd4fe983830db5
/* Transliteration using the locale's data.
   Copyright (C) 2000, 2009 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
21 #include <assert.h>
22 #include <dlfcn.h>
23 #include <search.h>
24 #include <stdint.h>
25 #include <string.h>
26 #include <stdlib.h>
28 #include <bits/libc-lock.h>
29 #include "gconv_int.h"
30 #include "../locale/localeinfo.h"
33 int
34 __gconv_transliterate (struct __gconv_step *step,
35 struct __gconv_step_data *step_data,
36 void *trans_data __attribute__ ((unused)),
37 const unsigned char *inbufstart,
38 const unsigned char **inbufp,
39 const unsigned char *inbufend,
40 unsigned char **outbufstart, size_t *irreversible)
42 /* Find out about the locale's transliteration. */
43 uint_fast32_t size;
44 const uint32_t *from_idx;
45 const uint32_t *from_tbl;
46 const uint32_t *to_idx;
47 const uint32_t *to_tbl;
48 const uint32_t *winbuf;
49 const uint32_t *winbufend;
50 uint_fast32_t low;
51 uint_fast32_t high;
53 /* The input buffer. There are actually 4-byte values. */
54 winbuf = (const uint32_t *) *inbufp;
55 winbufend = (const uint32_t *) inbufend;
57 __gconv_fct fct = step->__fct;
58 #ifdef PTR_DEMANGLE
59 if (step->__shlib_handle != NULL)
60 PTR_DEMANGLE (fct);
61 #endif
63 /* If there is no transliteration information in the locale don't do
64 anything and return the error. */
65 size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
66 if (size == 0)
67 goto no_rules;
69 /* Get the rest of the values. */
70 from_idx =
71 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
72 from_tbl =
73 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
74 to_idx =
75 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
76 to_tbl =
77 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
79 /* Test whether there is enough input. */
80 if (winbuf + 1 > winbufend)
81 return (winbuf == winbufend
82 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
84 /* The array starting at FROM_IDX contains indeces to the string table
85 in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we
86 are doing binary search. */
87 low = 0;
88 high = size;
89 while (low < high)
91 uint_fast32_t med = (low + high) / 2;
92 uint32_t idx;
93 int cnt;
95 /* Compare the string at this index with the string at the current
96 position in the input buffer. */
97 idx = from_idx[med];
98 cnt = 0;
101 if (from_tbl[idx + cnt] != winbuf[cnt])
102 /* Does not match. */
103 break;
104 ++cnt;
106 while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
108 if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
110 /* Found a matching input sequence. Now try to convert the
111 possible replacements. */
112 uint32_t idx2 = to_idx[med];
116 /* Determine length of replacement. */
117 uint_fast32_t len = 0;
118 int res;
119 const unsigned char *toinptr;
120 unsigned char *outptr;
122 while (to_tbl[idx2 + len] != L'\0')
123 ++len;
125 /* Try this input text. */
126 toinptr = (const unsigned char *) &to_tbl[idx2];
127 outptr = *outbufstart;
128 res = DL_CALL_FCT (fct,
129 (step, step_data, &toinptr,
130 (const unsigned char *) &to_tbl[idx2 + len],
131 &outptr, NULL, 0, 0));
132 if (res != __GCONV_ILLEGAL_INPUT)
134 /* If the conversion succeeds we have to increment the
135 input buffer. */
136 if (res == __GCONV_EMPTY_INPUT)
138 *inbufp += cnt * sizeof (uint32_t);
139 ++*irreversible;
140 res = __GCONV_OK;
142 /* Do not increment the output pointer if we could not
143 store the entire output. */
144 if (res != __GCONV_FULL_OUTPUT)
145 *outbufstart = outptr;
147 return res;
150 /* Next replacement. */
151 idx2 += len + 1;
153 while (to_tbl[idx2] != L'\0');
155 /* Nothing found, continue searching. */
157 else if (cnt > 0)
158 /* This means that the input buffer contents matches a prefix of
159 an entry. Since we cannot match it unless we get more input,
160 we will tell the caller about it. */
161 return __GCONV_INCOMPLETE_INPUT;
163 if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
164 low = med + 1;
165 else
166 high = med;
169 no_rules:
170 /* Maybe the character is supposed to be ignored. */
171 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
173 int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
174 const uint32_t *ranges =
175 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
176 const uint32_t wc = *(const uint32_t *) (*inbufp);
177 int i;
179 /* Test whether there is enough input. */
180 if (winbuf + 1 > winbufend)
181 return (winbuf == winbufend
182 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
184 for (i = 0; i < n; ranges += 3, ++i)
185 if (ranges[0] <= wc && wc <= ranges[1]
186 && (wc - ranges[0]) % ranges[2] == 0)
188 /* Matches the range. Ignore it. */
189 *inbufp += 4;
190 ++*irreversible;
191 return __GCONV_OK;
193 else if (wc < ranges[0])
194 /* There cannot be any other matching range since they are
195 sorted. */
196 break;
199 /* One last chance: use the default replacement. */
200 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
202 const uint32_t *default_missing = (const uint32_t *)
203 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
204 const unsigned char *toinptr = (const unsigned char *) default_missing;
205 uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
206 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
207 unsigned char *outptr;
208 int res;
210 /* Test whether there is enough input. */
211 if (winbuf + 1 > winbufend)
212 return (winbuf == winbufend
213 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
215 outptr = *outbufstart;
216 res = DL_CALL_FCT (fct,
217 (step, step_data, &toinptr,
218 (const unsigned char *) (default_missing + len),
219 &outptr, NULL, 0, 0));
221 if (res != __GCONV_ILLEGAL_INPUT)
223 /* If the conversion succeeds we have to increment the
224 input buffer. */
225 if (res == __GCONV_EMPTY_INPUT)
227 /* This worked but is not reversible. */
228 ++*irreversible;
229 *inbufp += 4;
230 res = __GCONV_OK;
232 *outbufstart = outptr;
234 return res;
238 /* Haven't found a match. */
239 return __GCONV_ILLEGAL_INPUT;
243 /* Structure to represent results of found (or not) transliteration
244 modules. */
245 struct known_trans
247 /* This structure must remain the first member. */
248 struct trans_struct info;
250 char *fname;
251 void *handle;
252 int open_count;
256 /* Tree with results of previous calls to __gconv_translit_find. */
257 static void *search_tree;
259 /* We modify global data. */
260 __libc_lock_define_initialized (static, lock);
263 /* Compare two transliteration entries. */
264 static int
265 trans_compare (const void *p1, const void *p2)
267 const struct known_trans *s1 = (const struct known_trans *) p1;
268 const struct known_trans *s2 = (const struct known_trans *) p2;
270 return strcmp (s1->info.name, s2->info.name);
274 /* Open (maybe reopen) the module named in the struct. Get the function
275 and data structure pointers we need. */
276 static int
277 open_translit (struct known_trans *trans)
279 __gconv_trans_query_fct queryfct;
281 trans->handle = __libc_dlopen (trans->fname);
282 if (trans->handle == NULL)
283 /* Not available. */
284 return 1;
286 /* Find the required symbol. */
287 queryfct = __libc_dlsym (trans->handle, "gconv_trans_context");
288 if (queryfct == NULL)
290 /* We cannot live with that. */
291 close_and_out:
292 __libc_dlclose (trans->handle);
293 trans->handle = NULL;
294 return 1;
297 /* Get the context. */
298 if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames)
299 != 0)
300 goto close_and_out;
302 /* Of course we also have to have the actual function. */
303 trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans");
304 if (trans->info.trans_fct == NULL)
305 goto close_and_out;
307 /* Now the optional functions. */
308 trans->info.trans_init_fct =
309 __libc_dlsym (trans->handle, "gconv_trans_init");
310 trans->info.trans_context_fct =
311 __libc_dlsym (trans->handle, "gconv_trans_context");
312 trans->info.trans_end_fct =
313 __libc_dlsym (trans->handle, "gconv_trans_end");
315 trans->open_count = 1;
317 return 0;
322 internal_function
323 __gconv_translit_find (struct trans_struct *trans)
325 struct known_trans **found;
326 const struct path_elem *runp;
327 int res = 1;
329 /* We have to have a name. */
330 assert (trans->name != NULL);
332 /* Acquire the lock. */
333 __libc_lock_lock (lock);
335 /* See whether we know this module already. */
336 found = __tfind (trans, &search_tree, trans_compare);
337 if (found != NULL)
339 /* Is this module available? */
340 if ((*found)->handle != NULL)
342 /* Maybe we have to reopen the file. */
343 if ((*found)->handle != (void *) -1)
344 /* The object is not unloaded. */
345 res = 0;
346 else if (open_translit (*found) == 0)
348 /* Copy the data. */
349 *trans = (*found)->info;
350 (*found)->open_count++;
351 res = 0;
355 else
357 size_t name_len = strlen (trans->name) + 1;
358 int need_so = 0;
359 struct known_trans *newp;
361 /* We have to continue looking for the module. */
362 if (__gconv_path_elem == NULL)
363 __gconv_get_path ();
365 /* See whether we have to append .so. */
366 if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0)
367 need_so = 1;
369 /* Create a new entry. */
370 newp = (struct known_trans *) malloc (sizeof (struct known_trans)
371 + (__gconv_max_path_elem_len
372 + name_len + 3)
373 + name_len);
374 if (newp != NULL)
376 char *cp;
378 /* Clear the struct. */
379 memset (newp, '\0', sizeof (struct known_trans));
381 /* Store a copy of the module name. */
382 newp->info.name = cp = (char *) (newp + 1);
383 cp = __mempcpy (cp, trans->name, name_len);
385 newp->fname = cp;
387 /* Search in all the directories. */
388 for (runp = __gconv_path_elem; runp->name != NULL; ++runp)
390 cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name),
391 trans->name, name_len);
392 if (need_so)
393 memcpy (cp, ".so", sizeof (".so"));
395 if (open_translit (newp) == 0)
397 /* We found a module. */
398 res = 0;
399 break;
403 if (res)
404 newp->fname = NULL;
406 /* In any case we'll add the entry to our search tree. */
407 if (__tsearch (newp, &search_tree, trans_compare) == NULL)
409 /* Yickes, this should not happen. Unload the object. */
410 res = 1;
411 /* XXX unload here. */
416 __libc_lock_unlock (lock);
418 return res;