1 /* Transliteration using the locale's data.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
28 #include <bits/libc-lock.h>
29 #include "gconv_int.h"
30 #include "../locale/localeinfo.h"
34 __gconv_transliterate (struct __gconv_step
*step
,
35 struct __gconv_step_data
*step_data
,
36 void *trans_data
__attribute__ ((unused
)),
37 const unsigned char *inbufstart
,
38 const unsigned char **inbufp
,
39 const unsigned char *inbufend
,
40 unsigned char **outbufstart
, size_t *irreversible
)
42 /* Find out about the locale's transliteration. */
53 /* The input buffer. These are actually 4-byte values. */
54 winbuf
= (uint32_t *) *inbufp
;
55 winbufend
= (uint32_t *) inbufend
;
57 /* If there is no transliteration information in the locale, don't do
58 anything and return the error. */
59 size
= _NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TAB_SIZE
);
63 /* Get the rest of the values. */
64 from_idx
= (uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_FROM_IDX
);
65 from_tbl
= (uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_FROM_TBL
);
66 to_idx
= (uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TO_IDX
);
67 to_tbl
= (uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TO_TBL
);
69 /* Test whether there is enough input. */
70 if (winbuf
+ 1 > winbufend
)
71 return (winbuf
== winbufend
72 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
74 /* The array starting at FROM_IDX contains indices into the string table
75 in FROM_TBL. The indices are sorted with respect to the strings, so we
76 can do a binary search. */
81 uint_fast32_t med
= (low
+ high
) / 2;
85 /* Compare the string at this index with the string at the current
86 position in the input buffer. */
91 if (from_tbl
[idx
+ cnt
] != winbuf
[cnt
])
96 while (from_tbl
[idx
+ cnt
] != L
'\0' && winbuf
+ cnt
< winbufend
);
98 if (cnt
> 0 && from_tbl
[idx
+ cnt
] == L
'\0')
100 /* Found a matching input sequence. Now try to convert the
101 possible replacements. */
102 uint32_t idx2
= to_idx
[med
];
106 /* Determine length of replacement. */
107 uint_fast32_t len
= 0;
109 const unsigned char *toinptr
;
111 while (to_tbl
[idx2
+ len
] != L
'\0')
114 /* Try this input text. */
115 toinptr
= (const unsigned char *) &to_tbl
[idx2
];
116 res
= DL_CALL_FCT (step
->__fct
,
117 (step
, step_data
, &toinptr
,
118 (const unsigned char *) &to_tbl
[idx2
+ len
],
119 (unsigned char **) outbufstart
,
121 if (res
!= __GCONV_ILLEGAL_INPUT
)
123 /* If the conversion succeeds we have to advance the input buffer pointer. */
125 if (res
== __GCONV_EMPTY_INPUT
)
127 *inbufp
+= cnt
* sizeof (uint32_t);
135 /* Next replacement. */
138 while (to_tbl
[idx2
] != L
'\0');
140 /* Nothing found, continue searching. */
143 /* This means that the input buffer contents match a prefix of
144 an entry. Since we cannot match it unless we get more input,
145 we will tell the caller about it. */
146 return __GCONV_INCOMPLETE_INPUT
;
148 if (winbuf
+ cnt
>= winbufend
|| from_tbl
[idx
+ cnt
] < winbuf
[cnt
])
155 /* Maybe the character is supposed to be ignored. */
156 if (_NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_IGNORE_LEN
) != 0)
158 int n
= _NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_IGNORE_LEN
);
159 uint32_t *ranges
= (uint32_t *) _NL_CURRENT (LC_CTYPE
,
160 _NL_CTYPE_TRANSLIT_IGNORE
);
161 uint32_t wc
= *(uint32_t *) (*inbufp
);
164 /* Test whether there is enough input. */
165 if (winbuf
+ 1 > winbufend
)
166 return (winbuf
== winbufend
167 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
169 for (i
= 0; i
< n
; ranges
+= 3, ++i
)
170 if (ranges
[0] <= wc
&& wc
<= ranges
[1]
171 && (wc
- ranges
[0]) % ranges
[2] == 0)
173 /* Matches the range. Ignore it. */
178 else if (wc
< ranges
[0])
179 /* There cannot be any other matching range since they are sorted. */
184 /* One last chance: use the default replacement. */
185 if (_NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
) != 0)
187 uint32_t *default_missing
= (uint32_t *)
188 _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING
);
189 const unsigned char *toinptr
= (const unsigned char *) default_missing
;
190 uint32_t len
= _NL_CURRENT_WORD (LC_CTYPE
,
191 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
);
194 /* Test whether there is enough input. */
195 if (winbuf
+ 1 > winbufend
)
196 return (winbuf
== winbufend
197 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
199 res
= DL_CALL_FCT (step
->__fct
,
200 (step
, step_data
, &toinptr
,
201 (const unsigned char *) (default_missing
+ len
),
202 (unsigned char **) outbufstart
,
205 if (res
!= __GCONV_ILLEGAL_INPUT
)
207 /* If the conversion succeeds we have to increment the
209 if (res
== __GCONV_EMPTY_INPUT
)
211 /* This worked but is not reversible. */
221 /* Haven't found a match. */
222 return __GCONV_ILLEGAL_INPUT
;
226 /* Structure to represent results of found (or not) transliteration
230 /* This structure must remain the first member. */
231 struct trans_struct info
;
239 /* Tree with results of previous calls to __gconv_translit_find. */
240 static void *search_tree
;
242 /* We modify global data. */
243 __libc_lock_define_initialized (static, lock
);
246 /* Compare two transliteration entries. */
248 trans_compare (const void *p1
, const void *p2
)
250 struct known_trans
*s1
= (struct known_trans
*) p1
;
251 struct known_trans
*s2
= (struct known_trans
*) p2
;
253 return strcmp (s1
->info
.name
, s2
->info
.name
);
257 /* Open (maybe reopen) the module named in the struct. Get the function
258 and data structure pointers we need. */
260 open_translit (struct known_trans
*trans
)
262 __gconv_trans_query_fct queryfct
;
264 trans
->handle
= __libc_dlopen (trans
->fname
);
265 if (trans
->handle
== NULL
)
269 /* Find the required symbol. */
270 queryfct
= __libc_dlsym (trans
->handle
, "gconv_trans_context");
271 if (queryfct
== NULL
)
273 /* We cannot live with that. */
275 __libc_dlclose (trans
->handle
);
276 trans
->handle
= NULL
;
280 /* Get the context. */
281 if (queryfct (trans
->info
.name
, &trans
->info
.csnames
, &trans
->info
.ncsnames
)
285 /* Of course we also have to have the actual function. */
286 trans
->info
.trans_fct
= __libc_dlsym (trans
->handle
, "gconv_trans");
287 if (trans
->info
.trans_fct
== NULL
)
290 /* Now the optional functions. */
291 trans
->info
.trans_init_fct
=
292 __libc_dlsym (trans
->handle
, "gconv_trans_init");
293 trans
->info
.trans_context_fct
=
294 __libc_dlsym (trans
->handle
, "gconv_trans_context");
295 trans
->info
.trans_end_fct
=
296 __libc_dlsym (trans
->handle
, "gconv_trans_end");
298 trans
->open_count
= 1;
306 __gconv_translit_find (struct trans_struct
*trans
)
308 struct known_trans
**found
;
309 const struct path_elem
*runp
;
312 /* We have to have a name. */
313 assert (trans
->name
!= NULL
);
315 /* Acquire the lock. */
316 __libc_lock_lock (lock
);
318 /* See whether we know this module already. */
319 found
= __tfind (trans
, &search_tree
, trans_compare
);
322 /* Is this module available? */
323 if ((*found
)->handle
!= NULL
)
325 /* Maybe we have to reopen the file. */
326 if ((*found
)->handle
!= (void *) -1)
327 /* The object is not unloaded. */
329 else if (open_translit (*found
) == 0)
332 *trans
= (*found
)->info
;
333 (*found
)->open_count
++;
340 size_t name_len
= strlen (trans
->name
) + 1;
342 struct known_trans
*newp
;
344 /* We have to continue looking for the module. */
345 if (__gconv_path_elem
== NULL
)
348 /* See whether we have to append .so. */
349 if (name_len
<= 4 || memcmp (&trans
->name
[name_len
- 4], ".so", 3) != 0)
352 /* Create a new entry. */
353 newp
= (struct known_trans
*) malloc (sizeof (struct known_trans
)
354 + (__gconv_max_path_elem_len
361 /* Clear the struct. */
362 memset (newp
, '\0', sizeof (struct known_trans
));
364 /* Store a copy of the module name. */
365 newp
->info
.name
= (char *) (newp
+ 1);
366 cp
= __mempcpy ((char *) newp
->info
.name
, trans
->name
, name_len
);
370 /* Search in all the directories. */
371 for (runp
= __gconv_path_elem
; runp
->name
!= NULL
; ++runp
)
373 cp
= __mempcpy (__stpcpy ((char *) newp
->fname
, runp
->name
),
374 trans
->name
, name_len
);
376 memcpy (cp
, ".so", sizeof (".so"));
378 if (open_translit (newp
) == 0)
380 /* We found a module. */
389 /* In any case we'll add the entry to our search tree. */
390 if (__tsearch (newp
, &search_tree
, trans_compare
) == NULL
)
392 /* Yikes, this should not happen. Unload the object. */
394 /* XXX unload here. */
399 __libc_lock_unlock (lock
);