1 /* Transliteration using the locale's data.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 #include <bits/libc-lock.h>
29 #include "gconv_int.h"
30 #include "../locale/localeinfo.h"
34 __gconv_transliterate (struct __gconv_step
*step
,
35 struct __gconv_step_data
*step_data
,
36 void *trans_data
__attribute__ ((unused
)),
37 const unsigned char *inbufstart
,
38 const unsigned char **inbufp
,
39 const unsigned char *inbufend
,
40 unsigned char **outbufstart
, size_t *irreversible
)
42 /* Find out about the locale's transliteration. */
/* Overview: the bytes at *INBUFP are read as 32-bit values.  Three
   strategies are tried in this order:
     1. binary search in the LC_CTYPE transliteration table; every
        replacement stored for a matching entry is handed to the
        conversion function STEP->__fct until one is accepted;
     2. a test of the current value against the ignore ranges of the
        locale (_NL_CTYPE_TRANSLIT_IGNORE);
     3. the default replacement of the locale
        (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING), again handed to
        STEP->__fct.
   If none of them applies the function ends with __GCONV_ILLEGAL_INPUT.
   With less than one complete 32-bit value of input it returns
   __GCONV_EMPTY_INPUT (no input at all) or __GCONV_INCOMPLETE_INPUT;
   the latter is also returned when the input only matches a prefix of
   a table entry.
   TRANS_DATA is marked unused.
   NOTE(review): INBUFSTART is not referenced and IRREVERSIBLE is not
   updated in the lines visible in this excerpt; braces and several
   statements are missing from this view, so check the complete source
   before relying on details.  */
44 const uint32_t *from_idx
;
45 const uint32_t *from_tbl
;
46 const uint32_t *to_idx
;
47 const uint32_t *to_tbl
;
48 const uint32_t *winbuf
;
49 const uint32_t *winbufend
;
53 /* The input buffer. These are actually 4-byte values. */
54 winbuf
= (const uint32_t *) *inbufp
;
55 winbufend
= (const uint32_t *) inbufend
;
57 /* If there is no transliteration information in the locale don't do
58 anything and return the error. */
59 size
= _NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TAB_SIZE
);
/* NOTE(review): the test of SIZE against zero that the comment above
   announces is not visible in this excerpt.  */
63 /* Get the rest of the values: the index and string tables for the
   source sequences and for their replacements.  NOTE(review): the
   assignment targets are not visible in this excerpt; presumably
   from_idx, from_tbl, to_idx and to_tbl, in this order.  */
65 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_FROM_IDX
);
67 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_FROM_TBL
);
69 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TO_IDX
);
71 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TO_TBL
);
73 /* Test whether there is enough input. */
74 if (winbuf
+ 1 > winbufend
)
75 return (winbuf
== winbufend
76 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
78 /* The array starting at FROM_IDX contains indices to the string table
79 in FROM_TBL. The indices are sorted with respect to the strings. I.e., we
80 are doing binary search. */
85 uint_fast32_t med
= (low
+ high
) / 2;
89 /* Compare the string at this index with the string at the current
90 position in the input buffer. */
95 if (from_tbl
[idx
+ cnt
] != winbuf
[cnt
])
/* The comparison goes on until the zero element that ends the table
   entry is reached or the input is exhausted.  */
100 while (from_tbl
[idx
+ cnt
] != L
'\0' && winbuf
+ cnt
< winbufend
);
102 if (cnt
> 0 && from_tbl
[idx
+ cnt
] == L
'\0')
104 /* Found a matching input sequence. Now try to convert the
105 possible replacements. */
106 uint32_t idx2
= to_idx
[med
];
110 /* Determine length of replacement. The replacement is a sequence of
   32-bit values ended by a zero element. */
111 uint_fast32_t len
= 0;
113 const unsigned char *toinptr
;
114 unsigned char *outptr
;
116 while (to_tbl
[idx2
+ len
] != L
'\0')
119 /* Try this input text: the LEN elements of the replacement form the
   input range passed to the conversion function of STEP, and OUTPTR
   starts at *OUTBUFSTART. */
120 toinptr
= (const unsigned char *) &to_tbl
[idx2
];
121 outptr
= *outbufstart
;
122 res
= DL_CALL_FCT (step
->__fct
,
123 (step
, step_data
, &toinptr
,
124 (const unsigned char *) &to_tbl
[idx2
+ len
],
125 &outptr
, NULL
, 0, 0));
/* Results other than __GCONV_ILLEGAL_INPUT are handled right here.  For
   __GCONV_EMPTY_INPUT the visible code advances *INBUFP by the CNT
   matched 32-bit input elements, and *OUTBUFSTART is set to OUTPTR.
   NOTE(review): the return statement and any further result handling
   of this branch are not visible in this excerpt.  */
126 if (res
!= __GCONV_ILLEGAL_INPUT
)
128 /* If the conversion succeeds we have to increment the
130 if (res
== __GCONV_EMPTY_INPUT
)
132 *inbufp
+= cnt
* sizeof (uint32_t);
136 *outbufstart
= outptr
;
141 /* Next replacement. */
144 while (to_tbl
[idx2
] != L
'\0');
146 /* Nothing found, continue searching. */
149 /* This means that the input buffer contents matches a prefix of
150 an entry. Since we cannot match it unless we get more input,
151 we will tell the caller about it. */
152 return __GCONV_INCOMPLETE_INPUT
;
/* Choose the half of the table in which the search continues.
   NOTE(review): the updates of the search bounds are not visible in
   this excerpt.  */
154 if (winbuf
+ cnt
>= winbufend
|| from_tbl
[idx
+ cnt
] < winbuf
[cnt
])
161 /* Maybe the character is supposed to be ignored. */
162 if (_NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_IGNORE_LEN
) != 0)
164 int n
= _NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_IGNORE_LEN
);
165 const uint32_t *ranges
=
166 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_IGNORE
);
/* NOTE(review): WC is loaded from *INBUFP in this initializer, i.e.
   before the test for sufficient input that follows.  Confirm that no
   path reaches this point with an empty input buffer.  */
167 const uint32_t wc
= *(const uint32_t *) (*inbufp
);
170 /* Test whether there is enough input. */
171 if (winbuf
+ 1 > winbufend
)
172 return (winbuf
== winbufend
173 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
/* Every range consists of three 32-bit values: first value, last value
   and step.  WC matches when it lies within the bounds and its distance
   from the first value is a multiple of the step.
   NOTE(review): a step of zero would make the modulo below a division
   by zero; assumes the locale data never stores one - TODO confirm.  */
175 for (i
= 0; i
< n
; ranges
+= 3, ++i
)
176 if (ranges
[0] <= wc
&& wc
<= ranges
[1]
177 && (wc
- ranges
[0]) % ranges
[2] == 0)
179 /* Matches the range. Ignore it. */
184 else if (wc
< ranges
[0])
185 /* There cannot be any other matching range since they are
190 /* One last chance: use the default replacement. */
191 if (_NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
) != 0)
193 const uint32_t *default_missing
= (const uint32_t *)
194 _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING
);
195 const unsigned char *toinptr
= (const unsigned char *) default_missing
;
196 uint32_t len
= _NL_CURRENT_WORD (LC_CTYPE
,
197 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
);
198 unsigned char *outptr
;
201 /* Test whether there is enough input. */
202 if (winbuf
+ 1 > winbufend
)
203 return (winbuf
== winbufend
204 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
/* Hand the LEN elements of the default replacement to the conversion
   function of STEP, writing at the current output position.  */
206 outptr
= *outbufstart
;
207 res
= DL_CALL_FCT (step
->__fct
,
208 (step
, step_data
, &toinptr
,
209 (const unsigned char *) (default_missing
+ len
),
210 &outptr
, NULL
, 0, 0));
212 if (res
!= __GCONV_ILLEGAL_INPUT
)
214 /* If the conversion succeeds we have to increment the
216 if (res
== __GCONV_EMPTY_INPUT
)
218 /* This worked but is not reversible. */
223 *outbufstart
= outptr
;
229 /* Haven't found a match. */
230 return __GCONV_ILLEGAL_INPUT
;
234 /* Structure to represent results of found (or not) transliteration
238 /* This structure must remain the first member. */
/* INFO has to come first because a pointer to a struct trans_struct is
   used where tree entries are expected: __gconv_translit_find passes
   its struct trans_struct argument to __tfind, and trans_compare reads
   both arguments through struct known_trans pointers.
   NOTE(review): the other members used in this file (fname, handle,
   open_count) are not visible in this excerpt.  */
239 struct trans_struct info
;
247 /* Tree with results of previous calls to __gconv_translit_find. It is
   searched with __tfind and extended with __tsearch, both using
   trans_compare as ordering. */
248 static void *search_tree
;
250 /* We modify global data. __gconv_translit_find takes this lock before
   it touches the tree and releases it at its end. */
251 __libc_lock_define_initialized (static, lock
);
254 /* Compare two transliteration entries.
   Ordering function handed to __tfind and __tsearch.  P1 and P2 are
   read as pointers to struct known_trans; the result is that of strcmp
   applied to the info.name strings of the two entries.  */
256 trans_compare (const void *p1
, const void *p2
)
258 const struct known_trans
*s1
= (const struct known_trans
*) p1
;
259 const struct known_trans
*s2
= (const struct known_trans
*) p2
;
261 return strcmp (s1
->info
.name
, s2
->info
.name
);
265 /* Open (maybe reopen) the module named in the struct. Get the function
266 and data structure pointers we need.
   TRANS->fname is passed to __libc_dlopen and the result is kept in
   TRANS->handle.  The symbols gconv_trans_context (query function) and
   gconv_trans are treated as required; gconv_trans_init,
   gconv_trans_context and gconv_trans_end are stored without a check.
   The callers in this file treat a return value of 0 as success.
   NOTE(review): the return statements and braces of this function are
   not visible in this excerpt.  */
268 open_translit (struct known_trans
*trans
)
270 __gconv_trans_query_fct queryfct
;
272 trans
->handle
= __libc_dlopen (trans
->fname
);
273 if (trans
->handle
== NULL
)
277 /* Find the required symbol. NOTE(review): the query function is
   resolved from gconv_trans_context, the same symbol that is stored as
   trans_context_fct further down; confirm that this is intended. */
278 queryfct
= __libc_dlsym (trans
->handle
, "gconv_trans_context");
279 if (queryfct
== NULL
)
281 /* We cannot live with that. */
283 __libc_dlclose (trans
->handle
);
284 trans
->handle
= NULL
;
288 /* Get the context. The query function receives the module name and the
   addresses of the csnames and ncsnames members. NOTE(review): the
   rest of this condition is not visible in this excerpt. */
289 if (queryfct (trans
->info
.name
, &trans
->info
.csnames
, &trans
->info
.ncsnames
)
293 /* Of course we also have to have the actual function. */
294 trans
->info
.trans_fct
= __libc_dlsym (trans
->handle
, "gconv_trans");
295 if (trans
->info
.trans_fct
== NULL
)
298 /* Now the optional functions. The results of these lookups are stored
   as they are, without a test against NULL. */
299 trans
->info
.trans_init_fct
=
300 __libc_dlsym (trans
->handle
, "gconv_trans_init");
301 trans
->info
.trans_context_fct
=
302 __libc_dlsym (trans
->handle
, "gconv_trans_context");
303 trans
->info
.trans_end_fct
=
304 __libc_dlsym (trans
->handle
, "gconv_trans_end");
/* A freshly opened module starts with an open count of one.  */
306 trans
->open_count
= 1;
/* Look up the transliteration module named TRANS->name.
   While holding LOCK the function first consults SEARCH_TREE.  For a
   known entry whose handle is (void *) -1 the module is opened again
   with open_translit, and on success the stored information is copied
   to *TRANS and the open count is incremented.  For an unknown name a
   new struct known_trans is allocated, every directory listed in
   __gconv_path_elem is tried through open_translit, and the new entry
   is added to the tree.
   NOTE(review): braces, else branches, the result variable and the
   return statements are not visible in this excerpt; check the complete
   source before relying on the exact control flow.  */
314 __gconv_translit_find (struct trans_struct
*trans
)
316 struct known_trans
**found
;
317 const struct path_elem
*runp
;
320 /* We have to have a name. */
321 assert (trans
->name
!= NULL
);
323 /* Acquire the lock. */
324 __libc_lock_lock (lock
);
326 /* See whether we know this module already. */
327 found
= __tfind (trans
, &search_tree
, trans_compare
);
330 /* Is this module available? */
331 if ((*found
)->handle
!= NULL
)
/* A handle of (void *) -1 marks an entry whose object has been
   unloaded, so the module is opened again.
   NOTE(review): in the visible lines the copy into *TRANS and the
   open_count increment follow only the reopen branch; confirm that the
   caller also receives the data when the object is still loaded.  */
333 /* Maybe we have to reopen the file. */
334 if ((*found
)->handle
!= (void *) -1)
335 /* The object is not unloaded. */
337 else if (open_translit (*found
) == 0)
340 *trans
= (*found
)->info
;
341 (*found
)->open_count
++;
348 size_t name_len
= strlen (trans
->name
) + 1;
350 struct known_trans
*newp
;
352 /* We have to continue looking for the module. */
353 if (__gconv_path_elem
== NULL
)
356 /* See whether we have to append .so. NAME_LEN counts the terminating
   NUL, so index NAME_LEN - 4 is the start of the last three characters
   of the name. */
357 if (name_len
<= 4 || memcmp (&trans
->name
[name_len
- 4], ".so", 3) != 0)
360 /* Create a new entry. The allocation is larger than the struct itself;
   the copy of the module name is placed right behind the struct.
   NOTE(review): the rest of the size expression and any test of the
   malloc result are not visible in this excerpt. */
361 newp
= (struct known_trans
*) malloc (sizeof (struct known_trans
)
362 + (__gconv_max_path_elem_len
369 /* Clear the struct. */
370 memset (newp
, '\0', sizeof (struct known_trans
));
372 /* Store a copy of the module name, starting at the first byte behind
   the struct. */
373 newp
->info
.name
= cp
= (char *) (newp
+ 1);
374 cp
= __mempcpy (cp
, trans
->name
, name_len
);
378 /* Search in all the directories. The list ends with an element whose
   name is NULL. */
379 for (runp
= __gconv_path_elem
; runp
->name
!= NULL
; ++runp
)
/* Build the file name from the directory name followed by the module
   name.
   NOTE(review): NAME_LEN bytes are copied, which includes the
   terminating NUL, so CP ends up behind the terminator and the .so
   suffix copied below lands after it instead of extending the string;
   confirm against the complete source.  */
381 cp
= __mempcpy (__stpcpy ((char *) newp
->fname
, runp
->name
),
382 trans
->name
, name_len
);
384 memcpy (cp
, ".so", sizeof (".so"));
386 if (open_translit (newp
) == 0)
388 /* We found a module. */
397 /* In any case we'll add the entry to our search tree. */
398 if (__tsearch (newp
, &search_tree
, trans_compare
) == NULL
)
400 /* Yikes, this should not happen. Unload the object. */
402 /* XXX unload here. */
407 __libc_lock_unlock (lock
);