mbsnrtowcs: Work around Solaris 11.4 bug.
[gnulib.git] / lib / striconveha.c
blobbe62474f8a99489a06921ebf27426a148f2bad08
1 /* Character set conversion with error handling and autodetection.
2 Copyright (C) 2002, 2005, 2007, 2009-2018 Free Software Foundation, Inc.
3 Written by Bruno Haible.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 #include <config.h>
20 /* Specification. */
21 #include "striconveha.h"
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
27 #include "malloca.h"
28 #include "c-strcase.h"
29 #include "striconveh.h"
/* Number of elements of the array A.  A must be a true array, not a
   pointer.  */
#define SIZEOF(a) (sizeof (a) / sizeof *(a))
34 /* Autodetection list. */
/* One entry of the singly linked list of autodetection aliases.  */
struct autodetect_alias
{
  /* Following entry in the list, or NULL for the last entry.  */
  struct autodetect_alias *next;
  /* Alias name; compared against the FROM_CODESET argument.  */
  const char *name;
  /* NULL-terminated array of encoding names, tried in this order.  */
  const char * const *encodings_to_try;
};
/* Candidate encodings for the "autodetect_utf8" alias.
   UTF-8 comes first: very few ISO-8859-1 inputs happen to be valid
   UTF-8, whereas many UTF-8 inputs are valid ISO-8859-1.  */
static const char * const autodetect_utf8_try[] =
{
  "UTF-8",
  "ISO-8859-1",
  NULL
};
/* Candidate encodings for the "autodetect_jp" alias.
   The 7-bit encoding goes first; it fails as soon as the input contains
   a byte >= 0x80.
   EUC-JP is tried next.  Short SHIFT_JIS inputs may then come out wrong;
   this is unavoidable, and people will condemn SHIFT_JIS.  Had SHIFT_JIS
   been tried first, some short EUC-JP inputs would come out wrong instead,
   and people would condemn EUC-JP and Unix, which would not be good.
   SHIFT_JIS is the last resort.  */
static const char * const autodetect_jp_try[] =
{
  "ISO-2022-JP-2",
  "EUC-JP",
  "SHIFT_JIS",
  NULL
};
/* Candidate encodings for the "autodetect_kr" alias.
   The 7-bit encoding goes first; it fails as soon as the input contains
   a byte >= 0x80.  EUC-KR is the last resort.  */
static const char * const autodetect_kr_try[] =
{
  "ISO-2022-KR",
  "EUC-KR",
  NULL
};
72 static struct autodetect_alias autodetect_predefined[] =
74 { &autodetect_predefined[1], "autodetect_utf8", autodetect_utf8_try },
75 { &autodetect_predefined[2], "autodetect_jp", autodetect_jp_try },
76 { NULL, "autodetect_kr", autodetect_kr_try }
79 static struct autodetect_alias *autodetect_list = &autodetect_predefined[0];
80 static struct autodetect_alias **autodetect_list_end =
81 &autodetect_predefined[SIZEOF(autodetect_predefined)-1].next;
83 int
84 uniconv_register_autodetect (const char *name,
85 const char * const *try_in_order)
87 size_t namelen;
88 size_t listlen;
89 size_t memneed;
90 size_t i;
91 char *memory;
92 struct autodetect_alias *new_alias;
93 char *new_name;
94 const char **new_try_in_order;
96 /* The TRY_IN_ORDER list must not be empty. */
97 if (try_in_order[0] == NULL)
99 errno = EINVAL;
100 return -1;
103 /* We must deep-copy NAME and TRY_IN_ORDER, because they may be allocated
104 with dynamic extent. */
105 namelen = strlen (name) + 1;
106 memneed = sizeof (struct autodetect_alias) + namelen + sizeof (char *);
107 for (i = 0; try_in_order[i] != NULL; i++)
108 memneed += sizeof (char *) + strlen (try_in_order[i]) + 1;
109 listlen = i;
111 memory = (char *) malloc (memneed);
112 if (memory != NULL)
114 new_alias = (struct autodetect_alias *) memory;
115 memory += sizeof (struct autodetect_alias);
117 new_try_in_order = (const char **) memory;
118 memory += (listlen + 1) * sizeof (char *);
120 new_name = (char *) memory;
121 memcpy (new_name, name, namelen);
122 memory += namelen;
124 for (i = 0; i < listlen; i++)
126 size_t len = strlen (try_in_order[i]) + 1;
127 memcpy (memory, try_in_order[i], len);
128 new_try_in_order[i] = (const char *) memory;
129 memory += len;
131 new_try_in_order[i] = NULL;
133 /* Now insert the new alias. */
134 new_alias->name = new_name;
135 new_alias->encodings_to_try = new_try_in_order;
136 new_alias->next = NULL;
137 /* FIXME: Not multithread-safe. */
138 *autodetect_list_end = new_alias;
139 autodetect_list_end = &new_alias->next;
140 return 0;
142 else
144 errno = ENOMEM;
145 return -1;
149 /* Like mem_iconveha, except no handling of transliteration. */
150 static int
151 mem_iconveha_notranslit (const char *src, size_t srclen,
152 const char *from_codeset, const char *to_codeset,
153 enum iconv_ilseq_handler handler,
154 size_t *offsets,
155 char **resultp, size_t *lengthp)
157 int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
158 offsets, resultp, lengthp);
159 if (retval >= 0 || errno != EINVAL)
160 return retval;
161 else
163 struct autodetect_alias *alias;
165 /* Unsupported from_codeset or to_codeset. Check whether the caller
166 requested autodetection. */
167 for (alias = autodetect_list; alias != NULL; alias = alias->next)
168 if (strcmp (from_codeset, alias->name) == 0)
170 const char * const *encodings;
172 if (handler != iconveh_error)
174 /* First try all encodings without any forgiving. */
175 encodings = alias->encodings_to_try;
178 retval = mem_iconveha_notranslit (src, srclen,
179 *encodings, to_codeset,
180 iconveh_error, offsets,
181 resultp, lengthp);
182 if (!(retval < 0 && errno == EILSEQ))
183 return retval;
184 encodings++;
186 while (*encodings != NULL);
189 encodings = alias->encodings_to_try;
192 retval = mem_iconveha_notranslit (src, srclen,
193 *encodings, to_codeset,
194 handler, offsets,
195 resultp, lengthp);
196 if (!(retval < 0 && errno == EILSEQ))
197 return retval;
198 encodings++;
200 while (*encodings != NULL);
202 /* Return the last call's result. */
203 return -1;
206 /* It wasn't an autodetection name. */
207 errno = EINVAL;
208 return -1;
213 mem_iconveha (const char *src, size_t srclen,
214 const char *from_codeset, const char *to_codeset,
215 bool transliterate,
216 enum iconv_ilseq_handler handler,
217 size_t *offsets,
218 char **resultp, size_t *lengthp)
220 if (srclen == 0)
222 /* Nothing to convert. */
223 *lengthp = 0;
224 return 0;
227 /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
228 we want to use transliteration. */
229 #if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
230 && !defined __UCLIBC__) \
231 || _LIBICONV_VERSION >= 0x0105
232 if (transliterate)
234 int retval;
235 size_t len = strlen (to_codeset);
236 char *to_codeset_suffixed = (char *) malloca (len + 10 + 1);
237 memcpy (to_codeset_suffixed, to_codeset, len);
238 memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
240 retval = mem_iconveha_notranslit (src, srclen,
241 from_codeset, to_codeset_suffixed,
242 handler, offsets, resultp, lengthp);
244 freea (to_codeset_suffixed);
246 return retval;
248 else
249 #endif
250 return mem_iconveha_notranslit (src, srclen,
251 from_codeset, to_codeset,
252 handler, offsets, resultp, lengthp);
255 /* Like str_iconveha, except no handling of transliteration. */
256 static char *
257 str_iconveha_notranslit (const char *src,
258 const char *from_codeset, const char *to_codeset,
259 enum iconv_ilseq_handler handler)
261 char *result = str_iconveh (src, from_codeset, to_codeset, handler);
263 if (result != NULL || errno != EINVAL)
264 return result;
265 else
267 struct autodetect_alias *alias;
269 /* Unsupported from_codeset or to_codeset. Check whether the caller
270 requested autodetection. */
271 for (alias = autodetect_list; alias != NULL; alias = alias->next)
272 if (strcmp (from_codeset, alias->name) == 0)
274 const char * const *encodings;
276 if (handler != iconveh_error)
278 /* First try all encodings without any forgiving. */
279 encodings = alias->encodings_to_try;
282 result = str_iconveha_notranslit (src,
283 *encodings, to_codeset,
284 iconveh_error);
285 if (!(result == NULL && errno == EILSEQ))
286 return result;
287 encodings++;
289 while (*encodings != NULL);
292 encodings = alias->encodings_to_try;
295 result = str_iconveha_notranslit (src,
296 *encodings, to_codeset,
297 handler);
298 if (!(result == NULL && errno == EILSEQ))
299 return result;
300 encodings++;
302 while (*encodings != NULL);
304 /* Return the last call's result. */
305 return NULL;
308 /* It wasn't an autodetection name. */
309 errno = EINVAL;
310 return NULL;
314 char *
315 str_iconveha (const char *src,
316 const char *from_codeset, const char *to_codeset,
317 bool transliterate,
318 enum iconv_ilseq_handler handler)
320 if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
322 char *result = strdup (src);
324 if (result == NULL)
325 errno = ENOMEM;
326 return result;
329 /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
330 we want to use transliteration. */
331 #if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
332 && !defined __UCLIBC__) \
333 || _LIBICONV_VERSION >= 0x0105
334 if (transliterate)
336 char *result;
337 size_t len = strlen (to_codeset);
338 char *to_codeset_suffixed = (char *) malloca (len + 10 + 1);
339 memcpy (to_codeset_suffixed, to_codeset, len);
340 memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
342 result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed,
343 handler);
345 freea (to_codeset_suffixed);
347 return result;
349 else
350 #endif
351 return str_iconveha_notranslit (src, from_codeset, to_codeset, handler);