exp2l: Work around a NetBSD 10.0/i386 bug.
[gnulib.git] / lib / striconveha.c
blob08008d8bdc94829e7e09ff81a802f1ff4555f233
1 /* Character set conversion with error handling and autodetection.
2 Copyright (C) 2002, 2005, 2007, 2009-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible.
5 This file is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation; either version 2.1 of the
8 License, or (at your option) any later version.
10 This file is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 #include <config.h>
20 /* Specification. */
21 #include "striconveha.h"
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
27 #include "malloca.h"
28 #include "c-strcase.h"
29 #include "striconveh.h"
/* Number of elements of the array A.  A must be an actual array, not a
   pointer.  The argument is parenthesized so that any expression of array
   type can be passed safely.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
34 /* Autodetection list. */
/* One node of the singly linked list of autodetection aliases.  */
struct autodetect_alias
{
  /* Following node, or NULL for the last node.  */
  struct autodetect_alias *next;
  /* The alias, as it is passed in place of a source encoding name.  */
  const char *name;
  /* NULL-terminated array of the encoding names this alias stands for.  */
  const char * const *encodings_to_try;
};
/* Candidates for the "autodetect_utf8" alias.
   UTF-8 goes first: very few ISO-8859-1 inputs happen to be valid UTF-8,
   whereas many UTF-8 inputs are also valid ISO-8859-1.  */
static const char * const autodetect_utf8_try[] =
{
  "UTF-8",
  "ISO-8859-1",
  NULL
};
/* Candidates for the "autodetect_jp" alias.
   The 7-bit encoding goes first; it fails as soon as the input contains a
   byte >= 0x80.
   EUC-JP is next.  Short SHIFT_JIS inputs may then come out wrong; this is
   unavoidable, and people will condemn SHIFT_JIS.  Had SHIFT_JIS been tried
   before EUC-JP, some short EUC-JP inputs would come out wrong instead, and
   people would condemn EUC-JP and Unix, which would not be good.
   SHIFT_JIS is tried last.  */
static const char * const autodetect_jp_try[] =
{
  "ISO-2022-JP-2",
  "EUC-JP",
  "SHIFT_JIS",
  NULL
};
/* Candidates for the "autodetect_kr" alias.
   The 7-bit encoding goes first; it fails as soon as the input contains a
   byte >= 0x80.  EUC-KR is tried last.  */
static const char * const autodetect_kr_try[] =
{
  "ISO-2022-KR",
  "EUC-KR",
  NULL
};
72 static struct autodetect_alias autodetect_predefined[] =
74 { &autodetect_predefined[1], "autodetect_utf8", autodetect_utf8_try },
75 { &autodetect_predefined[2], "autodetect_jp", autodetect_jp_try },
76 { NULL, "autodetect_kr", autodetect_kr_try }
79 static struct autodetect_alias *autodetect_list = &autodetect_predefined[0];
80 static struct autodetect_alias **autodetect_list_end =
81 &autodetect_predefined[SIZEOF(autodetect_predefined)-1].next;
83 int
84 uniconv_register_autodetect (const char *name,
85 const char * const *try_in_order)
87 size_t namelen;
88 size_t listlen;
89 size_t memneed;
90 size_t i;
92 /* The TRY_IN_ORDER list must not be empty. */
93 if (try_in_order[0] == NULL)
95 errno = EINVAL;
96 return -1;
99 /* We must deep-copy NAME and TRY_IN_ORDER, because they may be allocated
100 with dynamic extent. */
101 namelen = strlen (name) + 1;
102 memneed = sizeof (struct autodetect_alias) + namelen + sizeof (char *);
103 for (i = 0; try_in_order[i] != NULL; i++)
104 memneed += sizeof (char *) + strlen (try_in_order[i]) + 1;
105 listlen = i;
107 void *memory = malloc (memneed);
108 if (memory != NULL)
110 struct autodetect_alias *new_alias = memory;
111 memory = new_alias + 1;
113 char const **new_try_in_order = memory;
114 memory = new_try_in_order + listlen + 1;
116 char *new_name = memcpy (memory, name, namelen);
117 memory = new_name + namelen;
119 for (i = 0; i < listlen; i++)
121 size_t len = strlen (try_in_order[i]) + 1;
122 char *copy = memcpy (memory, try_in_order[i], len);
123 new_try_in_order[i] = copy;
124 memory = copy + len;
126 new_try_in_order[i] = NULL;
128 /* Now insert the new alias. */
129 new_alias->name = new_name;
130 new_alias->encodings_to_try = new_try_in_order;
131 new_alias->next = NULL;
132 /* FIXME: Not multithread-safe. */
133 *autodetect_list_end = new_alias;
134 autodetect_list_end = &new_alias->next;
135 return 0;
137 else
139 errno = ENOMEM;
140 return -1;
144 /* Like mem_iconveha, except no handling of transliteration. */
145 static int
146 mem_iconveha_notranslit (const char *src, size_t srclen,
147 const char *from_codeset, const char *to_codeset,
148 enum iconv_ilseq_handler handler,
149 size_t *offsets,
150 char **resultp, size_t *lengthp)
152 int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
153 offsets, resultp, lengthp);
154 if (retval >= 0 || errno != EINVAL)
155 return retval;
156 else
158 struct autodetect_alias *alias;
160 /* Unsupported from_codeset or to_codeset. Check whether the caller
161 requested autodetection. */
162 for (alias = autodetect_list; alias != NULL; alias = alias->next)
163 if (strcmp (from_codeset, alias->name) == 0)
165 const char * const *encodings;
167 if (handler != iconveh_error)
169 /* First try all encodings without any forgiving. */
170 encodings = alias->encodings_to_try;
173 retval = mem_iconveha_notranslit (src, srclen,
174 *encodings, to_codeset,
175 iconveh_error, offsets,
176 resultp, lengthp);
177 if (!(retval < 0 && errno == EILSEQ))
178 return retval;
179 encodings++;
181 while (*encodings != NULL);
184 encodings = alias->encodings_to_try;
187 retval = mem_iconveha_notranslit (src, srclen,
188 *encodings, to_codeset,
189 handler, offsets,
190 resultp, lengthp);
191 if (!(retval < 0 && errno == EILSEQ))
192 return retval;
193 encodings++;
195 while (*encodings != NULL);
197 /* Return the last call's result. */
198 return -1;
201 /* It wasn't an autodetection name. */
202 errno = EINVAL;
203 return -1;
208 mem_iconveha (const char *src, size_t srclen,
209 const char *from_codeset, const char *to_codeset,
210 bool transliterate,
211 enum iconv_ilseq_handler handler,
212 size_t *offsets,
213 char **resultp, size_t *lengthp)
215 if (srclen == 0)
217 /* Nothing to convert. */
218 *lengthp = 0;
219 return 0;
222 /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5 or Citrus/FreeBSD/macOS
223 iconv, we want to use transliteration. */
224 #if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
225 && !defined __UCLIBC__) \
226 || _LIBICONV_VERSION >= 0x0105 \
227 || defined ICONV_SET_TRANSLITERATE
228 if (transliterate)
230 int retval;
231 size_t len = strlen (to_codeset);
232 char *to_codeset_suffixed = (char *) malloca (len + 10 + 1);
233 if (to_codeset_suffixed == NULL)
235 errno = ENOMEM;
236 return -1;
238 memcpy (to_codeset_suffixed, to_codeset, len);
239 memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
241 retval = mem_iconveha_notranslit (src, srclen,
242 from_codeset, to_codeset_suffixed,
243 handler, offsets, resultp, lengthp);
245 freea (to_codeset_suffixed);
247 return retval;
249 else
250 #endif
251 return mem_iconveha_notranslit (src, srclen,
252 from_codeset, to_codeset,
253 handler, offsets, resultp, lengthp);
256 /* Like str_iconveha, except no handling of transliteration. */
257 static char *
258 str_iconveha_notranslit (const char *src,
259 const char *from_codeset, const char *to_codeset,
260 enum iconv_ilseq_handler handler)
262 char *result = str_iconveh (src, from_codeset, to_codeset, handler);
264 if (result != NULL || errno != EINVAL)
265 return result;
266 else
268 struct autodetect_alias *alias;
270 /* Unsupported from_codeset or to_codeset. Check whether the caller
271 requested autodetection. */
272 for (alias = autodetect_list; alias != NULL; alias = alias->next)
273 if (strcmp (from_codeset, alias->name) == 0)
275 const char * const *encodings;
277 if (handler != iconveh_error)
279 /* First try all encodings without any forgiving. */
280 encodings = alias->encodings_to_try;
283 result = str_iconveha_notranslit (src,
284 *encodings, to_codeset,
285 iconveh_error);
286 if (!(result == NULL && errno == EILSEQ))
287 return result;
288 encodings++;
290 while (*encodings != NULL);
293 encodings = alias->encodings_to_try;
296 result = str_iconveha_notranslit (src,
297 *encodings, to_codeset,
298 handler);
299 if (!(result == NULL && errno == EILSEQ))
300 return result;
301 encodings++;
303 while (*encodings != NULL);
305 /* Return the last call's result. */
306 return NULL;
309 /* It wasn't an autodetection name. */
310 errno = EINVAL;
311 return NULL;
315 char *
316 str_iconveha (const char *src,
317 const char *from_codeset, const char *to_codeset,
318 bool transliterate,
319 enum iconv_ilseq_handler handler)
321 if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
323 char *result = strdup (src);
325 if (result == NULL)
326 errno = ENOMEM;
327 return result;
330 /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5 or Citrus/FreeBSD/macOS
331 iconv, we want to use transliteration. */
332 #if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
333 && !defined __UCLIBC__) \
334 || _LIBICONV_VERSION >= 0x0105 \
335 || defined ICONV_SET_TRANSLITERATE
336 if (transliterate)
338 char *result;
339 size_t len = strlen (to_codeset);
340 char *to_codeset_suffixed = (char *) malloca (len + 10 + 1);
341 if (to_codeset_suffixed == NULL)
343 errno = ENOMEM;
344 return NULL;
346 memcpy (to_codeset_suffixed, to_codeset, len);
347 memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
349 result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed,
350 handler);
352 freea (to_codeset_suffixed);
354 return result;
356 else
357 #endif
358 return str_iconveha_notranslit (src, from_codeset, to_codeset, handler);