uchar C++ tests: Fix build error on FreeBSD 12.
[gnulib.git] / lib / mbrtowc-impl.h
blobc97043964f45498609ae4f1986f595a1410e02b3
1 /* Convert multibyte character to wide character.
2 Copyright (C) 1999-2002, 2005-2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008. */
19 /* This file contains the body of the mbrtowc and mbrtoc32 functions,
20 when GNULIB_defined_mbstate_t is defined. */
/* Function-body fragment: converts the next multibyte character of s
   (at most n bytes) to a wide character, resuming from bytes saved in the
   conversion state.  It refers to s, n, pwc, ps and internal_state without
   declaring them, so they are expected to come from the enclosing function.
   Exits:
     - success:    returns the number of bytes of s that completed the
                   character (0 for the null character) and clears the state;
     - incomplete: saves the bytes seen so far in the state, returns
                   (size_t)(-2);
     - invalid:    sets errno to EILSEQ, returns (size_t)(-1).
   A state whose byte count is not 0..3 yields EINVAL and (size_t)(-1).
   NOTE(review): the embedded line numbers have gaps where brace-only lines
   would be, so block grouping is not visible in this copy -- confirm against
   the upstream file before relying on the apparent nesting.  */
22 char *pstate = (char *)ps;
/* A NULL s is handled like converting the empty string "" with the result
   discarded (pwc is set to NULL).  */
24 if (s == NULL)
26 pwc = NULL;
27 s = "";
28 n = 1;
/* No input bytes available: report an incomplete character.  */
31 if (n == 0)
32 return (size_t)(-2);
34 /* Here n > 0. */
/* Fall back to internal_state when the caller supplied no state.  */
36 if (pstate == NULL)
37 pstate = internal_state;
/* State layout as used below: pstate[0] is the number of saved bytes and
   pstate[1..3] are those bytes.  */
40 size_t nstate = pstate[0];
41 char buf[4];
42 const char *p;
43 size_t m;
44 enc_t enc;
45 int res;
/* Choose the bytes to convert (p, m): s itself when nothing is saved,
   otherwise buf, filled with the saved bytes followed by bytes of s up to
   a total of 4.  */
47 switch (nstate)
49 case 0:
50 p = s;
51 m = n;
52 break;
53 case 3:
54 buf[2] = pstate[3];
55 FALLTHROUGH;
56 case 2:
57 buf[1] = pstate[2];
58 FALLTHROUGH;
59 case 1:
60 buf[0] = pstate[1];
61 p = buf;
62 m = nstate;
63 buf[m++] = s[0];
64 if (n >= 2 && m < 4)
66 buf[m++] = s[1];
67 if (n >= 3 && m < 4)
68 buf[m++] = s[2];
70 break;
/* A saved-byte count outside 0..3 is rejected with EINVAL.  */
71 default:
72 errno = EINVAL;
73 return (size_t)(-1);
76 /* Here m > 0. */
78 enc = locale_encoding_classification ();
80 if (enc == enc_utf8) /* UTF-8 */
82 /* Achieve
83 - multi-thread safety and
84 - the ability to produce wide character values > WCHAR_MAX
85 by not calling mbtowc() at all. */
/* NOTE(review): the included file is not visible here; presumably it decodes
   p[0..m-1] and leaves through the success/incomplete/invalid labels
   below -- confirm.  */
86 #include "mbrtowc-impl-utf8.h"
88 else
90 /* The hidden internal state of mbtowc would make this function not
91 multi-thread safe. Achieve multi-thread safety through a lock. */
92 wchar_t wc;
93 res = mbtowc_with_lock (&wc, p, m);
/* A non-negative result is a converted character.  It must be 0 exactly
   when the character is the null wide character; anything else aborts.  */
95 if (res >= 0)
97 if ((wc == 0) != (res == 0))
98 abort ();
99 if (pwc != NULL)
100 *pwc = wc;
101 goto success;
104 /* mbtowc does not distinguish between invalid and incomplete multibyte
105 sequences. But mbrtowc needs to make this distinction.
106 There are two possible approaches:
107 - Use iconv() and its return value.
108 - Use built-in knowledge about the possible encodings.
109 Given the low quality of implementation of iconv() on the systems
110 that lack mbrtowc(), we use the second approach.
111 The possible encodings are:
112 - 8-bit encodings,
113 - EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
114 - UTF-8 (already handled above).
115 Use specialized code for each. */
/* With 4 bytes in hand, or at least MB_CUR_MAX of them, the failure is
   classified as invalid -- presumably because more bytes could not complete
   a character.  */
116 if (m >= 4 || m >= MB_CUR_MAX)
117 goto invalid;
118 /* Here MB_CUR_MAX > 1 and 0 < m < 4. */
/* Per encoding: decide from the lead byte(s) whether p[0..m-1] can be the
   start of a longer character (incomplete) or not (invalid).  */
119 switch (enc)
121 /* As a reference for this code, you can use the GNU libiconv
122 implementation. Look for uses of the RET_TOOFEW macro. */
124 case enc_eucjp: /* EUC-JP */
126 if (m == 1)
128 unsigned char c = (unsigned char) p[0];
130 if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
131 goto incomplete;
133 if (m == 2)
135 unsigned char c = (unsigned char) p[0];
137 if (c == 0x8f)
139 unsigned char c2 = (unsigned char) p[1];
141 if (c2 >= 0xa1 && c2 < 0xff)
142 goto incomplete;
145 goto invalid;
148 case enc_94: /* EUC-KR, GB2312, BIG5 */
150 if (m == 1)
152 unsigned char c = (unsigned char) p[0];
154 if (c >= 0xa1 && c < 0xff)
155 goto incomplete;
157 goto invalid;
160 case enc_euctw: /* EUC-TW */
162 if (m == 1)
164 unsigned char c = (unsigned char) p[0];
166 if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
167 goto incomplete;
169 else /* m == 2 || m == 3 */
171 unsigned char c = (unsigned char) p[0];
173 if (c == 0x8e)
174 goto incomplete;
176 goto invalid;
179 case enc_gb18030: /* GB18030 */
181 if (m == 1)
183 unsigned char c = (unsigned char) p[0];
185 if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
186 goto incomplete;
188 else /* m == 2 || m == 3 */
190 unsigned char c = (unsigned char) p[0];
192 if (c >= 0x90 && c <= 0xe3)
194 unsigned char c2 = (unsigned char) p[1];
196 if (c2 >= 0x30 && c2 <= 0x39)
198 if (m == 2)
199 goto incomplete;
200 else /* m == 3 */
202 unsigned char c3 = (unsigned char) p[2];
204 if (c3 >= 0x81 && c3 <= 0xfe)
205 goto incomplete;
210 goto invalid;
213 case enc_sjis: /* SJIS */
215 if (m == 1)
217 unsigned char c = (unsigned char) p[0];
219 if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
220 || (c >= 0xf0 && c <= 0xf9))
221 goto incomplete;
223 goto invalid;
226 default:
227 /* An unknown multibyte encoding. */
228 goto incomplete;
232 success:
233 /* res >= 0 is the corrected return value of
234 mbtowc_with_lock (&wc, p, m). */
/* The character must extend beyond the saved bytes, otherwise abort.
   Subtract the saved bytes so that only bytes taken from s are reported,
   and mark the state as empty.  */
235 if (nstate >= (res > 0 ? res : 1))
236 abort ();
237 res -= nstate;
238 pstate[0] = 0;
239 return res;
/* Append the bytes of s that were examined to the saved bytes, so that the
   state holds all m bytes seen so far, then record the new count.  */
241 incomplete:
243 size_t k = nstate;
244 /* Here 0 <= k < m < 4. */
245 pstate[++k] = s[0];
246 if (k < m)
248 pstate[++k] = s[1];
249 if (k < m)
250 pstate[++k] = s[2];
252 if (k != m)
253 abort ();
255 pstate[0] = m;
256 return (size_t)(-2);
258 invalid:
259 errno = EILSEQ;
260 /* The conversion state is undefined, says POSIX. */
261 return (size_t)(-1);