c32isxdigit: Add tests.
[gnulib.git] / lib / mbrtoc32.c
blob facf28bc547b82024f5b5d2d500bf810d607f1ed
1 /* Convert multibyte character to 32-bit wide character.
2 Copyright (C) 2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2020. */
19 #include <config.h>
21 /* Specification. */
22 #include <uchar.h>
24 #include <errno.h>
25 #include <stdlib.h>
/* Fallback definition of FALLTHROUGH, the marker placed before a 'case'
   label that is intentionally reached by falling through from the previous
   case.  It is only defined here if nothing defined it earlier.
   When __GNUC__ is less than 7 (which includes the case where __GNUC__ is
   not defined at all, since it then evaluates to 0 in '#if'), the marker
   expands to a no-op expression; otherwise it expands to the
   __fallthrough__ statement attribute.  */
27 #ifndef FALLTHROUGH
28 # if __GNUC__ < 7
29 # define FALLTHROUGH ((void) 0)
30 # else
31 # define FALLTHROUGH __attribute__ ((__fallthrough__))
32 # endif
33 #endif
35 #if GNULIB_defined_mbstate_t /* AIX, IRIX */
36 /* Implement mbrtoc32() on top of mbtowc() for the non-UTF-8 locales
37 and directly for the UTF-8 locales. */
39 # if defined _WIN32 && !defined __CYGWIN__
41 # define WIN32_LEAN_AND_MEAN /* avoid including junk */
42 # include <windows.h>
44 # elif HAVE_PTHREAD_API
46 # include <pthread.h>
47 # if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
48 # include <threads.h>
49 # pragma weak thrd_exit
50 # define c11_threads_in_use() (thrd_exit != NULL)
51 # else
52 # define c11_threads_in_use() 0
53 # endif
55 # elif HAVE_THREADS_H
57 # include <threads.h>
59 # endif
61 # include "verify.h"
62 # include "lc-charset-dispatch.h"
63 # include "mbtowc-lock.h"
/* Build-time requirement that mbstate_t occupies at least 4 bytes, matching
   the size of the 4-byte internal_state buffer declared below.  */
65 verify (sizeof (mbstate_t) >= 4);
/* NOTE(review): presumably the conversion state used by the included
   implementation when the caller passes ps == NULL -- confirm against
   mbrtowc-impl.h, which is not visible here.  */
66 static char internal_state[4];
/* mbrtoc32 for platforms where gnulib defines mbstate_t itself.
   The whole function body is supplied by "mbrtowc-impl.h"; this file only
   provides the signature and the FITS_IN_CHAR_TYPE configuration macro.  */
68 size_t
69 mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
/* FITS_IN_CHAR_TYPE is constantly true here.
   NOTE(review): presumably because every value the shared implementation
   produces is representable in char32_t -- confirm in mbrtowc-impl.h.  */
71 # define FITS_IN_CHAR_TYPE(wc) 1
72 # include "mbrtowc-impl.h"
75 #else /* glibc, macOS, FreeBSD, NetBSD, OpenBSD, HP-UX, Solaris, Cygwin, mingw, MSVC, Minix, Android */
77 /* Implement mbrtoc32() based on the original mbrtoc32() or on mbrtowc(). */
79 # include <wchar.h>
81 # include "localcharset.h"
82 # include "streq.h"
84 # if MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ
85 # include "hard-locale.h"
86 # include <locale.h>
87 # endif
/* Conversion state used by mbrtoc32 below when the caller passes
   ps == NULL.  */
89 static mbstate_t internal_state;
/* Convert the multibyte character that starts at S, looking at no more than
   N bytes, into a 32-bit wide character stored in *PWC (if PWC is not NULL).
   PS is the conversion state; if it is NULL, the file-static internal_state
   is used instead.

   Depending on the configuration macros, the work is delegated to the
   system's mbrtoc32 (HAVE_WORKING_MBRTOC32), handled by a UTF-8 decoder
   included from "mbrtowc-impl-utf8.h" with a fallback to mbrtowc
   (_GL_LARGE_CHAR32_T), or delegated to mbrtowc alone.

   Return value: the value returned by the underlying conversion function,
   or in the UTF-8 branch the number of bytes of S that were consumed,
   (size_t) -2 for an incomplete character, or (size_t) -1 with errno set to
   EILSEQ (invalid input) or EINVAL (unusable conversion state).  */
91 size_t
92 mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
/* NOTE(review): presumably removes a macro that renames mbrtoc32, so that
   the calls further down reach the original function mentioned in the
   comment at the top of this branch -- confirm against <uchar.h>.  */
93 # undef mbrtoc32
95 /* It's simpler to handle the case s == NULL upfront, than to worry about
96 this case later, before every test of pwc and n. */
97 if (s == NULL)
99 pwc = NULL;
100 s = "";
101 n = 1;
/* With either of these configuration macros set, an empty input is reported
   as an incomplete character without calling any conversion function.  */
104 # if MBRTOC32_EMPTY_INPUT_BUG || _GL_LARGE_CHAR32_T
105 if (n == 0)
106 return (size_t) -2;
107 # endif
109 if (ps == NULL)
110 ps = &internal_state;
112 # if HAVE_WORKING_MBRTOC32
113 /* mbrtoc32() may produce different values for wc than mbrtowc(). Therefore
114 use mbrtoc32(). */
/* On native Windows the result goes through a local variable, and *pwc is
   written only when a character was actually converted, i.e. when the
   return value is neither (size_t) -1 nor (size_t) -2.  */
116 # if defined _WIN32 && !defined __CYGWIN__
117 char32_t wc;
118 size_t ret = mbrtoc32 (&wc, s, n, ps);
119 if (ret < (size_t) -2 && pwc != NULL)
120 *pwc = wc;
121 # else
122 size_t ret = mbrtoc32 (pwc, s, n, ps);
123 # endif
/* Workaround: if the underlying call reported (size_t) -1 or (size_t) -2 on
   non-empty input and hard_locale (LC_CTYPE) is false, treat the first byte
   of S as a complete one-byte character.  */
125 # if MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ
126 if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))
128 if (pwc != NULL)
129 *pwc = (unsigned char) *s;
130 return 1;
132 # endif
134 return ret;
136 # elif _GL_LARGE_CHAR32_T
138 /* Special-case all encodings that may produce wide character values
139 > WCHAR_MAX. */
140 const char *encoding = locale_charset ();
141 if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
143 /* Special-case the UTF-8 encoding. Assume that the wide-character
144 encoding in a UTF-8 locale is UCS-2 or, equivalently, UTF-16. */
145 /* Here n > 0. */
/* The conversion state is accessed as raw bytes: pstate[0] is the number of
   bytes buffered from earlier calls (0 to 3), and pstate[1..3] are those
   bytes.  */
146 char *pstate = (char *)ps;
147 size_t nstate = pstate[0];
148 char buf[4];
149 const char *p;
150 size_t m;
151 int res;
/* Set up p and m, the byte sequence to decode and its length: either S
   itself, or the buffered bytes followed by as many bytes of S as fit into
   the 4-byte buf (at most three).  */
153 switch (nstate)
155 case 0:
156 p = s;
157 m = n;
158 break;
159 case 3:
160 buf[2] = pstate[3];
161 FALLTHROUGH;
162 case 2:
163 buf[1] = pstate[2];
164 FALLTHROUGH;
165 case 1:
166 buf[0] = pstate[1];
167 p = buf;
168 m = nstate;
169 buf[m++] = s[0];
170 if (n >= 2 && m < 4)
172 buf[m++] = s[1];
173 if (n >= 3 && m < 4)
174 buf[m++] = s[2];
176 break;
/* Any other byte count means the state is not one this function produced.  */
177 default:
178 errno = EINVAL;
179 return (size_t)(-1);
182 /* Here m > 0. */
/* NOTE(review): the included file presumably decodes p[0..m-1], assigns res,
   and jumps to one of the labels success / incomplete / invalid below --
   confirm against mbrtowc-impl-utf8.h.  */
185 # define FITS_IN_CHAR_TYPE(wc) 1
186 # include "mbrtowc-impl-utf8.h"
189 success:
/* Consistency check: the character must extend past the bytes that were
   already buffered.  After that, res is reduced by nstate so that the return
   value counts only bytes taken from S in this call.  */
190 if (nstate >= (res > 0 ? res : 1))
191 abort ();
192 res -= nstate;
193 /* Set *ps to the initial state. */
194 # if defined _WIN32 && !defined __CYGWIN__
195 /* Native Windows. */
196 /* MSVC defines 'mbstate_t' as an 8-byte struct; the first 4 bytes matter.
197 On mingw, 'mbstate_t' is sometimes defined as 'int', sometimes defined
198 as an 8-byte struct, of which the first 4 bytes matter. */
199 *(unsigned int *)pstate = 0;
200 # elif defined __CYGWIN__
201 /* Cygwin defines 'mbstate_t' as an 8-byte struct; the first 4 bytes
202 matter. */
203 ps->__count = 0;
204 # else
205 pstate[0] = 0;
206 # endif
207 return res;
209 incomplete:
/* Append the bytes of S consumed in this call to the ones already stored in
   the state, record the new count in pstate[0], and report an incomplete
   character.  */
211 size_t k = nstate;
212 /* Here 0 <= k < m < 4. */
213 pstate[++k] = s[0];
214 if (k < m)
216 pstate[++k] = s[1];
217 if (k < m)
218 pstate[++k] = s[2];
220 if (k != m)
221 abort ();
223 pstate[0] = m;
224 return (size_t)(-2);
226 invalid:
227 errno = EILSEQ;
228 /* The conversion state is undefined, says POSIX. */
229 return (size_t)(-1);
/* Not a UTF-8 locale: convert with mbrtowc and store the result only when a
   character was actually converted.  */
231 else
233 wchar_t wc;
234 size_t ret = mbrtowc (&wc, s, n, ps);
235 if (ret < (size_t) -2 && pwc != NULL)
236 *pwc = wc;
237 return ret;
240 # else
242 /* char32_t and wchar_t are equivalent. Use mbrtowc(). */
243 wchar_t wc;
244 size_t ret = mbrtowc (&wc, s, n, ps);
245 if (ret < (size_t) -2 && pwc != NULL)
246 *pwc = wc;
247 return ret;
249 # endif
252 #endif