malloc-h: New module.
[gnulib.git] / lib / mbrtoc32.c
blob04f3dbd2ace1fa49c82c453ea686a43e3c73cd8c
1 /* Convert multibyte character to 32-bit wide character.
2 Copyright (C) 2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2020. */
19 #include <config.h>
21 /* Specification. */
22 #include <uchar.h>
24 #include "attribute.h"
26 #include <errno.h>
27 #include <stdlib.h>
29 #if GNULIB_defined_mbstate_t /* AIX, IRIX */
30 /* Implement mbrtoc32() on top of mbtowc() for the non-UTF-8 locales
31 and directly for the UTF-8 locales. */
/* Pull in the platform's threading API: <windows.h> on native Windows,
   <pthread.h> when HAVE_PTHREAD_API is set, otherwise <threads.h> when
   HAVE_THREADS_H is set.
   NOTE(review): presumably these are needed by "mbtowc-lock.h", which is
   included further down -- confirm.  */
33 # if defined _WIN32 && !defined __CYGWIN__
35 # define WIN32_LEAN_AND_MEAN /* avoid including junk */
36 # include <windows.h>
38 # elif HAVE_PTHREAD_API
40 # include <pthread.h>
41 # if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
42 # include <threads.h>
/* thrd_exit is declared as a weak symbol, and c11_threads_in_use() tests
   whether its address is non-NULL.
   NOTE(review): the intent appears to be "is the C11 threads library linked
   into this program?" -- confirm.  */
43 # pragma weak thrd_exit
44 # define c11_threads_in_use() (thrd_exit != NULL)
45 # else
/* Without both <threads.h> and weak symbols, c11_threads_in_use() is the
   constant 0.  */
46 # define c11_threads_in_use() 0
47 # endif
49 # elif HAVE_THREADS_H
51 # include <threads.h>
53 # endif
55 # include "verify.h"
56 # include "lc-charset-dispatch.h"
57 # include "mbtowc-lock.h"
/* Check that mbstate_t is at least 4 bytes large.
   NOTE(review): verify() comes from "verify.h" and is presumably a
   compile-time assertion; the 4 matches the size of internal_state below --
   confirm that this is the reason for the check.  */
59 verify (sizeof (mbstate_t) >= 4);
/* File-local 4-byte conversion state.
   NOTE(review): presumably used by the code in "mbrtowc-impl.h" when the
   caller passes ps == NULL -- confirm against that header.  */
60 static char internal_state[4];
/* mbrtoc32 variant used when GNULIB_defined_mbstate_t is set (the
   surrounding #if names AIX and IRIX).  Only the function header is written
   here; the rest of the definition is supplied textually by
   "mbrtowc-impl.h".  */
62 size_t
63 mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
/* FITS_IN_CHAR_TYPE(wc) is defined to always yield 1 before the
   implementation header is included.
   NOTE(review): presumably the header uses it to ask whether a decoded value
   fits in the result type, which is always the case for char32_t --
   confirm.  */
65 # define FITS_IN_CHAR_TYPE(wc) 1
66 # include "mbrtowc-impl.h"
69 #else /* glibc, macOS, FreeBSD, NetBSD, OpenBSD, HP-UX, Solaris, Cygwin, mingw, MSVC, Minix, Android */
71 /* Implement mbrtoc32() based on the original mbrtoc32() or on mbrtowc(). */
73 # include <wchar.h>
75 # include "localcharset.h"
76 # include "streq.h"
78 # if MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ
79 # include "hard-locale.h"
80 # include <locale.h>
81 # endif
/* Conversion state used when the caller passes ps == NULL.  */
83 static mbstate_t internal_state;
/* mbrtoc32 variant for platforms that use the system's own mbstate_t.

   Converts the multibyte character that starts at S, looking at no more
   than N bytes, and stores the resulting character through PWC when PWC is
   non-NULL.  PS is the conversion state; when it is NULL, the file-local
   internal_state is used instead.  A NULL S is handled like the call
   (NULL, "", 1, PS).

   Exactly one of three implementations is compiled in:
     - HAVE_WORKING_MBRTOC32: delegate to the system's mbrtoc32().
     - _GL_LARGE_CHAR32_T: decode UTF-8 locally, and use mbrtowc() for every
       other encoding.
     - otherwise: delegate to mbrtowc().

   Return value: in the delegating paths, whatever the underlying function
   returned (except for the C-locale fixup described below).  In the UTF-8
   path: the number of bytes of S consumed on success, (size_t)-2 when the
   input is an incomplete character, and (size_t)-1 with errno set to EILSEQ
   (invalid input) or EINVAL (unusable state in *PS).  */
85 size_t
86 mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
/* NOTE(review): after this #undef, the calls to mbrtoc32 further down are
   meant to reach the original (system) function; presumably <uchar.h>
   otherwise defines mbrtoc32 as a macro that redirects to this replacement
   -- confirm.  */
87 # undef mbrtoc32
89 /* It's simpler to handle the case s == NULL upfront, than to worry about
90 this case later, before every test of pwc and n. */
91 if (s == NULL)
93 pwc = NULL;
94 s = "";
95 n = 1;
/* With no input bytes, report "incomplete character" right away.  This is
   compiled in when the platform has the empty-input bug or when the UTF-8
   code below, which relies on n > 0, is in use.  */
98 # if MBRTOC32_EMPTY_INPUT_BUG || _GL_LARGE_CHAR32_T
99 if (n == 0)
100 return (size_t) -2;
101 # endif
103 if (ps == NULL)
104 ps = &internal_state;
106 # if HAVE_WORKING_MBRTOC32
107 /* mbrtoc32() may produce different values for wc than mbrtowc(). Therefore
108 use mbrtoc32(). */
/* On native Windows the result goes into a local variable first and is copied
   to *pwc only when the call succeeded (ret < (size_t) -2) and pwc is
   non-NULL.  Elsewhere pwc is handed to the system function directly.  */
110 # if defined _WIN32 && !defined __CYGWIN__
111 char32_t wc;
112 size_t ret = mbrtoc32 (&wc, s, n, ps);
113 if (ret < (size_t) -2 && pwc != NULL)
114 *pwc = wc;
115 # else
116 size_t ret = mbrtoc32 (pwc, s, n, ps);
117 # endif
/* C-locale fixup: if the system function reported an error or an incomplete
   character ((size_t) -1 or (size_t) -2) and hard_locale (LC_CTYPE) is
   false, treat the first byte of s as a complete character whose value is
   that byte, and return 1.
   NOTE(review): hard_locale() presumably returns false for the "C"/"POSIX"
   locale -- confirm against "hard-locale.h".  */
119 # if MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ
120 if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))
122 if (pwc != NULL)
123 *pwc = (unsigned char) *s;
124 return 1;
126 # endif
128 return ret;
130 # elif _GL_LARGE_CHAR32_T
132 /* Special-case all encodings that may produce wide character values
133 > WCHAR_MAX. */
134 const char *encoding = locale_charset ();
135 if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
137 /* Special-case the UTF-8 encoding. Assume that the wide-character
138 encoding in a UTF-8 locale is UCS-2 or, equivalently, UTF-16. */
139 /* Here n > 0. */
/* In this branch *ps is accessed as a byte array: pstate[0] holds the number
   of bytes buffered from earlier calls (0..3), and pstate[1..3] hold those
   bytes.  */
140 char *pstate = (char *)ps;
141 size_t nstate = pstate[0];
142 char buf[4];
143 const char *p;
144 size_t m;
145 int res;
/* Set p/m to the byte sequence to decode.  With no buffered bytes, decode
   directly from s.  Otherwise copy the buffered bytes into buf (the cases
   fall through so that all of them are copied) and append bytes from s, up
   to 3 of them and never more than 4 bytes in total.  Any other value of
   pstate[0] means that the state is unusable: fail with EINVAL.  */
147 switch (nstate)
149 case 0:
150 p = s;
151 m = n;
152 break;
153 case 3:
154 buf[2] = pstate[3];
155 FALLTHROUGH;
156 case 2:
157 buf[1] = pstate[2];
158 FALLTHROUGH;
159 case 1:
160 buf[0] = pstate[1];
161 p = buf;
162 m = nstate;
163 buf[m++] = s[0];
164 if (n >= 2 && m < 4)
166 buf[m++] = s[1];
167 if (n >= 3 && m < 4)
168 buf[m++] = s[2];
170 break;
171 default:
172 errno = EINVAL;
173 return (size_t)(-1);
176 /* Here m > 0. */
/* The UTF-8 decoding itself is textually included.
   NOTE(review): the header presumably reads p, m and pwc, sets res, and
   jumps to one of the labels success / incomplete / invalid below --
   confirm against "mbrtowc-impl-utf8.h".  */
179 # define FITS_IN_CHAR_TYPE(wc) 1
180 # include "mbrtowc-impl-utf8.h"
/* Success: subtract the bytes that were already buffered in *ps, so that the
   return value counts only bytes taken from s in this call.  The abort()
   guards against a result that would not use any byte of s.  */
183 success:
184 if (nstate >= (res > 0 ? res : 1))
185 abort ();
186 res -= nstate;
187 /* Set *ps to the initial state. */
188 # if defined _WIN32 && !defined __CYGWIN__
189 /* Native Windows. */
190 /* MSVC defines 'mbstate_t' as an 8-byte struct; the first 4 bytes matter.
191 On mingw, 'mbstate_t' is sometimes defined as 'int', sometimes defined
192 as an 8-byte struct, of which the first 4 bytes matter. */
193 *(unsigned int *)pstate = 0;
194 # elif defined __CYGWIN__
195 /* Cygwin defines 'mbstate_t' as an 8-byte struct; the first 4 bytes
196 matter. */
197 ps->__count = 0;
198 # else
199 pstate[0] = 0;
200 # endif
201 return res;
/* Incomplete character: append the bytes of s that were examined in this call
   to the bytes already stored in *ps, record the new count m in pstate[0],
   and return (size_t) -2.  The abort() checks that exactly m bytes end up
   stored.  */
203 incomplete:
205 size_t k = nstate;
206 /* Here 0 <= k < m < 4. */
207 pstate[++k] = s[0];
208 if (k < m)
210 pstate[++k] = s[1];
211 if (k < m)
212 pstate[++k] = s[2];
214 if (k != m)
215 abort ();
217 pstate[0] = m;
218 return (size_t)(-2);
/* Invalid byte sequence: fail with EILSEQ.  */
220 invalid:
221 errno = EILSEQ;
222 /* The conversion state is undefined, says POSIX. */
223 return (size_t)(-1);
225 else
/* Any encoding other than UTF-8: let mbrtowc() do the conversion and store
   its result through pwc only on success.  */
227 wchar_t wc;
228 size_t ret = mbrtowc (&wc, s, n, ps);
229 if (ret < (size_t) -2 && pwc != NULL)
230 *pwc = wc;
231 return ret;
234 # else
236 /* char32_t and wchar_t are equivalent. Use mbrtowc(). */
/* The result is stored through pwc only when mbrtowc() succeeded
   (ret < (size_t) -2) and pwc is non-NULL.  */
237 wchar_t wc;
238 size_t ret = mbrtowc (&wc, s, n, ps);
239 if (ret < (size_t) -2 && pwc != NULL)
240 *pwc = wc;
241 return ret;
243 # endif
246 #endif