maint.mk: Update system header list for #include syntax checks.
[gnulib.git] / tests / test-mbrlen.c
blob149b7d8dd346c69e5c504d03c0c84e57b0df21aa
1 /* Test of determining the length of a multibyte character.
2 Copyright (C) 2008-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2023. */
19 #include <config.h>
21 #include <wchar.h>
23 #include "signature.h"
24 SIGNATURE_CHECK (mbrlen, size_t, (char const *, size_t, mbstate_t *));
26 #include <locale.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
31 #include "macros.h"
33 int
34 main (int argc, char *argv[])
36 mbstate_t state;
37 size_t ret;
39 /* configure should already have checked that the locale is supported. */
40 if (setlocale (LC_ALL, "") == NULL)
41 return 1;
43 /* Test zero-length input. */
45 memset (&state, '\0', sizeof (mbstate_t));
46 ret = mbrlen ("x", 0, &state);
47 ASSERT (ret == (size_t)(-2));
48 ASSERT (mbsinit (&state));
51 /* Test NUL byte input. */
53 memset (&state, '\0', sizeof (mbstate_t));
54 ret = mbrlen ("", 1, &state);
55 ASSERT (ret == 0);
56 ASSERT (mbsinit (&state));
59 /* Test single-byte input. */
61 int c;
62 char buf[1];
64 memset (&state, '\0', sizeof (mbstate_t));
65 for (c = 0; c < 0x100; c++)
66 switch (c)
68 case '\t': case '\v': case '\f':
69 case ' ': case '!': case '"': case '#': case '%':
70 case '&': case '\'': case '(': case ')': case '*':
71 case '+': case ',': case '-': case '.': case '/':
72 case '0': case '1': case '2': case '3': case '4':
73 case '5': case '6': case '7': case '8': case '9':
74 case ':': case ';': case '<': case '=': case '>':
75 case '?':
76 case 'A': case 'B': case 'C': case 'D': case 'E':
77 case 'F': case 'G': case 'H': case 'I': case 'J':
78 case 'K': case 'L': case 'M': case 'N': case 'O':
79 case 'P': case 'Q': case 'R': case 'S': case 'T':
80 case 'U': case 'V': case 'W': case 'X': case 'Y':
81 case 'Z':
82 case '[': case '\\': case ']': case '^': case '_':
83 case 'a': case 'b': case 'c': case 'd': case 'e':
84 case 'f': case 'g': case 'h': case 'i': case 'j':
85 case 'k': case 'l': case 'm': case 'n': case 'o':
86 case 'p': case 'q': case 'r': case 's': case 't':
87 case 'u': case 'v': case 'w': case 'x': case 'y':
88 case 'z': case '{': case '|': case '}': case '~':
89 /* c is in the ISO C "basic character set". */
90 ASSERT (c < 0x80);
91 /* c is an ASCII character. */
92 buf[0] = c;
94 ret = mbrlen (buf, 1, &state);
95 ASSERT (ret == 1);
96 ASSERT (mbsinit (&state));
98 break;
99 default:
100 break;
104 /* Test special calling convention, passing a NULL pointer. */
106 memset (&state, '\0', sizeof (mbstate_t));
107 ret = mbrlen (NULL, 5, &state);
108 ASSERT (ret == 0);
109 ASSERT (mbsinit (&state));
112 #ifdef __ANDROID__
113 /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
114 "C" locale. Furthermore, when you attempt to set the "C" or "POSIX"
115 locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
116 that is, effectively the "C.UTF-8" locale. */
117 if (argc > 1 && strcmp (argv[1], "1") == 0 && MB_CUR_MAX > 1)
118 argv[1] = "3";
119 #endif
121 if (argc > 1)
122 switch (argv[1][0])
124 case '1':
125 /* C or POSIX locale. */
127 int c;
128 char buf[1];
130 memset (&state, '\0', sizeof (mbstate_t));
131 for (c = 0; c < 0x100; c++)
132 if (c != 0)
134 /* We are testing all nonnull bytes. */
135 buf[0] = c;
137 ret = mbrlen (buf, 1, &state);
138 /* POSIX:2018 says: "In the POSIX locale an [EILSEQ] error
139 cannot occur since all byte values are valid characters." */
140 ASSERT (ret == 1);
141 ASSERT (mbsinit (&state));
144 return test_exit_status;
146 case '2':
147 /* Locale encoding is ISO-8859-1 or ISO-8859-15. */
149 char input[] = "B\374\337er"; /* "Büßer" */
150 memset (&state, '\0', sizeof (mbstate_t));
152 ret = mbrlen (input, 1, &state);
153 ASSERT (ret == 1);
154 ASSERT (mbsinit (&state));
155 input[0] = '\0';
157 ret = mbrlen (input + 1, 1, &state);
158 ASSERT (ret == 1);
159 ASSERT (mbsinit (&state));
160 input[1] = '\0';
162 ret = mbrlen (input + 2, 3, &state);
163 ASSERT (ret == 1);
164 ASSERT (mbsinit (&state));
165 input[2] = '\0';
167 ret = mbrlen (input + 3, 2, &state);
168 ASSERT (ret == 1);
169 ASSERT (mbsinit (&state));
170 input[3] = '\0';
172 ret = mbrlen (input + 4, 1, &state);
173 ASSERT (ret == 1);
174 ASSERT (mbsinit (&state));
176 return test_exit_status;
178 case '3':
179 /* Locale encoding is UTF-8. */
181 char input[] = "B\303\274\303\237er"; /* "Büßer" */
182 memset (&state, '\0', sizeof (mbstate_t));
184 ret = mbrlen (input, 1, &state);
185 ASSERT (ret == 1);
186 ASSERT (mbsinit (&state));
187 input[0] = '\0';
189 ret = mbrlen (input + 1, 1, &state);
190 ASSERT (ret == (size_t)(-2));
191 ASSERT (!mbsinit (&state));
192 input[1] = '\0';
194 ret = mbrlen (input + 2, 5, &state);
195 ASSERT (ret == 1);
196 ASSERT (mbsinit (&state));
197 input[2] = '\0';
199 ret = mbrlen (input + 3, 4, &state);
200 ASSERT (ret == 2);
201 ASSERT (mbsinit (&state));
202 input[3] = '\0';
203 input[4] = '\0';
205 ret = mbrlen (input + 5, 2, &state);
206 ASSERT (ret == 1);
207 ASSERT (mbsinit (&state));
208 input[5] = '\0';
210 ret = mbrlen (input + 6, 1, &state);
211 ASSERT (ret == 1);
212 ASSERT (mbsinit (&state));
214 return test_exit_status;
216 case '4':
217 /* Locale encoding is EUC-JP. */
219 char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
220 memset (&state, '\0', sizeof (mbstate_t));
222 ret = mbrlen (input, 1, &state);
223 ASSERT (ret == 1);
224 ASSERT (mbsinit (&state));
225 input[0] = '\0';
227 ret = mbrlen (input + 1, 2, &state);
228 ASSERT (ret == 2);
229 ASSERT (mbsinit (&state));
230 input[1] = '\0';
231 input[2] = '\0';
233 ret = mbrlen (input + 3, 1, &state);
234 ASSERT (ret == (size_t)(-2));
235 ASSERT (!mbsinit (&state));
236 input[3] = '\0';
238 ret = mbrlen (input + 4, 4, &state);
239 ASSERT (ret == 1);
240 ASSERT (mbsinit (&state));
241 input[4] = '\0';
243 ret = mbrlen (input + 5, 3, &state);
244 ASSERT (ret == 2);
245 ASSERT (mbsinit (&state));
246 input[5] = '\0';
247 input[6] = '\0';
249 ret = mbrlen (input + 7, 1, &state);
250 ASSERT (ret == 1);
251 ASSERT (mbsinit (&state));
253 return test_exit_status;
255 case '5':
256 /* Locale encoding is GB18030. */
258 char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
259 memset (&state, '\0', sizeof (mbstate_t));
261 ret = mbrlen (input, 1, &state);
262 ASSERT (ret == 1);
263 ASSERT (mbsinit (&state));
264 input[0] = '\0';
266 ret = mbrlen (input + 1, 1, &state);
267 ASSERT (ret == (size_t)(-2));
268 ASSERT (!mbsinit (&state));
269 input[1] = '\0';
271 ret = mbrlen (input + 2, 7, &state);
272 ASSERT (ret == 1);
273 ASSERT (mbsinit (&state));
274 input[2] = '\0';
276 ret = mbrlen (input + 3, 6, &state);
277 ASSERT (ret == 4);
278 ASSERT (mbsinit (&state));
279 input[3] = '\0';
280 input[4] = '\0';
281 input[5] = '\0';
282 input[6] = '\0';
284 ret = mbrlen (input + 7, 2, &state);
285 ASSERT (ret == 1);
286 ASSERT (mbsinit (&state));
287 input[7] = '\0';
289 ret = mbrlen (input + 8, 1, &state);
290 ASSERT (ret == 1);
291 ASSERT (mbsinit (&state));
293 return test_exit_status;
296 return 1;