maint.mk: Update system header list for #include syntax checks.
[gnulib.git] / tests / test-mbrlen-w32.c
blob4f1415dedb599f459735a3b1d974f9c8e7f1e4f4
1 /* Test of conversion of multibyte character to wide character.
2 Copyright (C) 2008-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include <wchar.h>
21 #include <errno.h>
22 #include <locale.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
27 #include "localcharset.h"
28 #include "macros.h"
30 #if defined _WIN32 && !defined __CYGWIN__
/* Exercise mbrlen() in the locale NAME combined with the Windows code page
   CODEPAGE.

   The locale is selected as "NAME.CODEPAGE".  After a few encoding-independent
   checks, a code-page specific set of valid and invalid byte sequences is
   fed to mbrlen().

   Return values:
     0   all checks for a known code page were executed,
     77  the locale could not be set (or does not report the expected
         charset), i.e. this locale should be skipped,
     1   CODEPAGE is not one of the code pages this test knows about.

   Failed expectations are reported through ASSERT (from "macros.h"; its
   exact failure behavior is not visible from this file).  */
static int
test_one_locale (const char *name, int codepage)
{
  mbstate_t state;
  size_t ret;

# if 1
  /* Portable code to set the locale.  */
  {
    char name_with_codepage[1024];
    int len;

    /* Build "NAME.CODEPAGE".  NAME comes from the command line, so bound the
       write; a name that does not fit cannot denote a usable locale, and is
       treated like a locale that setlocale() rejects.  */
    len = snprintf (name_with_codepage, sizeof name_with_codepage, "%s.%d",
                    name, codepage);
    if (len < 0 || (size_t) len >= sizeof name_with_codepage)
      return 77;

    /* Set the locale.  */
    if (setlocale (LC_ALL, name_with_codepage) == NULL)
      return 77;
  }
# else
  /* Hacky way to set a locale.codepage combination that setlocale() refuses
     to set.  */
  {
    /* Codepage of the current locale, set with setlocale().
       Not necessarily the same as GetACP().  */
    extern __declspec(dllimport) unsigned int __lc_codepage;

    /* Set the locale.  */
    if (setlocale (LC_ALL, name) == NULL)
      return 77;

    /* Clobber the codepage and MB_CUR_MAX, both set by setlocale().  */
    __lc_codepage = codepage;
    switch (codepage)
      {
      case 1252:
      case 1256:
        MB_CUR_MAX = 1;
        break;
      case 932:
      case 950:
      case 936:
        MB_CUR_MAX = 2;
        break;
      case 54936:
      case 65001:
        MB_CUR_MAX = 4;
        break;
      }

    /* Test whether the codepage is really available.  */
    memset (&state, '\0', sizeof (mbstate_t));
    if (mbrlen (" ", 1, &state) == (size_t)(-1))
      return 77;
  }
# endif

  /* Test zero-length input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    ret = mbrlen ("x", 0, &state);
    /* gnulib's implementation returns (size_t)(-2).
       The AIX 5.1 implementation returns (size_t)(-1).
       glibc's implementation returns 0.  */
    ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test NUL byte input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    ret = mbrlen ("", 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test single-byte input.  */
  {
    int c;
    char buf[1];

    memset (&state, '\0', sizeof (mbstate_t));
    /* Walk over all byte values, but only check those listed below; other
       bytes are skipped because the switch has no default branch.  */
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          ret = mbrlen (buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (mbsinit (&state));
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    ret = mbrlen (NULL, 5, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Code-page specific checks.  In each case the input is consumed piece by
     piece; bytes that have been consumed are overwritten with NUL before the
     next call (presumably so that a later call cannot succeed by looking back
     at them).  */
  switch (codepage)
    {
    case 1252:
      /* Locale encoding is CP1252, an extension of ISO-8859-1.  */
      {
        char input[] = "B\374\337er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        ret = mbrlen (input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        ret = mbrlen (input + 1, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[1] = '\0';

        ret = mbrlen (input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        ret = mbrlen (input + 3, 2, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[3] = '\0';

        ret = mbrlen (input + 4, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
      }
      return 0;

    case 1256:
      /* Locale encoding is CP1256, not the same as ISO-8859-6.  */
      {
        char input[] = "x\302\341\346y"; /* "xآلوy" */
        memset (&state, '\0', sizeof (mbstate_t));

        ret = mbrlen (input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        ret = mbrlen (input + 1, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[1] = '\0';

        ret = mbrlen (input + 2, 3, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        ret = mbrlen (input + 3, 2, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[3] = '\0';

        ret = mbrlen (input + 4, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
      }
      return 0;

    case 65001:
      /* Locale encoding is CP65001 = UTF-8.  */
      if (strcmp (locale_charset (), "UTF-8") != 0)
        return 77;
      {
        char input[] = "B\303\274\303\237er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        ret = mbrlen (input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        /* Only the first byte of a 2-byte character: incomplete.  */
        ret = mbrlen (input + 1, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (!mbsinit (&state));
        input[1] = '\0';

        /* The pending character is completed by one more byte.  */
        ret = mbrlen (input + 2, 5, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        ret = mbrlen (input + 3, 4, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));
        input[3] = '\0';
        input[4] = '\0';

        ret = mbrlen (input + 5, 2, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[5] = '\0';

        ret = mbrlen (input + 6, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\303\300", 2, &state); /* 0xC3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\343\300", 2, &state); /* 0xE3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\343\300\200", 3, &state); /* 0xE3 0xC0 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\343\200\300", 3, &state); /* 0xE3 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\363\300", 2, &state); /* 0xF3 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\363\300\200\200", 4, &state); /* 0xF3 0xC0 0x80 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\363\200\300", 3, &state); /* 0xF3 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\363\200\300\200", 4, &state); /* 0xF3 0x80 0xC0 0x80 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\363\200\200\300", 4, &state); /* 0xF3 0x80 0x80 0xC0 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    case 932:
      /* Locale encoding is CP932, similar to Shift_JIS.  */
      {
        char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        ret = mbrlen (input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        ret = mbrlen (input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* Only the first byte of a 2-byte character: incomplete.  */
        ret = mbrlen (input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        /* The pending character is completed by one more byte.  */
        ret = mbrlen (input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        ret = mbrlen (input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        ret = mbrlen (input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  More than one outcome is accepted.  */
        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\377", 1, &state); /* 0xFF */
        ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
      }
      return 0;

    case 950:
      /* Locale encoding is CP950, similar to Big5.  */
      {
        char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        ret = mbrlen (input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        ret = mbrlen (input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* Only the first byte of a 2-byte character: incomplete.  */
        ret = mbrlen (input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        /* The pending character is completed by one more byte.  */
        ret = mbrlen (input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        ret = mbrlen (input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        ret = mbrlen (input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  More than one outcome is accepted.  */
        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\377", 1, &state); /* 0xFF */
        ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
      }
      return 0;

    case 936:
      /* Locale encoding is CP936 = GBK, an extension of GB2312.  */
      {
        char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
        memset (&state, '\0', sizeof (mbstate_t));

        ret = mbrlen (input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        ret = mbrlen (input + 1, 2, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));
        input[1] = '\0';
        input[2] = '\0';

        /* Only the first byte of a 2-byte character: incomplete.  */
        ret = mbrlen (input + 3, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (!mbsinit (&state));
        input[3] = '\0';

        /* The pending character is completed by one more byte.  */
        ret = mbrlen (input + 4, 4, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[4] = '\0';

        ret = mbrlen (input + 5, 3, &state);
        ASSERT (ret == 2);
        ASSERT (mbsinit (&state));
        input[5] = '\0';
        input[6] = '\0';

        ret = mbrlen (input + 7, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  More than one outcome is accepted.  */
        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\377", 1, &state); /* 0xFF */
        ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == (size_t)-2);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT ((ret == (size_t)-1 && errno == EILSEQ) || ret == 2);
      }
      return 0;

    case 54936:
      /* Locale encoding is CP54936 = GB18030.  */
      if (strcmp (locale_charset (), "GB18030") != 0)
        return 77;
      {
        char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
        memset (&state, '\0', sizeof (mbstate_t));

        ret = mbrlen (input, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[0] = '\0';

        /* Only the first byte of a 2-byte character: incomplete.  */
        ret = mbrlen (input + 1, 1, &state);
        ASSERT (ret == (size_t)(-2));
        ASSERT (!mbsinit (&state));
        input[1] = '\0';

        /* The pending character is completed by one more byte.  */
        ret = mbrlen (input + 2, 7, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[2] = '\0';

        /* A 4-byte character.  */
        ret = mbrlen (input + 3, 6, &state);
        ASSERT (ret == 4);
        ASSERT (mbsinit (&state));
        input[3] = '\0';
        input[4] = '\0';
        input[5] = '\0';
        input[6] = '\0';

        ret = mbrlen (input + 7, 2, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));
        input[7] = '\0';

        ret = mbrlen (input + 8, 1, &state);
        ASSERT (ret == 1);
        ASSERT (mbsinit (&state));

        /* Test some invalid input.  */
        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\377", 1, &state); /* 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\225\377", 2, &state); /* 0x95 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\201\045", 2, &state); /* 0x81 0x25 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\201\060\377", 3, &state); /* 0x81 0x30 0xFF */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\201\060\377\064", 4, &state); /* 0x81 0x30 0xFF 0x34 */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);

        memset (&state, '\0', sizeof (mbstate_t));
        ret = mbrlen ("\201\060\211\072", 4, &state); /* 0x81 0x30 0x89 0x3A */
        ASSERT (ret == (size_t)-1);
        ASSERT (errno == EILSEQ);
      }
      return 0;

    default:
      /* Unknown code page: nothing was tested.  */
      return 1;
    }
}
533 main (int argc, char *argv[])
535 int codepage = atoi (argv[argc - 1]);
536 int result;
537 int i;
539 result = 77;
540 for (i = 1; i < argc - 1; i++)
542 int ret = test_one_locale (argv[i], codepage);
544 if (ret != 77)
545 result = ret;
548 if (result == 77)
550 if (test_exit_status != EXIT_SUCCESS)
551 return test_exit_status;
552 fprintf (stderr, "Skipping test: found no locale with codepage %d\n",
553 codepage);
555 return (result ? result : test_exit_status);
558 #else
/* Entry point on platforms other than native Windows: there is nothing to
   test, so report the test as skipped (exit status 77).  */
int
main (int argc, char *argv[])
{
  fprintf (stderr, "%s", "Skipping test: not a native Windows system\n");
  return 77;
}
567 #endif