1 /* Test of mbrlen(): determining the length of a multibyte character.
2 Copyright (C) 2008-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
27 #include "localcharset.h"
30 #if defined _WIN32 && !defined __CYGWIN__
/* Exercises mbrlen() in the locale NAME with the Windows codepage CODEPAGE.
   The locale is selected with setlocale() on the string NAME.CODEPAGE; the
   fallback visible below calls setlocale() with NAME alone and then
   overwrites __lc_codepage with CODEPAGE.  After generic checks (zero-length
   input, NUL byte, single bytes, NULL pointer) come encoding-specific checks
   for CP1252, CP1256, CP65001 (UTF-8), CP932, CP950, CP936 and CP54936
   (GB18030).
   NOTE(review): the return type, the braces, and the switch and return
   statements are not visible in this excerpt; confirm them against the
   full file.  */
33 test_one_locale (const char *name
, int codepage
)
39 /* Portable code to set the locale. */
/* NOTE(review): sprintf is unbounded here; a NAME too long for the
   1024-byte buffer would overflow it.  */
41 char name_with_codepage
[1024];
43 sprintf (name_with_codepage
, "%s.%d", name
, codepage
);
46 if (setlocale (LC_ALL
, name_with_codepage
) == NULL
)
50 /* Hacky way to set a locale.codepage combination that setlocale() refuses
53 /* Codepage of the current locale, set with setlocale().
54 Not necessarily the same as GetACP(). */
55 extern __declspec(dllimport
) unsigned int __lc_codepage
;
/* Fallback: select the locale by NAME alone; the codepage is patched in
   afterwards through __lc_codepage.  */
58 if (setlocale (LC_ALL
, name
) == NULL
)
61 /* Clobber the codepage and MB_CUR_MAX, both set by setlocale(). */
62 __lc_codepage
= codepage
;
80 /* Test whether the codepage is really available. */
81 memset (&state
, '\0', sizeof (mbstate_t));
82 if (mbrlen (" ", 1, &state
) == (size_t)(-1))
87 /* Test zero-length input. */
89 memset (&state
, '\0', sizeof (mbstate_t));
90 ret
= mbrlen ("x", 0, &state
);
91 /* gnulib's implementation returns (size_t)(-2).
92 The AIX 5.1 implementation returns (size_t)(-1).
93 glibc's implementation returns 0. */
94 ASSERT (ret
== (size_t)(-2) || ret
== (size_t)(-1) || ret
== 0);
95 ASSERT (mbsinit (&state
));
98 /* Test NUL byte input. */
100 memset (&state
, '\0', sizeof (mbstate_t));
101 ret
= mbrlen ("", 1, &state
);
103 ASSERT (mbsinit (&state
));
106 /* Test single-byte input. */
111 memset (&state
, '\0', sizeof (mbstate_t));
/* Walk through every byte value from 0x00 to 0xFF; the case labels below
   list the values that reach the mbrlen call.  */
112 for (c
= 0; c
< 0x100; c
++)
115 case '\t': case '\v': case '\f':
116 case ' ': case '!': case '"': case '#': case '%':
117 case '&': case '\'': case '(': case ')': case '*':
118 case '+': case ',': case '-': case '.': case '/':
119 case '0': case '1': case '2': case '3': case '4':
120 case '5': case '6': case '7': case '8': case '9':
121 case ':': case ';': case '<': case '=': case '>':
123 case 'A': case 'B': case 'C': case 'D': case 'E':
124 case 'F': case 'G': case 'H': case 'I': case 'J':
125 case 'K': case 'L': case 'M': case 'N': case 'O':
126 case 'P': case 'Q': case 'R': case 'S': case 'T':
127 case 'U': case 'V': case 'W': case 'X': case 'Y':
129 case '[': case '\\': case ']': case '^': case '_':
130 case 'a': case 'b': case 'c': case 'd': case 'e':
131 case 'f': case 'g': case 'h': case 'i': case 'j':
132 case 'k': case 'l': case 'm': case 'n': case 'o':
133 case 'p': case 'q': case 'r': case 's': case 't':
134 case 'u': case 'v': case 'w': case 'x': case 'y':
135 case 'z': case '{': case '|': case '}': case '~':
136 /* c is in the ISO C "basic character set". */
/* NOTE(review): buf presumably holds the single byte c; the assignment is
   not visible in this excerpt.  */
138 ret
= mbrlen (buf
, 1, &state
);
140 ASSERT (mbsinit (&state
));
145 /* Test special calling convention, passing a NULL pointer. */
147 memset (&state
, '\0', sizeof (mbstate_t));
148 ret
= mbrlen (NULL
, 5, &state
);
150 ASSERT (mbsinit (&state
));
156 /* Locale encoding is CP1252, an extension of ISO-8859-1. */
158 char input
[] = "B\374\337er"; /* "Büßer" */
159 memset (&state
, '\0', sizeof (mbstate_t));
161 ret
= mbrlen (input
, 1, &state
);
163 ASSERT (mbsinit (&state
));
166 ret
= mbrlen (input
+ 1, 1, &state
);
168 ASSERT (mbsinit (&state
));
171 ret
= mbrlen (input
+ 2, 3, &state
);
173 ASSERT (mbsinit (&state
));
176 ret
= mbrlen (input
+ 3, 2, &state
);
178 ASSERT (mbsinit (&state
));
181 ret
= mbrlen (input
+ 4, 1, &state
);
183 ASSERT (mbsinit (&state
));
188 /* Locale encoding is CP1256, not the same as ISO-8859-6. */
190 char input
[] = "x\302\341\346y"; /* "xآلوy" */
191 memset (&state
, '\0', sizeof (mbstate_t));
193 ret
= mbrlen (input
, 1, &state
);
195 ASSERT (mbsinit (&state
));
198 ret
= mbrlen (input
+ 1, 1, &state
);
200 ASSERT (mbsinit (&state
));
203 ret
= mbrlen (input
+ 2, 3, &state
);
205 ASSERT (mbsinit (&state
));
208 ret
= mbrlen (input
+ 3, 2, &state
);
210 ASSERT (mbsinit (&state
));
213 ret
= mbrlen (input
+ 4, 1, &state
);
215 ASSERT (mbsinit (&state
));
220 /* Locale encoding is CP65001 = UTF-8. */
221 if (strcmp (locale_charset (), "UTF-8") != 0)
224 char input
[] = "B\303\274\303\237er"; /* "Büßer" */
225 memset (&state
, '\0', sizeof (mbstate_t));
227 ret
= mbrlen (input
, 1, &state
);
229 ASSERT (mbsinit (&state
));
/* Only the byte \303, the first half of the two-byte sequence \303\274, is
   supplied: mbrlen must report an incomplete character, (size_t)(-2), and
   leave STATE in a non-initial condition.  */
232 ret
= mbrlen (input
+ 1, 1, &state
);
233 ASSERT (ret
== (size_t)(-2));
234 ASSERT (!mbsinit (&state
));
237 ret
= mbrlen (input
+ 2, 5, &state
);
239 ASSERT (mbsinit (&state
));
242 ret
= mbrlen (input
+ 3, 4, &state
);
244 ASSERT (mbsinit (&state
));
248 ret
= mbrlen (input
+ 5, 2, &state
);
250 ASSERT (mbsinit (&state
));
253 ret
= mbrlen (input
+ 6, 1, &state
);
255 ASSERT (mbsinit (&state
));
257 /* Test some invalid input. */
258 memset (&state
, '\0', sizeof (mbstate_t));
259 ret
= mbrlen ("\377", 1, &state
); /* 0xFF */
260 ASSERT (ret
== (size_t)-1);
261 ASSERT (errno
== EILSEQ
);
263 memset (&state
, '\0', sizeof (mbstate_t));
264 ret
= mbrlen ("\303\300", 2, &state
); /* 0xC3 0xC0 */
265 ASSERT (ret
== (size_t)-1);
266 ASSERT (errno
== EILSEQ
);
268 memset (&state
, '\0', sizeof (mbstate_t));
269 ret
= mbrlen ("\343\300", 2, &state
); /* 0xE3 0xC0 */
270 ASSERT (ret
== (size_t)-1);
271 ASSERT (errno
== EILSEQ
);
273 memset (&state
, '\0', sizeof (mbstate_t));
274 ret
= mbrlen ("\343\300\200", 3, &state
); /* 0xE3 0xC0 0x80 */
275 ASSERT (ret
== (size_t)-1);
276 ASSERT (errno
== EILSEQ
);
278 memset (&state
, '\0', sizeof (mbstate_t));
279 ret
= mbrlen ("\343\200\300", 3, &state
); /* 0xE3 0x80 0xC0 */
280 ASSERT (ret
== (size_t)-1);
281 ASSERT (errno
== EILSEQ
);
283 memset (&state
, '\0', sizeof (mbstate_t));
284 ret
= mbrlen ("\363\300", 2, &state
); /* 0xF3 0xC0 */
285 ASSERT (ret
== (size_t)-1);
286 ASSERT (errno
== EILSEQ
);
288 memset (&state
, '\0', sizeof (mbstate_t));
289 ret
= mbrlen ("\363\300\200\200", 4, &state
); /* 0xF3 0xC0 0x80 0x80 */
290 ASSERT (ret
== (size_t)-1);
291 ASSERT (errno
== EILSEQ
);
293 memset (&state
, '\0', sizeof (mbstate_t));
294 ret
= mbrlen ("\363\200\300", 3, &state
); /* 0xF3 0x80 0xC0 */
295 ASSERT (ret
== (size_t)-1);
296 ASSERT (errno
== EILSEQ
);
298 memset (&state
, '\0', sizeof (mbstate_t));
299 ret
= mbrlen ("\363\200\300\200", 4, &state
); /* 0xF3 0x80 0xC0 0x80 */
300 ASSERT (ret
== (size_t)-1);
301 ASSERT (errno
== EILSEQ
);
303 memset (&state
, '\0', sizeof (mbstate_t));
304 ret
= mbrlen ("\363\200\200\300", 4, &state
); /* 0xF3 0x80 0x80 0xC0 */
305 ASSERT (ret
== (size_t)-1);
306 ASSERT (errno
== EILSEQ
);
311 /* Locale encoding is CP932, similar to Shift_JIS. */
313 char input
[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
314 memset (&state
, '\0', sizeof (mbstate_t));
316 ret
= mbrlen (input
, 1, &state
);
318 ASSERT (mbsinit (&state
));
321 ret
= mbrlen (input
+ 1, 2, &state
);
323 ASSERT (mbsinit (&state
));
/* Only the byte \226, the first half of the two-byte character \226\173, is
   supplied: expect (size_t)(-2) and a non-initial STATE.  */
327 ret
= mbrlen (input
+ 3, 1, &state
);
328 ASSERT (ret
== (size_t)(-2));
329 ASSERT (!mbsinit (&state
));
332 ret
= mbrlen (input
+ 4, 4, &state
);
334 ASSERT (mbsinit (&state
));
337 ret
= mbrlen (input
+ 5, 3, &state
);
339 ASSERT (mbsinit (&state
));
343 ret
= mbrlen (input
+ 7, 1, &state
);
345 ASSERT (mbsinit (&state
));
347 /* Test some invalid input. */
348 memset (&state
, '\0', sizeof (mbstate_t));
/* Two outcomes are accepted for the lone byte 0xFF: an EILSEQ failure, or
   an incomplete-character result, (size_t)(-2).  */
349 ret
= mbrlen ("\377", 1, &state
); /* 0xFF */
350 ASSERT ((ret
== (size_t)-1 && errno
== EILSEQ
) || ret
== (size_t)-2);
352 memset (&state
, '\0', sizeof (mbstate_t));
353 ret
= mbrlen ("\225\377", 2, &state
); /* 0x95 0xFF */
354 ASSERT ((ret
== (size_t)-1 && errno
== EILSEQ
) || ret
== 2);
359 /* Locale encoding is CP950, similar to Big5. */
361 char input
[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
362 memset (&state
, '\0', sizeof (mbstate_t));
364 ret
= mbrlen (input
, 1, &state
);
366 ASSERT (mbsinit (&state
));
369 ret
= mbrlen (input
+ 1, 2, &state
);
371 ASSERT (mbsinit (&state
));
375 ret
= mbrlen (input
+ 3, 1, &state
);
376 ASSERT (ret
== (size_t)(-2));
377 ASSERT (!mbsinit (&state
));
380 ret
= mbrlen (input
+ 4, 4, &state
);
382 ASSERT (mbsinit (&state
));
385 ret
= mbrlen (input
+ 5, 3, &state
);
387 ASSERT (mbsinit (&state
));
391 ret
= mbrlen (input
+ 7, 1, &state
);
393 ASSERT (mbsinit (&state
));
395 /* Test some invalid input. */
396 memset (&state
, '\0', sizeof (mbstate_t));
397 ret
= mbrlen ("\377", 1, &state
); /* 0xFF */
398 ASSERT ((ret
== (size_t)-1 && errno
== EILSEQ
) || ret
== (size_t)-2);
400 memset (&state
, '\0', sizeof (mbstate_t));
401 ret
= mbrlen ("\225\377", 2, &state
); /* 0x95 0xFF */
402 ASSERT ((ret
== (size_t)-1 && errno
== EILSEQ
) || ret
== 2);
407 /* Locale encoding is CP936 = GBK, an extension of GB2312. */
409 char input
[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
410 memset (&state
, '\0', sizeof (mbstate_t));
412 ret
= mbrlen (input
, 1, &state
);
414 ASSERT (mbsinit (&state
));
417 ret
= mbrlen (input
+ 1, 2, &state
);
419 ASSERT (mbsinit (&state
));
423 ret
= mbrlen (input
+ 3, 1, &state
);
424 ASSERT (ret
== (size_t)(-2));
425 ASSERT (!mbsinit (&state
));
428 ret
= mbrlen (input
+ 4, 4, &state
);
430 ASSERT (mbsinit (&state
));
433 ret
= mbrlen (input
+ 5, 3, &state
);
435 ASSERT (mbsinit (&state
));
439 ret
= mbrlen (input
+ 7, 1, &state
);
441 ASSERT (mbsinit (&state
));
443 /* Test some invalid input. */
444 memset (&state
, '\0', sizeof (mbstate_t));
445 ret
= mbrlen ("\377", 1, &state
); /* 0xFF */
446 ASSERT ((ret
== (size_t)-1 && errno
== EILSEQ
) || ret
== (size_t)-2);
448 memset (&state
, '\0', sizeof (mbstate_t));
449 ret
= mbrlen ("\225\377", 2, &state
); /* 0x95 0xFF */
450 ASSERT ((ret
== (size_t)-1 && errno
== EILSEQ
) || ret
== 2);
455 /* Locale encoding is CP54936 = GB18030. */
456 if (strcmp (locale_charset (), "GB18030") != 0)
459 char input
[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
460 memset (&state
, '\0', sizeof (mbstate_t));
462 ret
= mbrlen (input
, 1, &state
);
464 ASSERT (mbsinit (&state
));
/* Only the byte \250, the first half of the two-byte character \250\271, is
   supplied: expect (size_t)(-2) and a non-initial STATE.  */
467 ret
= mbrlen (input
+ 1, 1, &state
);
468 ASSERT (ret
== (size_t)(-2));
469 ASSERT (!mbsinit (&state
));
472 ret
= mbrlen (input
+ 2, 7, &state
);
474 ASSERT (mbsinit (&state
));
477 ret
= mbrlen (input
+ 3, 6, &state
);
479 ASSERT (mbsinit (&state
));
485 ret
= mbrlen (input
+ 7, 2, &state
);
487 ASSERT (mbsinit (&state
));
490 ret
= mbrlen (input
+ 8, 1, &state
);
492 ASSERT (mbsinit (&state
));
494 /* Test some invalid input. */
495 memset (&state
, '\0', sizeof (mbstate_t));
496 ret
= mbrlen ("\377", 1, &state
); /* 0xFF */
497 ASSERT (ret
== (size_t)-1);
498 ASSERT (errno
== EILSEQ
);
500 memset (&state
, '\0', sizeof (mbstate_t));
501 ret
= mbrlen ("\225\377", 2, &state
); /* 0x95 0xFF */
502 ASSERT (ret
== (size_t)-1);
503 ASSERT (errno
== EILSEQ
);
505 memset (&state
, '\0', sizeof (mbstate_t));
506 ret
= mbrlen ("\201\045", 2, &state
); /* 0x81 0x25 */
507 ASSERT (ret
== (size_t)-1);
508 ASSERT (errno
== EILSEQ
);
510 memset (&state
, '\0', sizeof (mbstate_t));
511 ret
= mbrlen ("\201\060\377", 3, &state
); /* 0x81 0x30 0xFF */
512 ASSERT (ret
== (size_t)-1);
513 ASSERT (errno
== EILSEQ
);
515 memset (&state
, '\0', sizeof (mbstate_t));
516 ret
= mbrlen ("\201\060\377\064", 4, &state
); /* 0x81 0x30 0xFF 0x34 */
517 ASSERT (ret
== (size_t)-1);
518 ASSERT (errno
== EILSEQ
);
520 memset (&state
, '\0', sizeof (mbstate_t));
521 ret
= mbrlen ("\201\060\211\072", 4, &state
); /* 0x81 0x30 0x89 0x3A */
522 ASSERT (ret
== (size_t)-1);
523 ASSERT (errno
== EILSEQ
);
/* Entry point on native Windows.  The last command-line argument is parsed
   as the codepage number; each argument before it, argv[1] through
   argv[argc - 2], is a locale name handed to test_one_locale together with
   that codepage.
   NOTE(review): the declaration of result and the handling of ret inside
   the loop are not visible in this excerpt; confirm against the full file.  */
533 main (int argc
, char *argv
[])
/* NOTE(review): atoi reports no conversion errors, so a non-numeric last
   argument is not diagnosed here.  */
535 int codepage
= atoi (argv
[argc
- 1]);
/* Try every locale name given on the command line with the same codepage.  */
540 for (i
= 1; i
< argc
- 1; i
++)
542 int ret
= test_one_locale (argv
[i
], codepage
);
/* A failure already recorded in test_exit_status is returned before the
   skip message is printed.  */
550 if (test_exit_status
!= EXIT_SUCCESS
)
551 return test_exit_status
;
552 fprintf (stderr
, "Skipping test: found no locale with codepage %d\n",
/* A nonzero result takes precedence; otherwise test_exit_status is
   returned.  */
555 return (result
? result
: test_exit_status
);
/* Entry point used when the test does not apply: it prints a notice to
   stderr saying that this is not a native Windows system.
   NOTE(review): presumably this is the alternative branch of the _WIN32
   check near the top of the file; the return statement is not visible in
   this excerpt.  */
561 main (int argc
, char *argv
[])
563 fputs ("Skipping test: not a native Windows system\n", stderr
);