unistr/u{8,16,32}-uctomb: Avoid possible trouble with huge strings.
[gnulib.git] / tests / test-wcrtomb-w32.c
blob0f02ed2559e376860af0936610dc71ea9c6f1dac
1 /* Test of conversion of wide character to multibyte character.
2 Copyright (C) 2008-2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include <wchar.h>
21 #include <locale.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
26 #include "localcharset.h"
27 #include "macros.h"
29 #if defined _WIN32 && !defined __CYGWIN__
/* Run the wcrtomb() checks in the locale "NAME.CODEPAGE".

   NAME is the locale name without codepage suffix; CODEPAGE is the numeric
   Windows codepage to append to it.

   Return value:
     77  the locale could not be set (including a NAME too long to format),
         or, for codepages 54936 and 65001, locale_charset () does not
         report the expected encoding;
     0   all checks for a handled codepage passed;
     1   CODEPAGE is not one of the values handled below.

   Individual checks use ASSERT from "macros.h"; presumably a failed ASSERT
   terminates the test program (its definition is not visible here).  */
static int
test_one_locale (const char *name, int codepage)
{
  char buf[64];
  size_t ret;

# if 1
  /* Portable code to set the locale.  */
  {
    char name_with_codepage[1024];
    /* Upper bound on what "%d" can produce: at most 3 decimal digits per
       byte of int (assuming 8-bit bytes), plus one character for a sign.  */
    const size_t max_codepage_len = sizeof (int) * 3 + 1;

    /* NAME is supplied by the caller.  Refuse names for which
       "<name>.<codepage>" plus the terminating NUL would not fit, instead
       of letting sprintf() write past the end of the buffer.  Such a name
       is reported like any other locale that cannot be set.  */
    if (strlen (name) >= sizeof (name_with_codepage) - 1 - max_codepage_len)
      return 77;

    sprintf (name_with_codepage, "%s.%d", name, codepage);

    /* Set the locale.  */
    if (setlocale (LC_ALL, name_with_codepage) == NULL)
      return 77;
  }
# else
  /* Hacky way to set a locale.codepage combination that setlocale() refuses
     to set.  */
  {
    /* Codepage of the current locale, set with setlocale().
       Not necessarily the same as GetACP().  */
    extern __declspec(dllimport) unsigned int __lc_codepage;

    /* Set the locale.  */
    if (setlocale (LC_ALL, name) == NULL)
      return 77;

    /* Clobber the codepage and MB_CUR_MAX, both set by setlocale().  */
    __lc_codepage = codepage;
    switch (codepage)
      {
      case 1252:
      case 1256:
        MB_CUR_MAX = 1;
        break;
      case 932:
      case 950:
      case 936:
        MB_CUR_MAX = 2;
        break;
      case 54936:
      case 65001:
        MB_CUR_MAX = 4;
        break;
      }

    /* Test whether the codepage is really available.  */
    {
      mbstate_t state;
      wchar_t wc;

      memset (&state, '\0', sizeof (mbstate_t));
      if (mbrtowc (&wc, " ", 1, &state) == (size_t)(-1))
        return 77;
    }
  }
# endif

  /* Test NUL character.  */
  {
    /* Pre-fill with a non-NUL byte so the check below proves that wcrtomb()
       really stored the NUL.  */
    buf[0] = 'x';
    ret = wcrtomb (buf, 0, NULL);
    ASSERT (ret == 1);
    ASSERT (buf[0] == '\0');
  }

  /* Test single bytes.  */
  {
    int c;

    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          ret = wcrtomb (buf, btowc (c), NULL);
          ASSERT (ret == 1);
          ASSERT (buf[0] == (char) c);
          break;
        default:
          /* Bytes outside the list above are not checked here.  */
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  {
    ret = wcrtomb (NULL, '\0', NULL);
    ASSERT (ret == 1);
    ret = wcrtomb (NULL, btowc ('x'), NULL);
    ASSERT (ret == 1);
  }

  /* Codepage-specific checks.  Each one fills the start of buf with 'x',
     converts one wide character, and verifies the returned length, the
     produced bytes, and that the byte right after them is still 'x'.  */
  switch (codepage)
    {
    case 1252:
      /* Locale encoding is CP1252, an extension of ISO-8859-1.  */
      {
        /* Convert "B\374\337er": "Büßer" */
        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x00FC, NULL);
        ASSERT (ret == 1);
        ASSERT (memcmp (buf, "\374", 1) == 0);
        ASSERT (buf[1] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x00DF, NULL);
        ASSERT (ret == 1);
        ASSERT (memcmp (buf, "\337", 1) == 0);
        ASSERT (buf[1] == 'x');
      }
      return 0;

    case 1256:
      /* Locale encoding is CP1256, not the same as ISO-8859-6.  */
      {
        /* Convert "x\302\341\346y": "xآلوy" */
        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x0622, NULL);
        ASSERT (ret == 1);
        ASSERT (memcmp (buf, "\302", 1) == 0);
        ASSERT (buf[1] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x0644, NULL);
        ASSERT (ret == 1);
        ASSERT (memcmp (buf, "\341", 1) == 0);
        ASSERT (buf[1] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x0648, NULL);
        ASSERT (ret == 1);
        ASSERT (memcmp (buf, "\346", 1) == 0);
        ASSERT (buf[1] == 'x');
      }
      return 0;

    case 932:
      /* Locale encoding is CP932, similar to Shift_JIS.  */
      {
        /* Convert "<\223\372\226\173\214\352>": "<日本語>" */
        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x65E5, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\223\372", 2) == 0);
        ASSERT (buf[2] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x672C, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\226\173", 2) == 0);
        ASSERT (buf[2] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x8A9E, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\214\352", 2) == 0);
        ASSERT (buf[2] == 'x');
      }
      return 0;

    case 950:
      /* Locale encoding is CP950, similar to Big5.  */
      {
        /* Convert "<\244\351\245\273\273\171>": "<日本語>" */
        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x65E5, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\244\351", 2) == 0);
        ASSERT (buf[2] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x672C, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\245\273", 2) == 0);
        ASSERT (buf[2] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x8A9E, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\273\171", 2) == 0);
        ASSERT (buf[2] == 'x');
      }
      return 0;

    case 936:
      /* Locale encoding is CP936 = GBK, an extension of GB2312.  */
      {
        /* Convert "<\310\325\261\276\325\132>": "<日本語>" */
        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x65E5, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\310\325", 2) == 0);
        ASSERT (buf[2] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x672C, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\261\276", 2) == 0);
        ASSERT (buf[2] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x8A9E, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\325\132", 2) == 0);
        ASSERT (buf[2] == 'x');
      }
      return 0;

    case 54936:
      /* Locale encoding is CP54936 = GB18030.  */
      if (strcmp (locale_charset (), "GB18030") != 0)
        return 77;
      {
        /* Convert "B\250\271\201\060\211\070er": "Büßer" */
        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x00FC, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\250\271", 2) == 0);
        ASSERT (buf[2] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x00DF, NULL);
        ASSERT (ret == 4);
        ASSERT (memcmp (buf, "\201\060\211\070", 4) == 0);
        ASSERT (buf[4] == 'x');
      }
      return 0;

    case 65001:
      /* Locale encoding is CP65001 = UTF-8.  */
      if (strcmp (locale_charset (), "UTF-8") != 0)
        return 77;
      {
        /* Convert "B\303\274\303\237er": "Büßer" */
        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x00FC, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\303\274", 2) == 0);
        ASSERT (buf[2] == 'x');

        memset (buf, 'x', 8);
        ret = wcrtomb (buf, 0x00DF, NULL);
        ASSERT (ret == 2);
        ASSERT (memcmp (buf, "\303\237", 2) == 0);
        ASSERT (buf[2] == 'x');
      }
      return 0;

    default:
      /* No expectations are defined for this codepage.  */
      return 1;
    }
}
/* Entry point on native Windows.

   Usage: PROGRAM LOCALE... CODEPAGE

   The last argument is the numeric codepage; every argument before it is a
   locale name that is tried together with that codepage via
   test_one_locale ().

   Exit status:
     77  every locale was skipped (or none was given);
     1   the command line is malformed, or the last non-skipped locale
         reported 1;
     0   the last non-skipped locale passed.
   When several locales are not skipped, the status of the last one wins.  */
int
main (int argc, char *argv[])
{
  int codepage;
  int result;
  int i;

  /* Without this check, argv[argc - 1] would read out of bounds for
     argc == 0 and would parse the program name as the codepage for
     argc == 1.  */
  if (argc < 2)
    {
      fputs ("Usage: test-wcrtomb-w32 LOCALE... CODEPAGE\n", stderr);
      return 1;
    }

  /* Parse the codepage with strtol() rather than atoi() so that garbage,
     trailing junk, negative values and values that do not fit in an int
     are diagnosed instead of being silently turned into some number.  */
  {
    const char *arg = argv[argc - 1];
    char *end;
    long value = strtol (arg, &end, 10);

    codepage = (int) value;
    if (end == arg || *end != '\0' || value < 0 || codepage != value)
      {
        fprintf (stderr, "Invalid codepage: %s\n", arg);
        return 1;
      }
  }

  /* Start from "skipped"; any locale that actually ran overrides it.  */
  result = 77;
  for (i = 1; i < argc - 1; i++)
    {
      int ret = test_one_locale (argv[i], codepage);

      if (ret != 77)
        result = ret;
    }

  if (result == 77)
    {
      fprintf (stderr, "Skipping test: found no locale with codepage %d\n",
               codepage);
    }
  return result;
}
328 #else
/* Entry point on every platform other than native Windows: there is nothing
   to test, so announce that on stderr and exit with 77, the status this
   file uses for a skipped test.  */
int
main (int argc, char *argv[])
{
  static const char skip_notice[] =
    "Skipping test: not a native Windows system\n";

  /* The command line is irrelevant when the test is skipped.  */
  (void) argc;
  (void) argv;

  fputs (skip_notice, stderr);
  return 77;
}
337 #endif