/* Copyright (C) 2022-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <uchar.h>
#include <wchar.h>

#include <support/check.h>
#include <support/support.h>
31 test_truncated_code_unit_sequence (void)
33 /* Missing trailing code unit for a two code byte unit sequence. */
35 const char8_t
*u8s
= (const char8_t
*) u8
"\xC2";
36 char buf
[MB_LEN_MAX
] = { 0 };
39 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
41 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
42 TEST_COMPARE (errno
, EILSEQ
);
45 /* Missing first trailing code unit for a three byte code unit sequence. */
47 const char8_t
*u8s
= (const char8_t
*) u8
"\xE0";
48 char buf
[MB_LEN_MAX
] = { 0 };
51 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
53 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
54 TEST_COMPARE (errno
, EILSEQ
);
57 /* Missing second trailing code unit for a three byte code unit sequence. */
59 const char8_t
*u8s
= (const char8_t
*) u8
"\xE0\xA0";
60 char buf
[MB_LEN_MAX
] = { 0 };
63 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
64 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
66 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) -1);
67 TEST_COMPARE (errno
, EILSEQ
);
70 /* Missing first trailing code unit for a four byte code unit sequence. */
72 const char8_t
*u8s
= (const char8_t
*) u8
"\xF0";
73 char buf
[MB_LEN_MAX
] = { 0 };
76 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
78 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
79 TEST_COMPARE (errno
, EILSEQ
);
82 /* Missing second trailing code unit for a four byte code unit sequence. */
84 const char8_t
*u8s
= (const char8_t
*) u8
"\xF0\x90";
85 char buf
[MB_LEN_MAX
] = { 0 };
88 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
89 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
91 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) -1);
92 TEST_COMPARE (errno
, EILSEQ
);
95 /* Missing third trailing code unit for a four byte code unit sequence. */
97 const char8_t
*u8s
= (const char8_t
*) u8
"\xF0\x90\x80";
98 char buf
[MB_LEN_MAX
] = { 0 };
101 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
102 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
103 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 0);
105 TEST_COMPARE (c8rtomb (buf
, u8s
[3], &s
), (size_t) -1);
106 TEST_COMPARE (errno
, EILSEQ
);
113 test_invalid_trailing_code_unit_sequence (void)
115 /* Invalid trailing code unit for a two code byte unit sequence. */
117 const char8_t
*u8s
= (const char8_t
*) u8
"\xC2\xC0";
118 char buf
[MB_LEN_MAX
] = { 0 };
121 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
123 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
124 TEST_COMPARE (errno
, EILSEQ
);
127 /* Invalid first trailing code unit for a three byte code unit sequence. */
129 const char8_t
*u8s
= (const char8_t
*) u8
"\xE0\xC0";
130 char buf
[MB_LEN_MAX
] = { 0 };
133 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
135 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
136 TEST_COMPARE (errno
, EILSEQ
);
139 /* Invalid second trailing code unit for a three byte code unit sequence. */
141 const char8_t
*u8s
= (const char8_t
*) u8
"\xE0\xA0\xC0";
142 char buf
[MB_LEN_MAX
] = { 0 };
145 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
146 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
148 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) -1);
149 TEST_COMPARE (errno
, EILSEQ
);
152 /* Invalid first trailing code unit for a four byte code unit sequence. */
154 const char8_t
*u8s
= (const char8_t
*) u8
"\xF0\xC0";
155 char buf
[MB_LEN_MAX
] = { 0 };
158 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
160 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
161 TEST_COMPARE (errno
, EILSEQ
);
164 /* Invalid second trailing code unit for a four byte code unit sequence. */
166 const char8_t
*u8s
= (const char8_t
*) u8
"\xF0\x90\xC0";
167 char buf
[MB_LEN_MAX
] = { 0 };
170 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
171 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
173 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) -1);
174 TEST_COMPARE (errno
, EILSEQ
);
177 /* Invalid third trailing code unit for a four byte code unit sequence. */
179 const char8_t
*u8s
= (const char8_t
*) u8
"\xF0\x90\x80\xC0";
180 char buf
[MB_LEN_MAX
] = { 0 };
183 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
184 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
185 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 0);
187 TEST_COMPARE (c8rtomb (buf
, u8s
[3], &s
), (size_t) -1);
188 TEST_COMPARE (errno
, EILSEQ
);
195 test_lone_trailing_code_units (void)
197 /* Lone trailing code unit. */
198 const char8_t
*u8s
= (const char8_t
*) u8
"\x80";
199 char buf
[MB_LEN_MAX
] = { 0 };
203 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) -1);
204 TEST_COMPARE (errno
, EILSEQ
);
210 test_overlong_encoding (void)
212 /* Two byte overlong encoding. */
214 const char8_t
*u8s
= (const char8_t
*) u8
"\xC0\x80";
215 char buf
[MB_LEN_MAX
] = { 0 };
219 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) -1);
220 TEST_COMPARE (errno
, EILSEQ
);
223 /* Two byte overlong encoding. */
225 const char8_t
*u8s
= (const char8_t
*) u8
"\xC1\x80";
226 char buf
[MB_LEN_MAX
] = { 0 };
230 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) -1);
231 TEST_COMPARE (errno
, EILSEQ
);
234 /* Three byte overlong encoding. */
236 const char8_t
*u8s
= (const char8_t
*) u8
"\xE0\x9F\xBF";
237 char buf
[MB_LEN_MAX
] = { 0 };
240 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
242 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
243 TEST_COMPARE (errno
, EILSEQ
);
246 /* Four byte overlong encoding. */
248 const char8_t
*u8s
= (const char8_t
*) u8
"\xF0\x8F\xBF\xBF";
249 char buf
[MB_LEN_MAX
] = { 0 };
252 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
254 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
255 TEST_COMPARE (errno
, EILSEQ
);
262 test_surrogate_range (void)
264 /* Would encode U+D800. */
266 const char8_t
*u8s
= (const char8_t
*) u8
"\xED\xA0\x80";
267 char buf
[MB_LEN_MAX
] = { 0 };
270 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
272 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
273 TEST_COMPARE (errno
, EILSEQ
);
276 /* Would encode U+DFFF. */
278 const char8_t
*u8s
= (const char8_t
*) u8
"\xED\xBF\xBF";
279 char buf
[MB_LEN_MAX
] = { 0 };
282 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
284 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
285 TEST_COMPARE (errno
, EILSEQ
);
292 test_out_of_range_encoding (void)
294 /* Would encode U+00110000. */
296 const char8_t
*u8s
= (const char8_t
*) u8
"\xF4\x90\x80\x80";
297 char buf
[MB_LEN_MAX
] = { 0 };
300 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
302 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) -1);
303 TEST_COMPARE (errno
, EILSEQ
);
306 /* Would encode U+00140000. */
308 const char8_t
*u8s
= (const char8_t
*) u8
"\xF5\x90\x80\x80";
309 char buf
[MB_LEN_MAX
] = { 0 };
313 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) -1);
314 TEST_COMPARE (errno
, EILSEQ
);
/* Check the behavior of c8rtomb when the output buffer argument is a
   null pointer, both from an initial conversion state and from a state
   that holds an incompletely read code unit sequence.  */
static void
test_null_output_buffer (void)
{
  /* Null buffer with an initial state.  */
  {
    mbstate_t s = { 0 };

    TEST_COMPARE (c8rtomb (NULL, u8"X"[0], &s), (size_t) 1);
    /* Assert the state is now an initial state.  */
    TEST_VERIFY (mbsinit (&s));
  }

  /* Null buffer with a state corresponding to an incompletely read code
     unit sequence.  In this case, an error occurs since insufficient
     information is available to complete the already started code unit
     sequence and return to the initial state.  */
  {
    char buf[MB_LEN_MAX] = { 0 };
    mbstate_t s = { 0 };

    TEST_COMPARE (c8rtomb (buf, u8"\xC2"[0], &s), (size_t) 0);
    /* Clear errno so that the EILSEQ check below observes this call.  */
    errno = 0;
    TEST_COMPARE (c8rtomb (NULL, u8"\x80"[0], &s), (size_t) -1);
    TEST_COMPARE (errno, EILSEQ);
  }
}
352 xsetlocale (LC_ALL
, "de_DE.UTF-8");
354 /* Null character. */
357 const char8_t
*u8s
= (const char8_t
*) u8
"\x00";
358 char buf
[MB_LEN_MAX
] = { 0 };
361 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 1);
362 TEST_COMPARE (buf
[0], (char) 0x00);
363 TEST_VERIFY (mbsinit (&s
));
366 /* First non-null character in the code point range that maps to a single
370 const char8_t
*u8s
= (const char8_t
*) u8
"\x01";
371 char buf
[MB_LEN_MAX
] = { 0 };
374 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 1);
375 TEST_COMPARE (buf
[0], (char) 0x01);
376 TEST_VERIFY (mbsinit (&s
));
379 /* Last character in the code point range that maps to a single code unit. */
382 const char8_t
*u8s
= (const char8_t
*) u8
"\x7F";
383 char buf
[MB_LEN_MAX
] = { 0 };
386 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 1);
387 TEST_COMPARE (buf
[0], (char) 0x7F);
388 TEST_VERIFY (mbsinit (&s
));
391 /* First character in the code point range that maps to two code units. */
393 /* U+0080 => 0xC2 0x80 */
394 const char8_t
*u8s
= (const char8_t
*) u8
"\xC2\x80";
395 char buf
[MB_LEN_MAX
] = { 0 };
398 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
399 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 2);
400 TEST_COMPARE (buf
[0], (char) 0xC2);
401 TEST_COMPARE (buf
[1], (char) 0x80);
402 TEST_VERIFY (mbsinit (&s
));
405 /* Last character in the code point range that maps to two code units. */
407 /* U+07FF => 0xDF 0xBF */
408 const char8_t
*u8s
= (const char8_t
*) u8
"\u07FF";
409 char buf
[MB_LEN_MAX
] = { 0 };
412 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
413 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 2);
414 TEST_COMPARE (buf
[0], (char) 0xDF);
415 TEST_COMPARE (buf
[1], (char) 0xBF);
416 TEST_VERIFY (mbsinit (&s
));
419 /* First character in the code point range that maps to three code units. */
421 /* U+0800 => 0xE0 0xA0 0x80 */
422 const char8_t
*u8s
= (const char8_t
*) u8
"\u0800";
423 char buf
[MB_LEN_MAX
] = { 0 };
426 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
427 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
428 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 3);
429 TEST_COMPARE (buf
[0], (char) 0xE0);
430 TEST_COMPARE (buf
[1], (char) 0xA0);
431 TEST_COMPARE (buf
[2], (char) 0x80);
432 TEST_VERIFY (mbsinit (&s
));
435 /* Last character in the code point range that maps to three code units
436 before the surrogate code point range. */
438 /* U+D7FF => 0xED 0x9F 0xBF */
439 const char8_t
*u8s
= (const char8_t
*) u8
"\uD7FF";
440 char buf
[MB_LEN_MAX
] = { 0 };
443 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
444 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
445 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 3);
446 TEST_COMPARE (buf
[0], (char) 0xED);
447 TEST_COMPARE (buf
[1], (char) 0x9F);
448 TEST_COMPARE (buf
[2], (char) 0xBF);
449 TEST_VERIFY (mbsinit (&s
));
452 /* First character in the code point range that maps to three code units
453 after the surrogate code point range. */
455 /* U+E000 => 0xEE 0x80 0x80 */
456 const char8_t
*u8s
= (const char8_t
*) u8
"\uE000";
457 char buf
[MB_LEN_MAX
] = { 0 };
460 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
461 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
462 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 3);
463 TEST_COMPARE (buf
[0], (char) 0xEE);
464 TEST_COMPARE (buf
[1], (char) 0x80);
465 TEST_COMPARE (buf
[2], (char) 0x80);
466 TEST_VERIFY (mbsinit (&s
));
471 /* U+FEFF => 0xEF 0xBB 0xBF */
472 const char8_t
*u8s
= (const char8_t
*) u8
"\uFEFF";
473 char buf
[MB_LEN_MAX
] = { 0 };
476 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
477 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
478 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 3);
479 TEST_COMPARE (buf
[0], (char) 0xEF);
480 TEST_COMPARE (buf
[1], (char) 0xBB);
481 TEST_COMPARE (buf
[2], (char) 0xBF);
482 TEST_VERIFY (mbsinit (&s
));
485 /* Replacement character. */
487 /* U+FFFD => 0xEF 0xBF 0xBD */
488 const char8_t
*u8s
= (const char8_t
*) u8
"\uFFFD";
489 char buf
[MB_LEN_MAX
] = { 0 };
492 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
493 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
494 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 3);
495 TEST_COMPARE (buf
[0], (char) 0xEF);
496 TEST_COMPARE (buf
[1], (char) 0xBF);
497 TEST_COMPARE (buf
[2], (char) 0xBD);
498 TEST_VERIFY (mbsinit (&s
));
501 /* Last character in the code point range that maps to three code units. */
503 /* U+FFFF => 0xEF 0xBF 0xBF */
504 const char8_t
*u8s
= (const char8_t
*) u8
"\uFFFF";
505 char buf
[MB_LEN_MAX
] = { 0 };
508 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
509 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
510 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 3);
511 TEST_COMPARE (buf
[0], (char) 0xEF);
512 TEST_COMPARE (buf
[1], (char) 0xBF);
513 TEST_COMPARE (buf
[2], (char) 0xBF);
514 TEST_VERIFY (mbsinit (&s
));
517 /* First character in the code point range that maps to four code units. */
519 /* U+10000 => 0xF0 0x90 0x80 0x80 */
520 const char8_t
*u8s
= (const char8_t
*) u8
"\U00010000";
521 char buf
[MB_LEN_MAX
] = { 0 };
524 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
525 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
526 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 0);
527 TEST_COMPARE (c8rtomb (buf
, u8s
[3], &s
), (size_t) 4);
528 TEST_COMPARE (buf
[0], (char) 0xF0);
529 TEST_COMPARE (buf
[1], (char) 0x90);
530 TEST_COMPARE (buf
[2], (char) 0x80);
531 TEST_COMPARE (buf
[3], (char) 0x80);
532 TEST_VERIFY (mbsinit (&s
));
535 /* Last character in the code point range that maps to four code units. */
537 /* U+10FFFF => 0xF4 0x8F 0xBF 0xBF */
538 const char8_t
*u8s
= (const char8_t
*) u8
"\U0010FFFF";
539 char buf
[MB_LEN_MAX
] = { 0 };
542 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
543 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
544 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 0);
545 TEST_COMPARE (c8rtomb (buf
, u8s
[3], &s
), (size_t) 4);
546 TEST_COMPARE (buf
[0], (char) 0xF4);
547 TEST_COMPARE (buf
[1], (char) 0x8F);
548 TEST_COMPARE (buf
[2], (char) 0xBF);
549 TEST_COMPARE (buf
[3], (char) 0xBF);
550 TEST_VERIFY (mbsinit (&s
));
557 test_big5_hkscs (void)
559 xsetlocale (LC_ALL
, "zh_HK.BIG5-HKSCS");
561 /* A pair of two byte UTF-8 code unit sequences that map a Unicode code
562 point and combining character to a single double byte character. */
564 /* U+00CA U+0304 => 0x88 0x62 */
565 const char8_t
*u8s
= (const char8_t
*) u8
"\u00CA\u0304";
566 char buf
[MB_LEN_MAX
] = { 0 };
569 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
570 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
571 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 0);
572 TEST_COMPARE (c8rtomb (buf
, u8s
[3], &s
), (size_t) 2);
573 TEST_COMPARE (buf
[0], (char) 0x88);
574 TEST_COMPARE (buf
[1], (char) 0x62);
575 TEST_VERIFY (mbsinit (&s
));
578 /* Another pair of two byte UTF-8 code unit sequences that map a Unicode code
579 point and combining character to a single double byte character. */
581 /* U+00EA U+030C => 0x88 0xA5 */
582 const char8_t
*u8s
= (const char8_t
*) u8
"\u00EA\u030C";
583 char buf
[MB_LEN_MAX
] = { 0 };
586 TEST_COMPARE (c8rtomb (buf
, u8s
[0], &s
), (size_t) 0);
587 TEST_COMPARE (c8rtomb (buf
, u8s
[1], &s
), (size_t) 0);
588 TEST_COMPARE (c8rtomb (buf
, u8s
[2], &s
), (size_t) 0);
589 TEST_COMPARE (c8rtomb (buf
, u8s
[3], &s
), (size_t) 2);
590 TEST_COMPARE (buf
[0], (char) 0x88);
591 TEST_COMPARE (buf
[1], (char) 0xA5);
592 TEST_VERIFY (mbsinit (&s
));
601 test_truncated_code_unit_sequence ();
602 test_invalid_trailing_code_unit_sequence ();
603 test_lone_trailing_code_units ();
604 test_overlong_encoding ();
605 test_surrogate_range ();
606 test_out_of_range_encoding ();
607 test_null_output_buffer ();
613 #include <support/test-driver.c>