all: prefer https: URLs
[gnulib.git] / tests / test-striconveh.c
blob82e13358fe074ed591acec2cd6eef67444152ee5
1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
19 #include <config.h>
21 #include "striconveh.h"
23 #if HAVE_ICONV
24 # include <iconv.h>
25 #endif
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
31 #include "macros.h"
/* Magic number for detecting bounds violations.  */
#define MAGIC 0x1983EFF1

/* Allocate an array of N + 1 offsets and store MAGIC in the extra slot at
   index N, so that a caller can later verify that nothing wrote past the
   first N elements.  The first N elements are left uninitialized.
   The caller owns the returned array and must free() it.
   Aborts the program if the allocation fails, instead of dereferencing a
   null pointer.  */
static size_t *
new_offsets (size_t n)
{
  size_t *offsets = malloc ((n + 1) * sizeof *offsets);

  if (offsets == NULL)
    abort ();
  offsets[n] = MAGIC;
  return offsets;
}
44 int
45 main ()
47 static enum iconv_ilseq_handler handlers[] =
48 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
49 size_t indirect;
50 size_t h;
51 size_t o;
52 size_t i;
54 #if HAVE_ICONV
55 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
56 ISO-8859-2, and UTF-8. */
57 iconv_t cd_ascii_to_88591 = iconv_open ("ISO-8859-1", "ASCII");
58 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
59 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
60 iconv_t cd_ascii_to_utf8 = iconv_open ("UTF-8", "ASCII");
61 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
62 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
63 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
64 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
65 iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7");
66 iconveh_t cdeh_ascii_to_88591;
67 iconveh_t cdeh_ascii_to_88591_indirectly;
68 iconveh_t cdeh_88592_to_88591;
69 iconveh_t cdeh_88592_to_88591_indirectly;
70 iconveh_t cdeh_ascii_to_utf8;
71 iconveh_t cdeh_88591_to_utf8;
72 iconveh_t cdeh_utf8_to_88591;
73 iconveh_t cdeh_utf7_to_utf8;
75 ASSERT (cd_ascii_to_utf8 != (iconv_t)(-1));
76 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
77 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
78 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
79 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
81 cdeh_ascii_to_88591.cd = cd_ascii_to_88591;
82 cdeh_ascii_to_88591.cd1 = cd_ascii_to_utf8;
83 cdeh_ascii_to_88591.cd2 = cd_utf8_to_88591;
85 cdeh_ascii_to_88591_indirectly.cd = (iconv_t)(-1);
86 cdeh_ascii_to_88591_indirectly.cd1 = cd_ascii_to_utf8;
87 cdeh_ascii_to_88591_indirectly.cd2 = cd_utf8_to_88591;
89 cdeh_88592_to_88591.cd = cd_88592_to_88591;
90 cdeh_88592_to_88591.cd1 = cd_88592_to_utf8;
91 cdeh_88592_to_88591.cd2 = cd_utf8_to_88591;
93 cdeh_88592_to_88591_indirectly.cd = (iconv_t)(-1);
94 cdeh_88592_to_88591_indirectly.cd1 = cd_88592_to_utf8;
95 cdeh_88592_to_88591_indirectly.cd2 = cd_utf8_to_88591;
97 cdeh_ascii_to_utf8.cd = cd_ascii_to_utf8;
98 cdeh_ascii_to_utf8.cd1 = cd_ascii_to_utf8;
99 cdeh_ascii_to_utf8.cd2 = (iconv_t)(-1);
101 cdeh_88591_to_utf8.cd = cd_88591_to_utf8;
102 cdeh_88591_to_utf8.cd1 = cd_88591_to_utf8;
103 cdeh_88591_to_utf8.cd2 = (iconv_t)(-1);
105 cdeh_utf8_to_88591.cd = cd_utf8_to_88591;
106 cdeh_utf8_to_88591.cd1 = (iconv_t)(-1);
107 cdeh_utf8_to_88591.cd2 = cd_utf8_to_88591;
109 cdeh_utf7_to_utf8.cd = cd_utf7_to_utf8;
110 cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8;
111 cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1);
113 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
115 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
116 for (indirect = 0; indirect <= 1; indirect++)
118 for (h = 0; h < SIZEOF (handlers); h++)
120 enum iconv_ilseq_handler handler = handlers[h];
121 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
122 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
123 for (o = 0; o < 2; o++)
125 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
126 char *result = NULL;
127 size_t length = 0;
128 int retval = mem_cd_iconveh (input, strlen (input),
129 (indirect
130 ? &cdeh_88592_to_88591_indirectly
131 : &cdeh_88592_to_88591),
132 handler,
133 offsets,
134 &result, &length);
135 ASSERT (retval == 0);
136 ASSERT (length == strlen (expected));
137 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
138 if (o)
140 for (i = 0; i < 37; i++)
141 ASSERT (offsets[i] == i);
142 ASSERT (offsets[37] == MAGIC);
143 free (offsets);
145 free (result);
150 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
151 for (indirect = 0; indirect <= 1; indirect++)
153 for (h = 0; h < SIZEOF (handlers); h++)
155 enum iconv_ilseq_handler handler = handlers[h];
156 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
157 for (o = 0; o < 2; o++)
159 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
160 char *result = NULL;
161 size_t length = 0;
162 int retval = mem_cd_iconveh (input, strlen (input),
163 (indirect
164 ? &cdeh_ascii_to_88591_indirectly
165 : &cdeh_ascii_to_88591),
166 handler,
167 offsets,
168 &result, &length);
169 switch (handler)
171 case iconveh_error:
172 ASSERT (retval == -1 && errno == EILSEQ);
173 ASSERT (result == NULL);
174 if (o)
175 free (offsets);
176 break;
177 case iconveh_question_mark:
178 case iconveh_escape_sequence:
180 static const char expected[] = "Rafa? Maszkowski";
181 ASSERT (retval == 0);
182 ASSERT (length == strlen (expected));
183 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
184 if (o)
186 for (i = 0; i < 16; i++)
187 ASSERT (offsets[i] == i);
188 ASSERT (offsets[16] == MAGIC);
189 free (offsets);
191 free (result);
193 break;
199 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
200 for (indirect = 0; indirect <= 1; indirect++)
202 for (h = 0; h < SIZEOF (handlers); h++)
204 enum iconv_ilseq_handler handler = handlers[h];
205 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
206 for (o = 0; o < 2; o++)
208 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
209 char *result = NULL;
210 size_t length = 0;
211 int retval = mem_cd_iconveh (input, strlen (input),
212 (indirect
213 ? &cdeh_88592_to_88591_indirectly
214 : &cdeh_88592_to_88591),
215 handler,
216 offsets,
217 &result, &length);
218 switch (handler)
220 case iconveh_error:
221 ASSERT (retval == -1 && errno == EILSEQ);
222 ASSERT (result == NULL);
223 if (o)
224 free (offsets);
225 break;
226 case iconveh_question_mark:
228 static const char expected[] = "Rafa? Maszkowski";
229 ASSERT (retval == 0);
230 ASSERT (length == strlen (expected));
231 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
232 if (o)
234 for (i = 0; i < 16; i++)
235 ASSERT (offsets[i] == i);
236 ASSERT (offsets[16] == MAGIC);
237 free (offsets);
239 free (result);
241 break;
242 case iconveh_escape_sequence:
244 static const char expected[] = "Rafa\\u0142 Maszkowski";
245 ASSERT (retval == 0);
246 ASSERT (length == strlen (expected));
247 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
248 if (o)
250 for (i = 0; i < 16; i++)
251 ASSERT (offsets[i] == (i < 5 ? i :
252 i + 5));
253 ASSERT (offsets[16] == MAGIC);
254 free (offsets);
256 free (result);
258 break;
264 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
265 for (h = 0; h < SIZEOF (handlers); h++)
267 enum iconv_ilseq_handler handler = handlers[h];
268 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
269 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
270 for (o = 0; o < 2; o++)
272 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
273 char *result = NULL;
274 size_t length = 0;
275 int retval = mem_cd_iconveh (input, strlen (input),
276 &cdeh_88591_to_utf8,
277 handler,
278 offsets,
279 &result, &length);
280 ASSERT (retval == 0);
281 ASSERT (length == strlen (expected));
282 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
283 if (o)
285 for (i = 0; i < 37; i++)
286 ASSERT (offsets[i] == (i < 1 ? i :
287 i < 12 ? i + 1 :
288 i < 18 ? i + 2 :
289 i + 3));
290 ASSERT (offsets[37] == MAGIC);
291 free (offsets);
293 free (result);
297 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
298 for (h = 0; h < SIZEOF (handlers); h++)
300 enum iconv_ilseq_handler handler = handlers[h];
301 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
302 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
303 for (o = 0; o < 2; o++)
305 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
306 char *result = NULL;
307 size_t length = 0;
308 int retval = mem_cd_iconveh (input, strlen (input),
309 &cdeh_utf8_to_88591,
310 handler,
311 offsets,
312 &result, &length);
313 ASSERT (retval == 0);
314 ASSERT (length == strlen (expected));
315 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
316 if (o)
318 for (i = 0; i < 41; i++)
319 ASSERT (offsets[i] == (i < 1 ? i :
320 i == 1 ? (size_t)(-1) :
321 i < 13 ? i - 1 :
322 i == 13 ? (size_t)(-1) :
323 i < 20 ? i - 2 :
324 i == 20 ? (size_t)(-1) :
325 i < 40 ? i - 3 :
326 (size_t)(-1)));
327 ASSERT (offsets[41] == MAGIC);
328 free (offsets);
330 free (result);
334 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
335 for (h = 0; h < SIZEOF (handlers); h++)
337 enum iconv_ilseq_handler handler = handlers[h];
338 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
339 for (o = 0; o < 2; o++)
341 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
342 char *result = NULL;
343 size_t length = 0;
344 int retval = mem_cd_iconveh (input, strlen (input),
345 &cdeh_ascii_to_utf8,
346 handler,
347 offsets,
348 &result, &length);
349 switch (handler)
351 case iconveh_error:
352 ASSERT (retval == -1 && errno == EILSEQ);
353 ASSERT (result == NULL);
354 if (o)
355 free (offsets);
356 break;
357 case iconveh_question_mark:
358 case iconveh_escape_sequence:
360 static const char expected[] = "Rafa? Maszkowski";
361 ASSERT (retval == 0);
362 ASSERT (length == strlen (expected));
363 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
364 if (o)
366 for (i = 0; i < 16; i++)
367 ASSERT (offsets[i] == i);
368 ASSERT (offsets[16] == MAGIC);
369 free (offsets);
371 free (result);
373 break;
378 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
379 for (h = 0; h < SIZEOF (handlers); h++)
381 enum iconv_ilseq_handler handler = handlers[h];
382 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
383 for (o = 0; o < 2; o++)
385 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
386 char *result = NULL;
387 size_t length = 0;
388 int retval = mem_cd_iconveh (input, strlen (input),
389 &cdeh_utf8_to_88591,
390 handler,
391 offsets,
392 &result, &length);
393 switch (handler)
395 case iconveh_error:
396 ASSERT (retval == -1 && errno == EILSEQ);
397 ASSERT (result == NULL);
398 if (o)
399 free (offsets);
400 break;
401 case iconveh_question_mark:
403 static const char expected[] = "Rafa? Maszkowski";
404 ASSERT (retval == 0);
405 ASSERT (length == strlen (expected));
406 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
407 if (o)
409 for (i = 0; i < 17; i++)
410 ASSERT (offsets[i] == (i < 5 ? i :
411 i == 5 ? (size_t)(-1) :
412 i - 1));
413 ASSERT (offsets[17] == MAGIC);
414 free (offsets);
416 free (result);
418 break;
419 case iconveh_escape_sequence:
421 static const char expected[] = "Rafa\\u0142 Maszkowski";
422 ASSERT (retval == 0);
423 ASSERT (length == strlen (expected));
424 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
425 if (o)
427 for (i = 0; i < 17; i++)
428 ASSERT (offsets[i] == (i < 5 ? i :
429 i == 5 ? (size_t)(-1) :
430 i + 4));
431 ASSERT (offsets[17] == MAGIC);
432 free (offsets);
434 free (result);
436 break;
441 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
442 for (h = 0; h < SIZEOF (handlers); h++)
444 enum iconv_ilseq_handler handler = handlers[h];
445 static const char input[] = "\342";
446 for (o = 0; o < 2; o++)
448 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
449 char *result = NULL;
450 size_t length = 0;
451 int retval = mem_cd_iconveh (input, strlen (input),
452 &cdeh_utf8_to_88591,
453 handler,
454 offsets,
455 &result, &length);
456 ASSERT (retval == 0);
457 ASSERT (length == 0);
458 if (o)
460 ASSERT (offsets[0] == 0);
461 ASSERT (offsets[1] == MAGIC);
462 free (offsets);
464 free (result);
468 if (cd_utf7_to_utf8 != (iconv_t)(-1))
470 /* Disabled on Solaris, because Solaris 9 iconv() is buggy: it returns
471 -1 / EILSEQ when converting the 7th byte of the input "+VDLYP9hA". */
472 # if !(defined __sun && !defined _LIBICONV_VERSION)
473 /* Test conversion from UTF-7 to UTF-8 with EINVAL. */
474 for (h = 0; h < SIZEOF (handlers); h++)
476 enum iconv_ilseq_handler handler = handlers[h];
477 /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
478 convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */
479 static const char input[] = "+VDLYP9hA";
480 static const char expected1[] = "\345\220\262"; /* 吲 glibc */
481 static const char expected2[] = ""; /* libiconv */
482 char *result = NULL;
483 size_t length = 0;
484 int retval = mem_cd_iconveh (input, 7,
485 &cdeh_utf7_to_utf8,
486 handler,
487 NULL,
488 &result, &length);
489 ASSERT (retval == 0);
490 ASSERT (length == strlen (expected1) || length == strlen (expected2));
491 ASSERT (result != NULL);
492 if (length == strlen (expected1))
493 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
494 else
495 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
496 free (result);
499 /* Disabled on NetBSD, because NetBSD 5.0 iconv() is buggy: it converts
500 the input "+2D/YQNhB" to U+1FED8 U+3FD8 U+40D8. */
501 # if !(defined __NetBSD__ && !defined _LIBICONV_VERSION)
502 /* Test conversion from UTF-7 to UTF-8 with EILSEQ. */
503 for (h = 0; h < SIZEOF (handlers); h++)
505 enum iconv_ilseq_handler handler = handlers[h];
506 /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
507 convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
508 static const char input[] = "+2D/YQNhB";
509 char *result = NULL;
510 size_t length = 0;
511 int retval = mem_cd_iconveh (input, strlen (input),
512 &cdeh_utf7_to_utf8,
513 handler,
514 NULL,
515 &result, &length);
516 switch (handler)
518 case iconveh_error:
519 ASSERT (retval == -1 && errno == EILSEQ);
520 ASSERT (result == NULL);
521 break;
522 case iconveh_question_mark:
523 case iconveh_escape_sequence:
525 /* glibc result */
526 static const char expected1[] = "?????";
527 /* libiconv <= 1.12 result */
528 static const char expected2[] = "?2D/YQNhB";
529 /* libiconv behaviour changed in version 1.13: the result is
530 '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left
531 by 6 bits. */
532 static const char expected3[] = "?\340\277\266\341\200\266";
533 ASSERT (retval == 0);
534 ASSERT (length == strlen (expected1)
535 || length == strlen (expected2)
536 || length == strlen (expected3));
537 ASSERT (result != NULL);
538 if (length == strlen (expected1))
539 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
540 else if (length == strlen (expected2))
541 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
542 else
543 ASSERT (memcmp (result, expected3, strlen (expected3)) == 0);
544 free (result);
546 break;
549 # endif
550 # endif
553 /* ------------------------ Test str_cd_iconveh() ------------------------ */
555 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
556 for (indirect = 0; indirect <= 1; indirect++)
558 for (h = 0; h < SIZEOF (handlers); h++)
560 enum iconv_ilseq_handler handler = handlers[h];
561 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
562 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
563 char *result = str_cd_iconveh (input,
564 (indirect
565 ? &cdeh_88592_to_88591_indirectly
566 : &cdeh_88592_to_88591),
567 handler);
568 ASSERT (result != NULL);
569 ASSERT (strcmp (result, expected) == 0);
570 free (result);
574 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
575 for (indirect = 0; indirect <= 1; indirect++)
577 for (h = 0; h < SIZEOF (handlers); h++)
579 enum iconv_ilseq_handler handler = handlers[h];
580 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
581 char *result = str_cd_iconveh (input,
582 (indirect
583 ? &cdeh_ascii_to_88591_indirectly
584 : &cdeh_ascii_to_88591),
585 handler);
586 switch (handler)
588 case iconveh_error:
589 ASSERT (result == NULL && errno == EILSEQ);
590 break;
591 case iconveh_question_mark:
592 case iconveh_escape_sequence:
594 static const char expected[] = "Rafa? Maszkowski";
595 ASSERT (result != NULL);
596 ASSERT (strcmp (result, expected) == 0);
597 free (result);
599 break;
604 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
605 for (indirect = 0; indirect <= 1; indirect++)
607 for (h = 0; h < SIZEOF (handlers); h++)
609 enum iconv_ilseq_handler handler = handlers[h];
610 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
611 char *result = str_cd_iconveh (input,
612 (indirect
613 ? &cdeh_88592_to_88591_indirectly
614 : &cdeh_88592_to_88591),
615 handler);
616 switch (handler)
618 case iconveh_error:
619 ASSERT (result == NULL && errno == EILSEQ);
620 break;
621 case iconveh_question_mark:
623 static const char expected[] = "Rafa? Maszkowski";
624 ASSERT (result != NULL);
625 ASSERT (strcmp (result, expected) == 0);
626 free (result);
628 break;
629 case iconveh_escape_sequence:
631 static const char expected[] = "Rafa\\u0142 Maszkowski";
632 ASSERT (result != NULL);
633 ASSERT (strcmp (result, expected) == 0);
634 free (result);
636 break;
641 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
642 for (h = 0; h < SIZEOF (handlers); h++)
644 enum iconv_ilseq_handler handler = handlers[h];
645 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
646 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
647 char *result = str_cd_iconveh (input,
648 &cdeh_88591_to_utf8,
649 handler);
650 ASSERT (result != NULL);
651 ASSERT (strcmp (result, expected) == 0);
652 free (result);
655 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
656 for (h = 0; h < SIZEOF (handlers); h++)
658 enum iconv_ilseq_handler handler = handlers[h];
659 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
660 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
661 char *result = str_cd_iconveh (input,
662 &cdeh_utf8_to_88591,
663 handler);
664 ASSERT (result != NULL);
665 ASSERT (strcmp (result, expected) == 0);
666 free (result);
669 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
670 for (h = 0; h < SIZEOF (handlers); h++)
672 enum iconv_ilseq_handler handler = handlers[h];
673 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
674 char *result = str_cd_iconveh (input,
675 &cdeh_ascii_to_utf8,
676 handler);
677 switch (handler)
679 case iconveh_error:
680 ASSERT (result == NULL && errno == EILSEQ);
681 break;
682 case iconveh_question_mark:
683 case iconveh_escape_sequence:
685 static const char expected[] = "Rafa? Maszkowski";
686 ASSERT (result != NULL);
687 ASSERT (strcmp (result, expected) == 0);
688 free (result);
690 break;
694 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
695 for (h = 0; h < SIZEOF (handlers); h++)
697 enum iconv_ilseq_handler handler = handlers[h];
698 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
699 char *result = str_cd_iconveh (input,
700 &cdeh_utf8_to_88591,
701 handler);
702 switch (handler)
704 case iconveh_error:
705 ASSERT (result == NULL && errno == EILSEQ);
706 break;
707 case iconveh_question_mark:
709 static const char expected[] = "Costs: 27 ?";
710 ASSERT (result != NULL);
711 ASSERT (strcmp (result, expected) == 0);
712 free (result);
714 break;
715 case iconveh_escape_sequence:
717 static const char expected[] = "Costs: 27 \\u20AC";
718 ASSERT (result != NULL);
719 ASSERT (strcmp (result, expected) == 0);
720 free (result);
722 break;
726 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
727 for (h = 0; h < SIZEOF (handlers); h++)
729 enum iconv_ilseq_handler handler = handlers[h];
730 static const char input[] = "\342";
731 char *result = str_cd_iconveh (input,
732 &cdeh_utf8_to_88591,
733 handler);
734 ASSERT (result != NULL);
735 ASSERT (strcmp (result, "") == 0);
736 free (result);
739 if (cd_88591_to_88592 != (iconv_t)(-1))
740 iconv_close (cd_88591_to_88592);
741 if (cd_88592_to_88591 != (iconv_t)(-1))
742 iconv_close (cd_88592_to_88591);
743 iconv_close (cd_88591_to_utf8);
744 iconv_close (cd_utf8_to_88591);
745 iconv_close (cd_88592_to_utf8);
746 iconv_close (cd_utf8_to_88592);
748 /* ------------------------- Test mem_iconveh() ------------------------- */
750 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
751 for (h = 0; h < SIZEOF (handlers); h++)
753 enum iconv_ilseq_handler handler = handlers[h];
754 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
755 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
756 for (o = 0; o < 2; o++)
758 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
759 char *result = NULL;
760 size_t length = 0;
761 int retval = mem_iconveh (input, strlen (input),
762 "ISO-8859-2", "ISO-8859-1",
763 handler,
764 offsets,
765 &result, &length);
766 ASSERT (retval == 0);
767 ASSERT (length == strlen (expected));
768 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
769 if (o)
771 for (i = 0; i < 37; i++)
772 ASSERT (offsets[i] == i);
773 ASSERT (offsets[37] == MAGIC);
774 free (offsets);
776 free (result);
780 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
781 for (h = 0; h < SIZEOF (handlers); h++)
783 enum iconv_ilseq_handler handler = handlers[h];
784 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
785 for (o = 0; o < 2; o++)
787 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
788 char *result = NULL;
789 size_t length = 0;
790 int retval = mem_iconveh (input, strlen (input),
791 "ISO-8859-2", "ISO-8859-1",
792 handler,
793 offsets,
794 &result, &length);
795 switch (handler)
797 case iconveh_error:
798 ASSERT (retval == -1 && errno == EILSEQ);
799 ASSERT (result == NULL);
800 if (o)
801 free (offsets);
802 break;
803 case iconveh_question_mark:
805 static const char expected[] = "Rafa? Maszkowski";
806 ASSERT (retval == 0);
807 ASSERT (length == strlen (expected));
808 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
809 if (o)
811 for (i = 0; i < 16; i++)
812 ASSERT (offsets[i] == i);
813 ASSERT (offsets[16] == MAGIC);
814 free (offsets);
816 free (result);
818 break;
819 case iconveh_escape_sequence:
821 static const char expected[] = "Rafa\\u0142 Maszkowski";
822 ASSERT (retval == 0);
823 ASSERT (length == strlen (expected));
824 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
825 if (o)
827 for (i = 0; i < 16; i++)
828 ASSERT (offsets[i] == (i < 5 ? i :
829 i + 5));
830 ASSERT (offsets[16] == MAGIC);
831 free (offsets);
833 free (result);
835 break;
840 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
841 for (h = 0; h < SIZEOF (handlers); h++)
843 enum iconv_ilseq_handler handler = handlers[h];
844 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
845 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
846 for (o = 0; o < 2; o++)
848 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
849 char *result = NULL;
850 size_t length = 0;
851 int retval = mem_iconveh (input, strlen (input),
852 "ISO-8859-1", "UTF-8",
853 handler,
854 offsets,
855 &result, &length);
856 ASSERT (retval == 0);
857 ASSERT (length == strlen (expected));
858 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
859 if (o)
861 for (i = 0; i < 37; i++)
862 ASSERT (offsets[i] == (i < 1 ? i :
863 i < 12 ? i + 1 :
864 i < 18 ? i + 2 :
865 i + 3));
866 ASSERT (offsets[37] == MAGIC);
867 free (offsets);
869 free (result);
873 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
874 for (h = 0; h < SIZEOF (handlers); h++)
876 enum iconv_ilseq_handler handler = handlers[h];
877 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
878 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
879 for (o = 0; o < 2; o++)
881 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
882 char *result = NULL;
883 size_t length = 0;
884 int retval = mem_iconveh (input, strlen (input),
885 "UTF-8", "ISO-8859-1",
886 handler,
887 offsets,
888 &result, &length);
889 ASSERT (retval == 0);
890 ASSERT (length == strlen (expected));
891 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
892 if (o)
894 for (i = 0; i < 41; i++)
895 ASSERT (offsets[i] == (i < 1 ? i :
896 i == 1 ? (size_t)(-1) :
897 i < 13 ? i - 1 :
898 i == 13 ? (size_t)(-1) :
899 i < 20 ? i - 2 :
900 i == 20 ? (size_t)(-1) :
901 i < 40 ? i - 3 :
902 (size_t)(-1)));
903 ASSERT (offsets[41] == MAGIC);
904 free (offsets);
906 free (result);
910 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
911 for (h = 0; h < SIZEOF (handlers); h++)
913 enum iconv_ilseq_handler handler = handlers[h];
914 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
915 for (o = 0; o < 2; o++)
917 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
918 char *result = NULL;
919 size_t length = 0;
920 int retval = mem_iconveh (input, strlen (input),
921 "UTF-8", "ISO-8859-1",
922 handler,
923 offsets,
924 &result, &length);
925 switch (handler)
927 case iconveh_error:
928 ASSERT (retval == -1 && errno == EILSEQ);
929 ASSERT (result == NULL);
930 if (o)
931 free (offsets);
932 break;
933 case iconveh_question_mark:
935 static const char expected[] = "Rafa? Maszkowski";
936 ASSERT (retval == 0);
937 ASSERT (length == strlen (expected));
938 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
939 if (o)
941 for (i = 0; i < 17; i++)
942 ASSERT (offsets[i] == (i < 5 ? i :
943 i == 5 ? (size_t)(-1) :
944 i - 1));
945 ASSERT (offsets[17] == MAGIC);
946 free (offsets);
948 free (result);
950 break;
951 case iconveh_escape_sequence:
953 static const char expected[] = "Rafa\\u0142 Maszkowski";
954 ASSERT (retval == 0);
955 ASSERT (length == strlen (expected));
956 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
957 if (o)
959 for (i = 0; i < 17; i++)
960 ASSERT (offsets[i] == (i < 5 ? i :
961 i == 5 ? (size_t)(-1) :
962 i + 4));
963 ASSERT (offsets[17] == MAGIC);
964 free (offsets);
966 free (result);
968 break;
973 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
974 for (h = 0; h < SIZEOF (handlers); h++)
976 enum iconv_ilseq_handler handler = handlers[h];
977 static const char input[] = "\342";
978 for (o = 0; o < 2; o++)
980 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
981 char *result = NULL;
982 size_t length = 0;
983 int retval = mem_iconveh (input, strlen (input),
984 "UTF-8", "ISO-8859-1",
985 handler,
986 offsets,
987 &result, &length);
988 ASSERT (retval == 0);
989 ASSERT (length == 0);
990 if (o)
992 ASSERT (offsets[0] == 0);
993 ASSERT (offsets[1] == MAGIC);
994 free (offsets);
996 free (result);
1000 /* ------------------------- Test str_iconveh() ------------------------- */
1002 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
1003 for (h = 0; h < SIZEOF (handlers); h++)
1005 enum iconv_ilseq_handler handler = handlers[h];
1006 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1007 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1008 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
1009 ASSERT (result != NULL);
1010 ASSERT (strcmp (result, expected) == 0);
1011 free (result);
1014 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
1015 for (h = 0; h < SIZEOF (handlers); h++)
1017 enum iconv_ilseq_handler handler = handlers[h];
1018 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
1019 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
1020 switch (handler)
1022 case iconveh_error:
1023 ASSERT (result == NULL && errno == EILSEQ);
1024 break;
1025 case iconveh_question_mark:
1027 static const char expected[] = "Rafa? Maszkowski";
1028 ASSERT (result != NULL);
1029 ASSERT (strcmp (result, expected) == 0);
1030 free (result);
1032 break;
1033 case iconveh_escape_sequence:
1035 static const char expected[] = "Rafa\\u0142 Maszkowski";
1036 ASSERT (result != NULL);
1037 ASSERT (strcmp (result, expected) == 0);
1038 free (result);
1040 break;
1044 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
1045 for (h = 0; h < SIZEOF (handlers); h++)
1047 enum iconv_ilseq_handler handler = handlers[h];
1048 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1049 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1050 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
1051 ASSERT (result != NULL);
1052 ASSERT (strcmp (result, expected) == 0);
1053 free (result);
1056 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
1057 for (h = 0; h < SIZEOF (handlers); h++)
1059 enum iconv_ilseq_handler handler = handlers[h];
1060 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1061 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1062 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1063 ASSERT (result != NULL);
1064 ASSERT (strcmp (result, expected) == 0);
1065 free (result);
1068 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1069 for (h = 0; h < SIZEOF (handlers); h++)
1071 enum iconv_ilseq_handler handler = handlers[h];
1072 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
1073 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1074 switch (handler)
1076 case iconveh_error:
1077 ASSERT (result == NULL && errno == EILSEQ);
1078 break;
1079 case iconveh_question_mark:
1081 static const char expected[] = "Costs: 27 ?";
1082 ASSERT (result != NULL);
1083 ASSERT (strcmp (result, expected) == 0);
1084 free (result);
1086 break;
1087 case iconveh_escape_sequence:
1089 static const char expected[] = "Costs: 27 \\u20AC";
1090 ASSERT (result != NULL);
1091 ASSERT (strcmp (result, expected) == 0);
1092 free (result);
1094 break;
1098 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
1099 for (h = 0; h < SIZEOF (handlers); h++)
1101 enum iconv_ilseq_handler handler = handlers[h];
1102 static const char input[] = "\342";
1103 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1104 ASSERT (result != NULL);
1105 ASSERT (strcmp (result, "") == 0);
1106 free (result);
1109 #endif
1111 return 0;