1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveh.h"
33 /* Magic number for detecting bounds violations.  The tests below assert
   that offsets[strlen (input)] still equals MAGIC after each conversion,
   i.e. that the slot following the last per-byte offset was left intact.
   NOTE(review): the store of MAGIC into that slot is presumably done by
   new_offsets(), which allocates n + 1 elements -- confirm there.  */
34 #define MAGIC 0x1983EFF1
37 new_offsets (size_t n
)
39 size_t *offsets
= (size_t *) malloc ((n
+ 1) * sizeof (size_t));
/* Table of illegal-sequence handlers exercised by the tests.  Each test
   loops over it with "for (h = 0; h < SIZEOF (handlers); h++)" and runs
   the same conversion once per handler, so that the outcome for every
   handler (error, question mark, escape sequence) is checked.  */
47 static enum iconv_ilseq_handler handlers
[] =
48 { iconveh_error
, iconveh_question_mark
, iconveh_escape_sequence
};
55 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
56 ISO-8859-2, and UTF-8. */
57 iconv_t cd_ascii_to_88591
= iconv_open ("ISO-8859-1", "ASCII");
58 iconv_t cd_88591_to_88592
= iconv_open ("ISO-8859-2", "ISO-8859-1");
59 iconv_t cd_88592_to_88591
= iconv_open ("ISO-8859-1", "ISO-8859-2");
60 iconv_t cd_ascii_to_utf8
= iconv_open ("UTF-8", "ASCII");
61 iconv_t cd_88591_to_utf8
= iconv_open ("UTF-8", "ISO-8859-1");
62 iconv_t cd_utf8_to_88591
= iconv_open ("ISO-8859-1", "UTF-8");
63 iconv_t cd_88592_to_utf8
= iconv_open ("UTF-8", "ISO-8859-2");
64 iconv_t cd_utf8_to_88592
= iconv_open ("ISO-8859-2", "UTF-8");
65 iconv_t cd_utf7_to_utf8
= iconv_open ("UTF-8", "UTF-7");
66 iconveh_t cdeh_ascii_to_88591
;
67 iconveh_t cdeh_ascii_to_88591_indirectly
;
68 iconveh_t cdeh_88592_to_88591
;
69 iconveh_t cdeh_88592_to_88591_indirectly
;
70 iconveh_t cdeh_ascii_to_utf8
;
71 iconveh_t cdeh_88591_to_utf8
;
72 iconveh_t cdeh_utf8_to_88591
;
73 iconveh_t cdeh_utf7_to_utf8
;
75 ASSERT (cd_ascii_to_utf8
!= (iconv_t
)(-1));
76 ASSERT (cd_88591_to_utf8
!= (iconv_t
)(-1));
77 ASSERT (cd_utf8_to_88591
!= (iconv_t
)(-1));
78 ASSERT (cd_88592_to_utf8
!= (iconv_t
)(-1));
79 ASSERT (cd_utf8_to_88592
!= (iconv_t
)(-1));
81 cdeh_ascii_to_88591
.cd
= cd_ascii_to_88591
;
82 cdeh_ascii_to_88591
.cd1
= cd_ascii_to_utf8
;
83 cdeh_ascii_to_88591
.cd2
= cd_utf8_to_88591
;
85 cdeh_ascii_to_88591_indirectly
.cd
= (iconv_t
)(-1);
86 cdeh_ascii_to_88591_indirectly
.cd1
= cd_ascii_to_utf8
;
87 cdeh_ascii_to_88591_indirectly
.cd2
= cd_utf8_to_88591
;
89 cdeh_88592_to_88591
.cd
= cd_88592_to_88591
;
90 cdeh_88592_to_88591
.cd1
= cd_88592_to_utf8
;
91 cdeh_88592_to_88591
.cd2
= cd_utf8_to_88591
;
93 cdeh_88592_to_88591_indirectly
.cd
= (iconv_t
)(-1);
94 cdeh_88592_to_88591_indirectly
.cd1
= cd_88592_to_utf8
;
95 cdeh_88592_to_88591_indirectly
.cd2
= cd_utf8_to_88591
;
97 cdeh_ascii_to_utf8
.cd
= cd_ascii_to_utf8
;
98 cdeh_ascii_to_utf8
.cd1
= cd_ascii_to_utf8
;
99 cdeh_ascii_to_utf8
.cd2
= (iconv_t
)(-1);
101 cdeh_88591_to_utf8
.cd
= cd_88591_to_utf8
;
102 cdeh_88591_to_utf8
.cd1
= cd_88591_to_utf8
;
103 cdeh_88591_to_utf8
.cd2
= (iconv_t
)(-1);
105 cdeh_utf8_to_88591
.cd
= cd_utf8_to_88591
;
106 cdeh_utf8_to_88591
.cd1
= (iconv_t
)(-1);
107 cdeh_utf8_to_88591
.cd2
= cd_utf8_to_88591
;
109 cdeh_utf7_to_utf8
.cd
= cd_utf7_to_utf8
;
110 cdeh_utf7_to_utf8
.cd1
= cd_utf7_to_utf8
;
111 cdeh_utf7_to_utf8
.cd2
= (iconv_t
)(-1);
113 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
115 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
116 for (indirect
= 0; indirect
<= 1; indirect
++)
118 for (h
= 0; h
< SIZEOF (handlers
); h
++)
120 enum iconv_ilseq_handler handler
= handlers
[h
];
121 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
122 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
123 for (o
= 0; o
< 2; o
++)
125 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
128 int retval
= mem_cd_iconveh (input
, strlen (input
),
130 ? &cdeh_88592_to_88591_indirectly
131 : &cdeh_88592_to_88591
),
135 ASSERT (retval
== 0);
136 ASSERT (length
== strlen (expected
));
137 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
140 for (i
= 0; i
< 37; i
++)
141 ASSERT (offsets
[i
] == i
);
142 ASSERT (offsets
[37] == MAGIC
);
150 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
151 for (indirect
= 0; indirect
<= 1; indirect
++)
153 for (h
= 0; h
< SIZEOF (handlers
); h
++)
155 enum iconv_ilseq_handler handler
= handlers
[h
];
156 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
157 for (o
= 0; o
< 2; o
++)
159 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
162 int retval
= mem_cd_iconveh (input
, strlen (input
),
164 ? &cdeh_ascii_to_88591_indirectly
165 : &cdeh_ascii_to_88591
),
172 ASSERT (retval
== -1 && errno
== EILSEQ
);
173 ASSERT (result
== NULL
);
177 case iconveh_question_mark
:
178 case iconveh_escape_sequence
:
180 static const char expected
[] = "Rafa? Maszkowski";
181 ASSERT (retval
== 0);
182 ASSERT (length
== strlen (expected
));
183 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
186 for (i
= 0; i
< 16; i
++)
187 ASSERT (offsets
[i
] == i
);
188 ASSERT (offsets
[16] == MAGIC
);
199 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
200 for (indirect
= 0; indirect
<= 1; indirect
++)
202 for (h
= 0; h
< SIZEOF (handlers
); h
++)
204 enum iconv_ilseq_handler handler
= handlers
[h
];
205 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
206 for (o
= 0; o
< 2; o
++)
208 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
211 int retval
= mem_cd_iconveh (input
, strlen (input
),
213 ? &cdeh_88592_to_88591_indirectly
214 : &cdeh_88592_to_88591
),
221 ASSERT (retval
== -1 && errno
== EILSEQ
);
222 ASSERT (result
== NULL
);
226 case iconveh_question_mark
:
228 static const char expected
[] = "Rafa? Maszkowski";
229 ASSERT (retval
== 0);
230 ASSERT (length
== strlen (expected
));
231 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
234 for (i
= 0; i
< 16; i
++)
235 ASSERT (offsets
[i
] == i
);
236 ASSERT (offsets
[16] == MAGIC
);
242 case iconveh_escape_sequence
:
244 static const char expected
[] = "Rafa\\u0142 Maszkowski";
245 ASSERT (retval
== 0);
246 ASSERT (length
== strlen (expected
));
247 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
250 for (i
= 0; i
< 16; i
++)
251 ASSERT (offsets
[i
] == (i
< 5 ? i
:
253 ASSERT (offsets
[16] == MAGIC
);
264 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
265 for (h
= 0; h
< SIZEOF (handlers
); h
++)
267 enum iconv_ilseq_handler handler
= handlers
[h
];
268 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
269 static const char expected
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
270 for (o
= 0; o
< 2; o
++)
272 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
275 int retval
= mem_cd_iconveh (input
, strlen (input
),
280 ASSERT (retval
== 0);
281 ASSERT (length
== strlen (expected
));
282 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
285 for (i
= 0; i
< 37; i
++)
286 ASSERT (offsets
[i
] == (i
< 1 ? i
:
290 ASSERT (offsets
[37] == MAGIC
);
297 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
298 for (h
= 0; h
< SIZEOF (handlers
); h
++)
300 enum iconv_ilseq_handler handler
= handlers
[h
];
301 static const char input
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
302 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
303 for (o
= 0; o
< 2; o
++)
305 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
308 int retval
= mem_cd_iconveh (input
, strlen (input
),
313 ASSERT (retval
== 0);
314 ASSERT (length
== strlen (expected
));
315 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
318 for (i
= 0; i
< 41; i
++)
319 ASSERT (offsets
[i
] == (i
< 1 ? i
:
320 i
== 1 ? (size_t)(-1) :
322 i
== 13 ? (size_t)(-1) :
324 i
== 20 ? (size_t)(-1) :
327 ASSERT (offsets
[41] == MAGIC
);
334 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
335 for (h
= 0; h
< SIZEOF (handlers
); h
++)
337 enum iconv_ilseq_handler handler
= handlers
[h
];
338 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
339 for (o
= 0; o
< 2; o
++)
341 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
344 int retval
= mem_cd_iconveh (input
, strlen (input
),
352 ASSERT (retval
== -1 && errno
== EILSEQ
);
353 ASSERT (result
== NULL
);
357 case iconveh_question_mark
:
358 case iconveh_escape_sequence
:
360 static const char expected
[] = "Rafa? Maszkowski";
361 ASSERT (retval
== 0);
362 ASSERT (length
== strlen (expected
));
363 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
366 for (i
= 0; i
< 16; i
++)
367 ASSERT (offsets
[i
] == i
);
368 ASSERT (offsets
[16] == MAGIC
);
378 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
379 for (h
= 0; h
< SIZEOF (handlers
); h
++)
381 enum iconv_ilseq_handler handler
= handlers
[h
];
382 static const char input
[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
383 for (o
= 0; o
< 2; o
++)
385 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
388 int retval
= mem_cd_iconveh (input
, strlen (input
),
396 ASSERT (retval
== -1 && errno
== EILSEQ
);
397 ASSERT (result
== NULL
);
401 case iconveh_question_mark
:
403 static const char expected
[] = "Rafa? Maszkowski";
404 ASSERT (retval
== 0);
405 ASSERT (length
== strlen (expected
));
406 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
409 for (i
= 0; i
< 17; i
++)
410 ASSERT (offsets
[i
] == (i
< 5 ? i
:
411 i
== 5 ? (size_t)(-1) :
413 ASSERT (offsets
[17] == MAGIC
);
419 case iconveh_escape_sequence
:
421 static const char expected
[] = "Rafa\\u0142 Maszkowski";
422 ASSERT (retval
== 0);
423 ASSERT (length
== strlen (expected
));
424 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
427 for (i
= 0; i
< 17; i
++)
428 ASSERT (offsets
[i
] == (i
< 5 ? i
:
429 i
== 5 ? (size_t)(-1) :
431 ASSERT (offsets
[17] == MAGIC
);
441 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
442 for (h
= 0; h
< SIZEOF (handlers
); h
++)
444 enum iconv_ilseq_handler handler
= handlers
[h
];
445 static const char input
[] = "\342";
446 for (o
= 0; o
< 2; o
++)
448 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
451 int retval
= mem_cd_iconveh (input
, strlen (input
),
456 ASSERT (retval
== 0);
457 ASSERT (length
== 0);
460 ASSERT (offsets
[0] == 0);
461 ASSERT (offsets
[1] == MAGIC
);
468 if (cd_utf7_to_utf8
!= (iconv_t
)(-1))
470 /* Disabled on Solaris, because Solaris 9 iconv() is buggy: it returns
471 -1 / EILSEQ when converting the 7th byte of the input "+VDLYP9hA". */
472 # if !(defined __sun && !defined _LIBICONV_VERSION)
473 /* Test conversion from UTF-7 to UTF-8 with EINVAL. */
474 for (h
= 0; h
< SIZEOF (handlers
); h
++)
476 enum iconv_ilseq_handler handler
= handlers
[h
];
477 /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
478 convert to U+5432 U+D83F U+D840, but U+D83F and U+D840 are Unicode surrogates. */
479 static const char input
[] = "+VDLYP9hA";
480 static const char expected1
[] = "\345\220\262"; /* 吲 glibc */
481 static const char expected2
[] = ""; /* libiconv */
484 int retval
= mem_cd_iconveh (input
, 7,
489 ASSERT (retval
== 0);
490 ASSERT (length
== strlen (expected1
) || length
== strlen (expected2
));
491 ASSERT (result
!= NULL
);
492 if (length
== strlen (expected1
))
493 ASSERT (memcmp (result
, expected1
, strlen (expected1
)) == 0);
495 ASSERT (memcmp (result
, expected2
, strlen (expected2
)) == 0);
499 /* Disabled on NetBSD, because NetBSD 5.0 iconv() is buggy: it converts
500 the input "+2D/YQNhB" to U+1FED8 U+3FD8 U+40D8. */
501 # if !(defined __NetBSD__ && !defined _LIBICONV_VERSION)
502 /* Test conversion from UTF-7 to UTF-8 with EILSEQ. */
503 for (h
= 0; h
< SIZEOF (handlers
); h
++)
505 enum iconv_ilseq_handler handler
= handlers
[h
];
506 /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
507 convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
508 static const char input
[] = "+2D/YQNhB";
511 int retval
= mem_cd_iconveh (input
, strlen (input
),
519 ASSERT (retval
== -1 && errno
== EILSEQ
);
520 ASSERT (result
== NULL
);
522 case iconveh_question_mark
:
523 case iconveh_escape_sequence
:
526 static const char expected1
[] = "?????";
527 /* libiconv <= 1.12 result */
528 static const char expected2
[] = "?2D/YQNhB";
529 /* libiconv behaviour changed in version 1.13: the result is
530 '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left by 6 bits. */
532 static const char expected3
[] = "?\340\277\266\341\200\266";
533 ASSERT (retval
== 0);
534 ASSERT (length
== strlen (expected1
)
535 || length
== strlen (expected2
)
536 || length
== strlen (expected3
));
537 ASSERT (result
!= NULL
);
538 if (length
== strlen (expected1
))
539 ASSERT (memcmp (result
, expected1
, strlen (expected1
)) == 0);
540 else if (length
== strlen (expected2
))
541 ASSERT (memcmp (result
, expected2
, strlen (expected2
)) == 0);
543 ASSERT (memcmp (result
, expected3
, strlen (expected3
)) == 0);
553 /* ------------------------ Test str_cd_iconveh() ------------------------ */
555 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
556 for (indirect
= 0; indirect
<= 1; indirect
++)
558 for (h
= 0; h
< SIZEOF (handlers
); h
++)
560 enum iconv_ilseq_handler handler
= handlers
[h
];
561 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
562 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
563 char *result
= str_cd_iconveh (input
,
565 ? &cdeh_88592_to_88591_indirectly
566 : &cdeh_88592_to_88591
),
568 ASSERT (result
!= NULL
);
569 ASSERT (strcmp (result
, expected
) == 0);
574 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
575 for (indirect
= 0; indirect
<= 1; indirect
++)
577 for (h
= 0; h
< SIZEOF (handlers
); h
++)
579 enum iconv_ilseq_handler handler
= handlers
[h
];
580 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
581 char *result
= str_cd_iconveh (input
,
583 ? &cdeh_ascii_to_88591_indirectly
584 : &cdeh_ascii_to_88591
),
589 ASSERT (result
== NULL
&& errno
== EILSEQ
);
591 case iconveh_question_mark
:
592 case iconveh_escape_sequence
:
594 static const char expected
[] = "Rafa? Maszkowski";
595 ASSERT (result
!= NULL
);
596 ASSERT (strcmp (result
, expected
) == 0);
604 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
605 for (indirect
= 0; indirect
<= 1; indirect
++)
607 for (h
= 0; h
< SIZEOF (handlers
); h
++)
609 enum iconv_ilseq_handler handler
= handlers
[h
];
610 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
611 char *result
= str_cd_iconveh (input
,
613 ? &cdeh_88592_to_88591_indirectly
614 : &cdeh_88592_to_88591
),
619 ASSERT (result
== NULL
&& errno
== EILSEQ
);
621 case iconveh_question_mark
:
623 static const char expected
[] = "Rafa? Maszkowski";
624 ASSERT (result
!= NULL
);
625 ASSERT (strcmp (result
, expected
) == 0);
629 case iconveh_escape_sequence
:
631 static const char expected
[] = "Rafa\\u0142 Maszkowski";
632 ASSERT (result
!= NULL
);
633 ASSERT (strcmp (result
, expected
) == 0);
641 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
642 for (h
= 0; h
< SIZEOF (handlers
); h
++)
644 enum iconv_ilseq_handler handler
= handlers
[h
];
645 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
646 static const char expected
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
647 char *result
= str_cd_iconveh (input
,
650 ASSERT (result
!= NULL
);
651 ASSERT (strcmp (result
, expected
) == 0);
655 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
656 for (h
= 0; h
< SIZEOF (handlers
); h
++)
658 enum iconv_ilseq_handler handler
= handlers
[h
];
659 static const char input
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
660 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
661 char *result
= str_cd_iconveh (input
,
664 ASSERT (result
!= NULL
);
665 ASSERT (strcmp (result
, expected
) == 0);
669 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
670 for (h
= 0; h
< SIZEOF (handlers
); h
++)
672 enum iconv_ilseq_handler handler
= handlers
[h
];
673 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
674 char *result
= str_cd_iconveh (input
,
680 ASSERT (result
== NULL
&& errno
== EILSEQ
);
682 case iconveh_question_mark
:
683 case iconveh_escape_sequence
:
685 static const char expected
[] = "Rafa? Maszkowski";
686 ASSERT (result
!= NULL
);
687 ASSERT (strcmp (result
, expected
) == 0);
694 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
695 for (h
= 0; h
< SIZEOF (handlers
); h
++)
697 enum iconv_ilseq_handler handler
= handlers
[h
];
698 static const char input
[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
699 char *result
= str_cd_iconveh (input
,
705 ASSERT (result
== NULL
&& errno
== EILSEQ
);
707 case iconveh_question_mark
:
709 static const char expected
[] = "Costs: 27 ?";
710 ASSERT (result
!= NULL
);
711 ASSERT (strcmp (result
, expected
) == 0);
715 case iconveh_escape_sequence
:
717 static const char expected
[] = "Costs: 27 \\u20AC";
718 ASSERT (result
!= NULL
);
719 ASSERT (strcmp (result
, expected
) == 0);
726 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
727 for (h
= 0; h
< SIZEOF (handlers
); h
++)
729 enum iconv_ilseq_handler handler
= handlers
[h
];
730 static const char input
[] = "\342";
731 char *result
= str_cd_iconveh (input
,
734 ASSERT (result
!= NULL
);
735 ASSERT (strcmp (result
, "") == 0);
739 if (cd_88591_to_88592
!= (iconv_t
)(-1))
740 iconv_close (cd_88591_to_88592
);
741 if (cd_88592_to_88591
!= (iconv_t
)(-1))
742 iconv_close (cd_88592_to_88591
);
743 iconv_close (cd_88591_to_utf8
);
744 iconv_close (cd_utf8_to_88591
);
745 iconv_close (cd_88592_to_utf8
);
746 iconv_close (cd_utf8_to_88592
);
748 /* ------------------------- Test mem_iconveh() ------------------------- */
750 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
751 for (h
= 0; h
< SIZEOF (handlers
); h
++)
753 enum iconv_ilseq_handler handler
= handlers
[h
];
754 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
755 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
756 for (o
= 0; o
< 2; o
++)
758 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
761 int retval
= mem_iconveh (input
, strlen (input
),
762 "ISO-8859-2", "ISO-8859-1",
766 ASSERT (retval
== 0);
767 ASSERT (length
== strlen (expected
));
768 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
771 for (i
= 0; i
< 37; i
++)
772 ASSERT (offsets
[i
] == i
);
773 ASSERT (offsets
[37] == MAGIC
);
780 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
781 for (h
= 0; h
< SIZEOF (handlers
); h
++)
783 enum iconv_ilseq_handler handler
= handlers
[h
];
784 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
785 for (o
= 0; o
< 2; o
++)
787 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
790 int retval
= mem_iconveh (input
, strlen (input
),
791 "ISO-8859-2", "ISO-8859-1",
798 ASSERT (retval
== -1 && errno
== EILSEQ
);
799 ASSERT (result
== NULL
);
803 case iconveh_question_mark
:
805 static const char expected
[] = "Rafa? Maszkowski";
806 ASSERT (retval
== 0);
807 ASSERT (length
== strlen (expected
));
808 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
811 for (i
= 0; i
< 16; i
++)
812 ASSERT (offsets
[i
] == i
);
813 ASSERT (offsets
[16] == MAGIC
);
819 case iconveh_escape_sequence
:
821 static const char expected
[] = "Rafa\\u0142 Maszkowski";
822 ASSERT (retval
== 0);
823 ASSERT (length
== strlen (expected
));
824 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
827 for (i
= 0; i
< 16; i
++)
828 ASSERT (offsets
[i
] == (i
< 5 ? i
:
830 ASSERT (offsets
[16] == MAGIC
);
840 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
841 for (h
= 0; h
< SIZEOF (handlers
); h
++)
843 enum iconv_ilseq_handler handler
= handlers
[h
];
844 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
845 static const char expected
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
846 for (o
= 0; o
< 2; o
++)
848 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
851 int retval
= mem_iconveh (input
, strlen (input
),
852 "ISO-8859-1", "UTF-8",
856 ASSERT (retval
== 0);
857 ASSERT (length
== strlen (expected
));
858 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
861 for (i
= 0; i
< 37; i
++)
862 ASSERT (offsets
[i
] == (i
< 1 ? i
:
866 ASSERT (offsets
[37] == MAGIC
);
873 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
874 for (h
= 0; h
< SIZEOF (handlers
); h
++)
876 enum iconv_ilseq_handler handler
= handlers
[h
];
877 static const char input
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
878 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
879 for (o
= 0; o
< 2; o
++)
881 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
884 int retval
= mem_iconveh (input
, strlen (input
),
885 "UTF-8", "ISO-8859-1",
889 ASSERT (retval
== 0);
890 ASSERT (length
== strlen (expected
));
891 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
894 for (i
= 0; i
< 41; i
++)
895 ASSERT (offsets
[i
] == (i
< 1 ? i
:
896 i
== 1 ? (size_t)(-1) :
898 i
== 13 ? (size_t)(-1) :
900 i
== 20 ? (size_t)(-1) :
903 ASSERT (offsets
[41] == MAGIC
);
910 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
911 for (h
= 0; h
< SIZEOF (handlers
); h
++)
913 enum iconv_ilseq_handler handler
= handlers
[h
];
914 static const char input
[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
915 for (o
= 0; o
< 2; o
++)
917 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
920 int retval
= mem_iconveh (input
, strlen (input
),
921 "UTF-8", "ISO-8859-1",
928 ASSERT (retval
== -1 && errno
== EILSEQ
);
929 ASSERT (result
== NULL
);
933 case iconveh_question_mark
:
935 static const char expected
[] = "Rafa? Maszkowski";
936 ASSERT (retval
== 0);
937 ASSERT (length
== strlen (expected
));
938 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
941 for (i
= 0; i
< 17; i
++)
942 ASSERT (offsets
[i
] == (i
< 5 ? i
:
943 i
== 5 ? (size_t)(-1) :
945 ASSERT (offsets
[17] == MAGIC
);
951 case iconveh_escape_sequence
:
953 static const char expected
[] = "Rafa\\u0142 Maszkowski";
954 ASSERT (retval
== 0);
955 ASSERT (length
== strlen (expected
));
956 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
959 for (i
= 0; i
< 17; i
++)
960 ASSERT (offsets
[i
] == (i
< 5 ? i
:
961 i
== 5 ? (size_t)(-1) :
963 ASSERT (offsets
[17] == MAGIC
);
973 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
974 for (h
= 0; h
< SIZEOF (handlers
); h
++)
976 enum iconv_ilseq_handler handler
= handlers
[h
];
977 static const char input
[] = "\342";
978 for (o
= 0; o
< 2; o
++)
980 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
983 int retval
= mem_iconveh (input
, strlen (input
),
984 "UTF-8", "ISO-8859-1",
988 ASSERT (retval
== 0);
989 ASSERT (length
== 0);
992 ASSERT (offsets
[0] == 0);
993 ASSERT (offsets
[1] == MAGIC
);
1000 /* ------------------------- Test str_iconveh() ------------------------- */
1002 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
1003 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1005 enum iconv_ilseq_handler handler
= handlers
[h
];
1006 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1007 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1008 char *result
= str_iconveh (input
, "ISO-8859-2", "ISO-8859-1", handler
);
1009 ASSERT (result
!= NULL
);
1010 ASSERT (strcmp (result
, expected
) == 0);
1014 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
1015 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1017 enum iconv_ilseq_handler handler
= handlers
[h
];
1018 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
1019 char *result
= str_iconveh (input
, "ISO-8859-2", "ISO-8859-1", handler
);
1023 ASSERT (result
== NULL
&& errno
== EILSEQ
);
1025 case iconveh_question_mark
:
1027 static const char expected
[] = "Rafa? Maszkowski";
1028 ASSERT (result
!= NULL
);
1029 ASSERT (strcmp (result
, expected
) == 0);
1033 case iconveh_escape_sequence
:
1035 static const char expected
[] = "Rafa\\u0142 Maszkowski";
1036 ASSERT (result
!= NULL
);
1037 ASSERT (strcmp (result
, expected
) == 0);
1044 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
1045 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1047 enum iconv_ilseq_handler handler
= handlers
[h
];
1048 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1049 static const char expected
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1050 char *result
= str_iconveh (input
, "ISO-8859-1", "UTF-8", handler
);
1051 ASSERT (result
!= NULL
);
1052 ASSERT (strcmp (result
, expected
) == 0);
1056 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
1057 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1059 enum iconv_ilseq_handler handler
= handlers
[h
];
1060 static const char input
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1061 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1062 char *result
= str_iconveh (input
, "UTF-8", "ISO-8859-1", handler
);
1063 ASSERT (result
!= NULL
);
1064 ASSERT (strcmp (result
, expected
) == 0);
1068 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1069 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1071 enum iconv_ilseq_handler handler
= handlers
[h
];
1072 static const char input
[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
1073 char *result
= str_iconveh (input
, "UTF-8", "ISO-8859-1", handler
);
1077 ASSERT (result
== NULL
&& errno
== EILSEQ
);
1079 case iconveh_question_mark
:
1081 static const char expected
[] = "Costs: 27 ?";
1082 ASSERT (result
!= NULL
);
1083 ASSERT (strcmp (result
, expected
) == 0);
1087 case iconveh_escape_sequence
:
1089 static const char expected
[] = "Costs: 27 \\u20AC";
1090 ASSERT (result
!= NULL
);
1091 ASSERT (strcmp (result
, expected
) == 0);
1098 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
1099 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1101 enum iconv_ilseq_handler handler
= handlers
[h
];
1102 static const char input
[] = "\342";
1103 char *result
= str_iconveh (input
, "UTF-8", "ISO-8859-1", handler
);
1104 ASSERT (result
!= NULL
);
1105 ASSERT (strcmp (result
, "") == 0);