2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
24 #include "torture/torture.h"
25 #include "system/iconv.h"
26 #include "system/time.h"
27 #include "libcli/raw/libcliraw.h"
28 #include "param/param.h"
29 #include "torture/util.h"
34 static bool iconv_untestable(struct torture_context
*tctx
)
38 cd
= iconv_open("UTF-16LE", "UCS-4LE");
39 if (cd
== (iconv_t
)-1)
40 torture_skip(tctx
, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
43 cd
= iconv_open("UTF-16LE", "CP850");
44 if (cd
== (iconv_t
)-1)
45 torture_skip(tctx
, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
52 generate a UTF-16LE buffer for a given unicode codepoint
54 static int gen_codepoint_utf16(unsigned int codepoint
,
55 char *buf
, size_t *size
)
60 size_t size_in
, size_out
, ret
;
62 cd
= iconv_open("UTF-16LE", "UCS-4LE");
63 if (cd
== (iconv_t
)-1) {
69 in
[0] = codepoint
& 0xFF;
70 in
[1] = (codepoint
>>8) & 0xFF;
71 in
[2] = (codepoint
>>16) & 0xFF;
72 in
[3] = (codepoint
>>24) & 0xFF;
78 ret
= iconv(cd
, &ptr_in
, &size_in
, &buf
, &size_out
);
87 work out the unicode codepoint of the first character in the buffer, where the buffer is encoded in the given charset
89 static unsigned int get_codepoint(char *buf
, size_t size
, const char *charset
)
94 size_t size_out
, size_in
, ret
;
96 cd
= iconv_open("UCS-4LE", charset
);
99 ptr_out
= (char *)out
;
100 size_out
= sizeof(out
);
101 memset(out
, 0, sizeof(out
));
103 ret
= iconv(cd
, &buf
, &size_in
, &ptr_out
, &size_out
);
107 return out
[0] | (out
[1]<<8) | (out
[2]<<16) | (out
[3]<<24);
111 display a buffer with name prefix
113 static void show_buf(const char *name
, uint8_t *buf
, size_t size
)
117 for (i
=0;i
<size
;i
++) {
118 printf("%02x ", buf
[i
]);
124 given a UTF-16LE buffer, test the system and built-in iconv code to
125 make sure they do exactly the same thing in converting the buffer to
126 "charset", then convert it back again and ensure we get the same bytes back
129 static bool test_buffer(struct torture_context
*test
,
130 uint8_t *inbuf
, size_t size
, const char *charset
)
132 uint8_t buf1
[1000], buf2
[1000], buf3
[1000];
133 size_t outsize1
, outsize2
, outsize3
;
136 size_t size_in1
, size_in2
, size_in3
;
137 size_t ret1
, ret2
, ret3
, len1
, len2
;
140 static smb_iconv_t cd2
, cd3
;
141 static const char *last_charset
;
143 if (cd
&& last_charset
) {
145 smb_iconv_close(cd2
);
146 smb_iconv_close(cd3
);
151 cd
= iconv_open(charset
, "UTF-16LE");
152 if (cd
== (iconv_t
)-1) {
154 talloc_asprintf(test
,
155 "failed to open %s to UTF-16LE",
158 cd2
= smb_iconv_open_ex(test
, charset
, "UTF-16LE", lpcfg_parm_bool(test
->lp_ctx
, NULL
, "iconv", "use_builtin_handlers", true));
159 if (cd2
== (iconv_t
)-1) {
161 talloc_asprintf(test
,
162 "failed to open %s to UTF-16LE via smb_iconv_open_ex",
165 cd3
= smb_iconv_open_ex(test
, "UTF-16LE", charset
, lpcfg_parm_bool(test
->lp_ctx
, NULL
, "iconv", "use_builtin_handlers", true));
166 if (cd3
== (iconv_t
)-1) {
168 talloc_asprintf(test
,
169 "failed to open UTF-16LE to %s via smb_iconv_open_ex",
172 last_charset
= charset
;
175 /* internal convert to charset - placing result in buf1 */
176 ptr_in
= (char *)inbuf
;
177 ptr_out
= (char *)buf1
;
179 outsize1
= sizeof(buf1
);
181 memset(ptr_out
, 0, outsize1
);
183 ret1
= smb_iconv(cd2
, (const char **) &ptr_in
, &size_in1
, &ptr_out
, &outsize1
);
186 /* system convert to charset - placing result in buf2 */
187 ptr_in
= (char *)inbuf
;
188 ptr_out
= (char *)buf2
;
190 outsize2
= sizeof(buf2
);
192 memset(ptr_out
, 0, outsize2
);
194 ret2
= iconv(cd
, &ptr_in
, &size_in2
, &ptr_out
, &outsize2
);
197 len1
= sizeof(buf1
) - outsize1
;
198 len2
= sizeof(buf2
) - outsize2
;
200 /* codepoints above 1M are not interesting for now */
202 memcmp(buf1
, buf2
, len1
) == 0 &&
203 get_codepoint((char *)(buf2
+len1
), len2
-len1
, charset
) >= (1<<20)) {
207 memcmp(buf1
, buf2
, len2
) == 0 &&
208 get_codepoint((char *)(buf1
+len2
), len1
-len2
, charset
) >= (1<<20)) {
212 torture_assert_int_equal(test
, ret1
, ret2
, "ret mismatch");
214 if (errno1
!= errno2
) {
215 show_buf(" rem1:", inbuf
+(size
-size_in1
), size_in1
);
216 show_buf(" rem2:", inbuf
+(size
-size_in2
), size_in2
);
217 torture_fail(test
, talloc_asprintf(test
,
218 "errno mismatch with %s internal=%d/%s system=%d/%s",
220 errno1
, strerror(errno1
),
221 errno2
, strerror(errno2
)));
224 torture_assert_int_equal(test
, outsize1
, outsize2
, "outsize mismatch");
226 torture_assert_int_equal(test
, size_in1
, size_in2
, "size_in mismatch");
229 memcmp(buf1
, buf2
, len1
) != 0) {
230 torture_comment(test
, "size=%d ret1=%d ret2=%d", (int)size
, (int)ret1
, (int)ret2
);
231 show_buf(" IN1:", inbuf
, size
-size_in1
);
232 show_buf(" IN2:", inbuf
, size
-size_in2
);
233 show_buf("OUT1:", buf1
, len1
);
234 show_buf("OUT2:", buf2
, len2
);
235 if (len2
> len1
&& memcmp(buf1
, buf2
, len1
) == 0) {
236 torture_comment(test
, "next codepoint is %u",
237 get_codepoint((char *)(buf2
+len1
), len2
-len1
, charset
));
239 if (len1
> len2
&& memcmp(buf1
, buf2
, len2
) == 0) {
240 torture_comment(test
, "next codepoint is %u",
241 get_codepoint((char *)(buf1
+len2
),len1
-len2
, charset
));
244 torture_fail(test
, "failed");
247 /* convert back to UTF-16, putting result in buf3 */
248 size
= size
- size_in1
;
249 ptr_in
= (char *)buf1
;
250 ptr_out
= (char *)buf3
;
252 outsize3
= sizeof(buf3
);
254 memset(ptr_out
, 0, outsize3
);
255 ret3
= smb_iconv(cd3
, (const char **) &ptr_in
, &size_in3
, &ptr_out
, &outsize3
);
257 /* we only internally support the first 1M codepoints */
258 if (outsize3
!= sizeof(buf3
) - size
&&
259 get_codepoint((char *)(inbuf
+sizeof(buf3
) - outsize3
),
260 size
- (sizeof(buf3
) - outsize3
),
261 "UTF-16LE") >= (1<<20)) {
265 torture_assert_int_equal(test
, ret3
, 0, talloc_asprintf(test
,
266 "pull failed - %s", strerror(errno
)));
268 if (strncmp(charset
, "UTF", 3) != 0) {
269 /* don't expect perfect mappings for non UTF charsets */
274 torture_assert_int_equal(test
, outsize3
, sizeof(buf3
) - size
,
277 if (memcmp(buf3
, inbuf
, size
) != 0) {
278 torture_comment(test
, "pull bytes mismatch:");
279 show_buf("inbuf", inbuf
, size
);
280 show_buf(" buf3", buf3
, sizeof(buf3
) - outsize3
);
281 torture_comment(test
, "next codepoint is %u\n",
282 get_codepoint((char *)(inbuf
+sizeof(buf3
) - outsize3
),
283 size
- (sizeof(buf3
) - outsize3
),
285 torture_fail(test
, "");
293 test the push_codepoint_handle() and next_codepoint_handle() functions for a given codepoint
296 static bool test_codepoint(struct torture_context
*tctx
, unsigned int codepoint
)
302 size
= push_codepoint_handle(lpcfg_iconv_handle(tctx
->lp_ctx
), (char *)buf
, codepoint
);
303 torture_assert(tctx
, size
!= -1 || (codepoint
>= 0xd800 && codepoint
<= 0x10000),
304 "Invalid Codepoint range");
306 if (size
== -1) return true;
308 buf
[size
] = random();
309 buf
[size
+1] = random();
310 buf
[size
+2] = random();
311 buf
[size
+3] = random();
313 c
= next_codepoint_handle(lpcfg_iconv_handle(tctx
->lp_ctx
), (char *)buf
, &size2
);
315 torture_assert(tctx
, c
== codepoint
,
316 talloc_asprintf(tctx
,
317 "next_codepoint(%u) failed - gave %u", codepoint
, c
));
319 torture_assert(tctx
, size2
== size
,
320 talloc_asprintf(tctx
, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
321 codepoint
, (int)size2
, (int)size
));
326 static bool test_next_codepoint(struct torture_context
*tctx
)
328 unsigned int codepoint
;
329 if (iconv_untestable(tctx
))
332 for (codepoint
=0;codepoint
<(1<<20);codepoint
++) {
333 if (!test_codepoint(tctx
, codepoint
))
339 static bool test_first_1m(struct torture_context
*tctx
)
341 unsigned int codepoint
;
343 unsigned char inbuf
[1000];
345 if (iconv_untestable(tctx
))
348 for (codepoint
=0;codepoint
<(1<<20);codepoint
++) {
349 if (gen_codepoint_utf16(codepoint
, (char *)inbuf
, &size
) != 0) {
353 if (codepoint
% 1000 == 0) {
354 if (torture_setting_bool(tctx
, "progress", true)) {
355 torture_comment(tctx
, "codepoint=%u \r", codepoint
);
360 if (!test_buffer(tctx
, inbuf
, size
, "UTF-8"))
366 static bool test_random_5m(struct torture_context
*tctx
)
368 unsigned char inbuf
[1000];
371 if (iconv_untestable(tctx
))
374 for (i
=0;i
<500000;i
++) {
379 if (torture_setting_bool(tctx
, "progress", true)) {
380 torture_comment(tctx
, "i=%u \r", i
);
385 size
= random() % 100;
386 for (c
=0;c
<size
;c
++) {
387 if (random() % 100 < 80) {
388 inbuf
[c
] = random() % 128;
392 if (random() % 10 == 0) {
395 if (random() % 10 == 0) {
399 if (!test_buffer(tctx
, inbuf
, size
, "UTF-8")) {
400 printf("i=%d failed UTF-8\n", i
);
404 if (!test_buffer(tctx
, inbuf
, size
, "CP850")) {
405 printf("i=%d failed CP850\n", i
);
413 static bool test_string2key(struct torture_context
*tctx
)
417 TALLOC_CTX
*mem_ctx
= talloc_new(tctx
);
418 size_t len
= (random()%1000)+1;
419 const uint16_t in1
[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
425 const char *correct
= "a\357\277\275b\357\277\275c\001defg";
427 buf
= talloc_size(mem_ctx
, len
*2);
428 generate_random_buffer((uint8_t *)buf
, len
*2);
430 torture_comment(tctx
, "converting random buffer\n");
432 if (!convert_string_talloc(mem_ctx
, CH_UTF16MUNGED
, CH_UTF8
, (void *)buf
, len
*2, (void**)&dest
, &ret
)) {
433 torture_fail(tctx
, "Failed to convert random buffer\n");
437 SSVAL(&le1
[2*i
], 0, in1
[i
]);
440 torture_comment(tctx
, "converting fixed buffer to UTF16\n");
442 if (!convert_string_talloc(mem_ctx
, CH_UTF16MUNGED
, CH_UTF16
, (void *)le1
, 20, (void**)&munged1
, &ret
)) {
443 torture_fail(tctx
, "Failed to convert fixed buffer to UTF16_MUNGED\n");
446 torture_assert(tctx
, ret
== 20, "conversion should give 20 bytes\n");
448 torture_comment(tctx
, "converting fixed buffer to UTF8\n");
450 if (!convert_string_talloc(mem_ctx
, CH_UTF16MUNGED
, CH_UTF8
, (void *)le1
, 20, (void**)&out1
, &ret
)) {
451 torture_fail(tctx
, "Failed to convert fixed buffer to UTF8\n");
454 torture_assert(tctx
, strcmp(correct
, (const char *) out1
) == 0,
455 "conversion gave incorrect result\n");
457 talloc_free(mem_ctx
);
462 struct torture_suite
*torture_local_iconv(TALLOC_CTX
*mem_ctx
)
464 struct torture_suite
*suite
= torture_suite_create(mem_ctx
, "iconv");
466 torture_suite_add_simple_test(suite
, "string2key",
469 torture_suite_add_simple_test(suite
, "next_codepoint()",
470 test_next_codepoint
);
472 torture_suite_add_simple_test(suite
, "first 1M codepoints",
475 torture_suite_add_simple_test(suite
, "5M random UTF-16LE sequences",
478 torture_suite_add_simple_test(suite
, "string2key",
485 struct torture_suite
*torture_local_iconv(TALLOC_CTX
*mem_ctx
)
487 printf("No native iconv library - can't run iconv test\n");