2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
24 #include "torture/torture.h"
25 #include "system/iconv.h"
26 #include "system/time.h"
27 #include "libcli/raw/libcliraw.h"
28 #include "param/param.h"
29 #include "torture/util.h"
/*
  Decide whether the tests that compare against the system iconv can run.

  Visible steps:
   - look up the boolean parameter iconv / native through lp_parm_bool
     (last argument true, presumably the default) and skip the test when
     it is false;
   - call iconv_open with UTF-16LE as first argument and UCS-4LE, then
     CP850, as second argument, skipping the test when either call
     returns (iconv_t)-1.

  NOTE(review): this listing has lost lines (braces, the declaration of
  cd, any iconv_close calls and the return statements), so the returned
  value is not visible here.
  NOTE(review): iconv_open takes (tocode, fromcode); the skip messages
  print the two names in argument order with an arrow, which reads as the
  opposite conversion direction.  Only the last message ends in a newline.
*/
33 static bool iconv_untestable(struct torture_context
*tctx
)
37 if (!lp_parm_bool(tctx
->lp_ctx
, NULL
, "iconv", "native", true))
38 torture_skip(tctx
, "system iconv disabled - skipping test");
40 cd
= iconv_open("UTF-16LE", "UCS-4LE");
41 if (cd
== (iconv_t
)-1)
42 torture_skip(tctx
, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
45 cd
= iconv_open("UTF-16LE", "CP850");
46 if (cd
== (iconv_t
)-1)
47 torture_skip(tctx
, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
54 generate a UTF-16LE buffer for a given unicode codepoint
/*
  Encode one codepoint through the system iconv, writing the result to buf.

  codepoint: value to encode
  buf:       output buffer handed to iconv as the destination
  size:      out parameter (the store to *size is not visible in this listing)

  The codepoint is split into four bytes, least significant first, in the
  array in, and iconv is run on a descriptor opened with UTF-16LE as the
  target and UCS-4LE as the source.

  NOTE(review): the declarations of cd, in and ptr_in, the setup of
  ptr_in / size_in / size_out, the handling of a failed iconv_open and the
  return statement are missing from this listing.  test_first_1m compares
  the return value against 0.
*/
56 static int gen_codepoint_utf16(unsigned int codepoint
,
57 char *buf
, size_t *size
)
62 size_t size_in
, size_out
, ret
;
64 cd
= iconv_open("UTF-16LE", "UCS-4LE");
65 if (cd
== (iconv_t
)-1) {
/* little-endian byte layout of the 32-bit codepoint */
71 in
[0] = codepoint
& 0xFF;
72 in
[1] = (codepoint
>>8) & 0xFF;
73 in
[2] = (codepoint
>>16) & 0xFF;
74 in
[3] = (codepoint
>>24) & 0xFF;
80 ret
= iconv(cd
, &ptr_in
, &size_in
, &buf
, &size_out
);
89 work out the unicode codepoint of the first character in the buffer, decoding it from the given charset
/*
  Decode the start of buf from charset with the system iconv and return
  the first four output bytes combined as a little-endian value.

  buf:     input bytes
  size:    input length (the assignment to size_in is not visible here)
  charset: name passed to iconv_open as the source encoding; the target
           is UCS-4LE

  out is zeroed before the conversion, so bytes that iconv does not write
  contribute 0 to the result.

  NOTE(review): no check of the iconv_open result and no iconv_close are
  visible in this listing.  The declaration of out is also missing; if its
  elements are narrower than int, out[3]<<24 is evaluated in int and can
  shift into the sign bit - confirm against the full source.
*/
91 static unsigned int get_codepoint(char *buf
, size_t size
, const char *charset
)
96 size_t size_out
, size_in
, ret
;
98 cd
= iconv_open("UCS-4LE", charset
);
101 ptr_out
= (char *)out
;
102 size_out
= sizeof(out
);
103 memset(out
, 0, sizeof(out
));
105 ret
= iconv(cd
, &buf
, &size_in
, &ptr_out
, &size_out
);
109 return out
[0] | (out
[1]<<8) | (out
[2]<<16) | (out
[3]<<24);
113 display a buffer with name prefix
/*
  Debug helper: print each of the size bytes of buf as two hex digits
  followed by a space.

  NOTE(review): the lines that use name and that end the output line are
  not visible in this listing.
*/
115 static void show_buf(const char *name
, uint8_t *buf
, size_t size
)
119 for (i
=0;i
<size
;i
++) {
120 printf("%02x ", buf
[i
]);
126 given a UTF-16LE buffer, test the system and built-in iconv code to
127 make sure they do exactly the same thing in converting the buffer to
128 "charset", then convert it back again and ensure we get the same buffer back
/*
  Run one input buffer through the built-in smb_iconv code and the system
  iconv and compare the results.

  test:    torture context used for assertions, comments and allocations
  inbuf:   input bytes, handed to both converters as UTF-16LE
  size:    number of bytes in inbuf
  charset: other charset name (callers in this file pass UTF-8 and CP850)

  Visible flow:
   - cd is opened with the system iconv_open; cd2 and cd3 are opened with
     smb_iconv_open_ex in the two directions.  cd2, cd3 and last_charset
     are static, so they persist between calls.
   - inbuf is converted into buf1 via smb_iconv(cd2) and into buf2 via
     iconv(cd).  Return values, errno1 / errno2 (their assignments are not
     visible), remaining output space, remaining input and the output
     bytes are then compared.
   - buf1 is converted back into buf3 via smb_iconv(cd3) and compared
     with inbuf.
   - differences that begin at a codepoint >= 1<<20 are special-cased; the
     bodies of those branches are not visible.

  NOTE(review): this listing has lost many lines (braces, declarations,
  parts of conditions, return statements), so the description covers only
  what is visible.
*/
131 static bool test_buffer(struct torture_context
*test
,
132 uint8_t *inbuf
, size_t size
, const char *charset
)
134 uint8_t buf1
[1000], buf2
[1000], buf3
[1000];
135 size_t outsize1
, outsize2
, outsize3
;
138 size_t size_in1
, size_in2
, size_in3
;
139 size_t ret1
, ret2
, ret3
, len1
, len2
;
142 static smb_iconv_t cd2
, cd3
;
143 static const char *last_charset
;
145 if (cd
&& last_charset
) {
147 smb_iconv_close(cd2
);
148 smb_iconv_close(cd3
);
/* NOTE(review): iconv_open takes (tocode, fromcode), so this opens a
   conversion from UTF-16LE to charset; the failure message below words
   it the other way round. */
153 cd
= iconv_open(charset
, "UTF-16LE");
154 if (cd
== (iconv_t
)-1) {
156 talloc_asprintf(test
,
157 "failed to open %s to UTF-16LE",
160 cd2
= smb_iconv_open_ex(test
, charset
, "UTF-16LE", lp_parm_bool(test
->lp_ctx
, NULL
, "iconv", "native", true));
161 cd3
= smb_iconv_open_ex(test
, "UTF-16LE", charset
, lp_parm_bool(test
->lp_ctx
, NULL
, "iconv", "native", true));
162 last_charset
= charset
;
165 /* internal convert to charset - placing result in buf1 */
166 ptr_in
= (const char *)inbuf
;
167 ptr_out
= (char *)buf1
;
169 outsize1
= sizeof(buf1
);
171 memset(ptr_out
, 0, outsize1
);
173 ret1
= smb_iconv(cd2
, &ptr_in
, &size_in1
, &ptr_out
, &outsize1
);
176 /* system convert to charset - placing result in buf2 */
177 ptr_in
= (const char *)inbuf
;
178 ptr_out
= (char *)buf2
;
180 outsize2
= sizeof(buf2
);
182 memset(ptr_out
, 0, outsize2
);
184 ret2
= iconv(cd
, discard_const_p(char *, &ptr_in
), &size_in2
, &ptr_out
, &outsize2
);
/* outsizeN holds the space left in bufN, so lenN is the number of bytes
   each converter produced */
187 len1
= sizeof(buf1
) - outsize1
;
188 len2
= sizeof(buf2
) - outsize2
;
190 /* codepoints above 1M are not interesting for now */
192 memcmp(buf1
, buf2
, len1
) == 0 &&
193 get_codepoint((char *)(buf2
+len1
), len2
-len1
, charset
) >= (1<<20)) {
197 memcmp(buf1
, buf2
, len2
) == 0 &&
198 get_codepoint((char *)(buf1
+len2
), len1
-len2
, charset
) >= (1<<20)) {
202 torture_assert_int_equal(test
, ret1
, ret2
, "ret mismatch");
204 if (errno1
!= errno2
) {
205 show_buf(" rem1:", inbuf
+(size
-size_in1
), size_in1
);
206 show_buf(" rem2:", inbuf
+(size
-size_in2
), size_in2
);
207 torture_fail(test
, talloc_asprintf(test
,
209 errno1
, strerror(errno1
),
210 errno2
, strerror(errno2
)));
213 torture_assert_int_equal(test
, outsize1
, outsize2
, "outsize mismatch");
215 torture_assert_int_equal(test
, size_in1
, size_in2
, "size_in mismatch");
218 memcmp(buf1
, buf2
, len1
) != 0) {
219 torture_comment(test
, "size=%d ret1=%d ret2=%d", (int)size
, (int)ret1
, (int)ret2
);
220 show_buf(" IN1:", inbuf
, size
-size_in1
);
221 show_buf(" IN2:", inbuf
, size
-size_in2
);
222 show_buf("OUT1:", buf1
, len1
);
223 show_buf("OUT2:", buf2
, len2
);
224 if (len2
> len1
&& memcmp(buf1
, buf2
, len1
) == 0) {
225 torture_comment(test
, "next codepoint is %u",
226 get_codepoint((char *)(buf2
+len1
), len2
-len1
, charset
));
228 if (len1
> len2
&& memcmp(buf1
, buf2
, len2
) == 0) {
229 torture_comment(test
, "next codepoint is %u",
230 get_codepoint((char *)(buf1
+len2
),len1
-len2
, charset
));
233 torture_fail(test
, "failed");
236 /* convert back to UTF-16, putting result in buf3 */
/* from here on size is the input length minus what the built-in
   converter left unconsumed */
237 size
= size
- size_in1
;
238 ptr_in
= (const char *)buf1
;
239 ptr_out
= (char *)buf3
;
241 outsize3
= sizeof(buf3
);
243 memset(ptr_out
, 0, outsize3
);
244 ret3
= smb_iconv(cd3
, &ptr_in
, &size_in3
, &ptr_out
, &outsize3
);
246 /* we only internally support the first 1M codepoints */
247 if (outsize3
!= sizeof(buf3
) - size
&&
248 get_codepoint((char *)(inbuf
+sizeof(buf3
) - outsize3
),
249 size
- (sizeof(buf3
) - outsize3
),
250 "UTF-16LE") >= (1<<20)) {
254 torture_assert_int_equal(test
, ret3
, 0, talloc_asprintf(test
,
255 "pull failed - %s", strerror(errno
)));
257 if (strncmp(charset
, "UTF", 3) != 0) {
258 /* don't expect perfect mappings for non UTF charsets */
263 torture_assert_int_equal(test
, outsize3
, sizeof(buf3
) - size
,
266 if (memcmp(buf3
, inbuf
, size
) != 0) {
267 torture_comment(test
, "pull bytes mismatch:");
268 show_buf("inbuf", inbuf
, size
);
269 show_buf(" buf3", buf3
, sizeof(buf3
) - outsize3
);
270 torture_comment(test
, "next codepoint is %u\n",
271 get_codepoint((char *)(inbuf
+sizeof(buf3
) - outsize3
),
272 size
- (sizeof(buf3
) - outsize3
),
274 torture_fail(test
, "");
282 test the push_codepoint() and next_codepoint() functions for a given codepoint
/*
  Round-trip one codepoint through push_codepoint and
  next_codepoint_convenience.

  tctx:      torture context (supplies lp_ctx and receives the assertions)
  codepoint: value to encode and decode again

  Visible steps:
   - push_codepoint writes the encoding into buf and returns its length in
     size; a result of -1 is accepted only when the codepoint lies in
     0xd800..0x10000, in which case the function returns true early;
   - the four bytes after the encoded data are filled from random(),
     presumably so that decoding cannot depend on trailing zero bytes;
   - next_codepoint_convenience decodes from buf and must give back the
     same codepoint and the same length.

  NOTE(review): the declarations of buf, size, size2 and c and the final
  return are not visible in this listing.  The accepted failure range ends
  at 0x10000, which is wider than the UTF-16 surrogate block
  (0xd800..0xdfff) - confirm that this is intended.
*/
285 static bool test_codepoint(struct torture_context
*tctx
, unsigned int codepoint
)
291 size
= push_codepoint(lp_iconv_convenience(tctx
->lp_ctx
), (char *)buf
, codepoint
);
292 torture_assert(tctx
, size
!= -1 || (codepoint
>= 0xd800 && codepoint
<= 0x10000),
293 "Invalid Codepoint range");
295 if (size
== -1) return true;
297 buf
[size
] = random();
298 buf
[size
+1] = random();
299 buf
[size
+2] = random();
300 buf
[size
+3] = random();
302 c
= next_codepoint_convenience(lp_iconv_convenience(tctx
->lp_ctx
), (char *)buf
, &size2
);
304 torture_assert(tctx
, c
== codepoint
,
305 talloc_asprintf(tctx
,
306 "next_codepoint(%u) failed - gave %u", codepoint
, c
));
308 torture_assert(tctx
, size2
== size
,
309 talloc_asprintf(tctx
, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
310 codepoint
, (int)size2
, (int)size
));
/*
  Run test_codepoint for every codepoint from 0 up to (1<<20)-1.

  The result of iconv_untestable is checked first.

  NOTE(review): the statements that follow both if conditions and the
  final return are not visible in this listing.
*/
315 static bool test_next_codepoint(struct torture_context
*tctx
)
317 unsigned int codepoint
;
318 if (iconv_untestable(tctx
))
321 for (codepoint
=0;codepoint
<(1<<20);codepoint
++) {
322 if (!test_codepoint(tctx
, codepoint
))
/*
  For every codepoint from 0 up to (1<<20)-1: generate its UTF-16LE form
  with gen_codepoint_utf16 and run test_buffer on it against UTF-8.

  Every 1000 codepoints a progress line is emitted through torture_comment
  when the progress setting is true (lookup default true).

  NOTE(review): the declaration of size, the handling of a non-zero result
  from gen_codepoint_utf16, the statements following the if conditions and
  the final return are not visible in this listing.
*/
328 static bool test_first_1m(struct torture_context
*tctx
)
330 unsigned int codepoint
;
332 unsigned char inbuf
[1000];
334 if (iconv_untestable(tctx
))
337 for (codepoint
=0;codepoint
<(1<<20);codepoint
++) {
338 if (gen_codepoint_utf16(codepoint
, (char *)inbuf
, &size
) != 0) {
342 if (codepoint
% 1000 == 0) {
343 if (torture_setting_bool(tctx
, "progress", true)) {
344 torture_comment(tctx
, "codepoint=%u \r", codepoint
);
349 if (!test_buffer(tctx
, inbuf
, size
, "UTF-8"))
/*
  Feed randomly generated buffers to test_buffer, once against UTF-8 and
  once against CP850.

  Each iteration picks a length of random() % 100 bytes.  For each byte,
  when random() % 100 is below 80 the byte is set to random() % 128; the
  other byte-filling branches are not visible in this listing.  A failing
  test_buffer call is reported with printf together with the iteration
  number.

  NOTE(review): the visible loop bound is 500000, while the function name
  and the suite label speak of 5M - confirm which is intended.
  NOTE(review): i is printed with %u in the progress message and with %d
  in the failure messages; its declaration is not visible here.
*/
355 static bool test_random_5m(struct torture_context
*tctx
)
357 unsigned char inbuf
[1000];
360 if (iconv_untestable(tctx
))
363 for (i
=0;i
<500000;i
++) {
368 if (torture_setting_bool(tctx
, "progress", true)) {
369 torture_comment(tctx
, "i=%u \r", i
);
374 size
= random() % 100;
375 for (c
=0;c
<size
;c
++) {
376 if (random() % 100 < 80) {
377 inbuf
[c
] = random() % 128;
381 if (random() % 10 == 0) {
384 if (random() % 10 == 0) {
388 if (!test_buffer(tctx
, inbuf
, size
, "UTF-8")) {
389 printf("i=%d failed UTF-8\n", i
);
393 if (!test_buffer(tctx
, inbuf
, size
, "CP850")) {
394 printf("i=%d failed CP850\n", i
);
/*
  Exercise convert_string_talloc with CH_UTF16MUNGED as the source charset.

  Visible steps:
   - allocate a temporary talloc context under tctx;
   - fill a buffer of len*2 random bytes (len is 1..1000) and convert it
     to CH_UTF8;
   - store the ten 16-bit values of in1 into le1 with SSVAL (presumably
     little-endian, going by the name le1) and convert those 20 bytes to
     CH_UTF16, asserting that the result is 20 bytes long;
   - convert the same 20 bytes to CH_UTF8 and compare with correct.

  in1 contains 0xd805 and 0xdcf0 separated by an ordinary character, and a
  0 value.  In the expected string each of the two surrogate values
  appears as the bytes \357\277\275 (the UTF-8 form of U+FFFD) and the 0
  value appears as \001.

  NOTE(review): the conditions guarding the torture_fail calls, the loop
  around SSVAL, several declarations and the return are not visible in
  this listing.  mem_ctx is freed only on the visible final path; it is a
  child of tctx.
*/
402 static bool test_string2key(struct torture_context
*tctx
)
406 TALLOC_CTX
*mem_ctx
= talloc_new(tctx
);
408 size_t len
= (random()%1000)+1;
409 const uint16_t in1
[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
414 const char *correct
= "a\357\277\275b\357\277\275c\001defg";
416 buf
= talloc_size(mem_ctx
, len
*2);
417 generate_random_buffer((uint8_t *)buf
, len
*2);
419 torture_comment(tctx
, "converting random buffer\n");
421 ret
= convert_string_talloc(mem_ctx
, CH_UTF16MUNGED
, CH_UTF8
, (void *)buf
, len
*2, (void**)&dest
, false);
423 torture_fail(tctx
, "Failed to convert random buffer\n");
427 SSVAL(&le1
[2*i
], 0, in1
[i
]);
430 torture_comment(tctx
, "converting fixed buffer to UTF16\n");
432 ret
= convert_string_talloc(mem_ctx
, CH_UTF16MUNGED
, CH_UTF16
, (void *)le1
, 20, (void**)&munged1
, false);
434 torture_fail(tctx
, "Failed to convert fixed buffer to UTF16_MUNGED\n");
437 torture_assert(tctx
, ret
== 20, "conversion should give 20 bytes\n");
439 torture_comment(tctx
, "converting fixed buffer to UTF8\n");
441 ret
= convert_string_talloc(mem_ctx
, CH_UTF16MUNGED
, CH_UTF8
, (void *)le1
, 20, (void**)&out1
, false);
443 torture_fail(tctx
, "Failed to convert fixed buffer to UTF8\n");
446 torture_assert(tctx
, strcmp(correct
, out1
) == 0, "conversion gave incorrect result\n");
448 talloc_free(mem_ctx
);
/*
  Build the ICONV torture suite under mem_ctx and register the simple
  tests of this file on it.

  NOTE(review): only the test_next_codepoint function argument is visible;
  the function arguments of the other registrations and the return of the
  suite are missing from this listing.
  NOTE(review): the label string2key is used by two registrations (the
  first and the last one visible here) - check whether the duplicate is
  intended.
*/
453 struct torture_suite
*torture_local_iconv(TALLOC_CTX
*mem_ctx
)
455 struct torture_suite
*suite
= torture_suite_create(mem_ctx
, "ICONV");
457 torture_suite_add_simple_test(suite
, "string2key",
460 torture_suite_add_simple_test(suite
, "next_codepoint()",
461 test_next_codepoint
);
463 torture_suite_add_simple_test(suite
, "first 1M codepoints",
466 torture_suite_add_simple_test(suite
, "5M random UTF-16LE sequences",
469 torture_suite_add_simple_test(suite
, "string2key",
/*
  Second definition of torture_local_iconv: prints a message saying that
  the iconv test cannot run without a native iconv library.

  NOTE(review): presumably this is the alternative branch of a conditional
  compilation block; the preprocessor directives and the return statement
  are not visible in this listing.
*/
476 struct torture_suite
*torture_local_iconv(TALLOC_CTX
*mem_ctx
)
478 printf("No native iconv library - can't run iconv test\n");