2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
33 generate a UTF-16LE buffer for a given unicode codepoint
35 static int gen_codepoint_utf16(unsigned int codepoint
,
36 char *buf
, size_t *size
)
41 size_t size_in
, size_out
, ret
;
43 cd
= iconv_open("UTF-16LE", "UCS-4LE");
44 if (cd
== (iconv_t
)-1) {
50 in
[0] = codepoint
& 0xFF;
51 in
[1] = (codepoint
>>8) & 0xFF;
52 in
[2] = (codepoint
>>16) & 0xFF;
53 in
[3] = (codepoint
>>24) & 0xFF;
59 ret
= iconv(cd
, &ptr_in
, &size_in
, &buf
, &size_out
);
68 work out the unicode codepoint of the first UTF-8 character in the buffer
70 static unsigned int get_codepoint(char *buf
, size_t size
, const char *charset
)
75 size_t size_out
, size_in
, ret
;
77 cd
= iconv_open("UCS-4LE", charset
);
80 ptr_out
= (char *)out
;
81 size_out
= sizeof(out
);
82 memset(out
, 0, sizeof(out
));
84 ret
= iconv(cd
, &buf
, &size_in
, &ptr_out
, &size_out
);
88 return out
[0] | (out
[1]<<8) | (out
[2]<<16) | (out
[3]<<24);
92 display a buffer with name prefix
94 static void show_buf(const char *name
, uint8_t *buf
, size_t size
)
98 for (i
=0;i
<size
;i
++) {
99 printf("%02x ", buf
[i
]);
105 given a UTF-16LE buffer, test the system and built-in iconv code to
106 make sure they do exactly the same thing in converting the buffer to
107 "charset", then convert it back again and ensure we get the same
110 static bool test_buffer(struct torture_context
*test
,
111 uint8_t *inbuf
, size_t size
, const char *charset
)
113 uint8_t buf1
[1000], buf2
[1000], buf3
[1000];
114 size_t outsize1
, outsize2
, outsize3
;
117 size_t size_in1
, size_in2
, size_in3
;
118 size_t ret1
, ret2
, ret3
, len1
, len2
;
121 static smb_iconv_t cd2
, cd3
;
122 static const char *last_charset
;
124 if (cd
&& last_charset
) {
126 smb_iconv_close(cd2
);
127 smb_iconv_close(cd3
);
132 cd
= iconv_open(charset
, "UTF-16LE");
133 if (cd
== (iconv_t
)-1) {
135 talloc_asprintf(test
,
136 "failed to open %s to UTF-16LE",
139 cd2
= smb_iconv_open(charset
, "UTF-16LE");
140 cd3
= smb_iconv_open("UTF-16LE", charset
);
141 last_charset
= charset
;
144 /* internal convert to charset - placing result in buf1 */
145 ptr_in
= (const char *)inbuf
;
146 ptr_out
= (char *)buf1
;
148 outsize1
= sizeof(buf1
);
150 memset(ptr_out
, 0, outsize1
);
152 ret1
= smb_iconv(cd2
, &ptr_in
, &size_in1
, &ptr_out
, &outsize1
);
155 /* system convert to charset - placing result in buf2 */
156 ptr_in
= (const char *)inbuf
;
157 ptr_out
= (char *)buf2
;
159 outsize2
= sizeof(buf2
);
161 memset(ptr_out
, 0, outsize2
);
163 ret2
= iconv(cd
, discard_const_p(char *, &ptr_in
), &size_in2
, &ptr_out
, &outsize2
);
166 len1
= sizeof(buf1
) - outsize1
;
167 len2
= sizeof(buf2
) - outsize2
;
169 /* codepoints above 1M are not interesting for now */
171 memcmp(buf1
, buf2
, len1
) == 0 &&
172 get_codepoint((char *)(buf2
+len1
), len2
-len1
, charset
) >= (1<<20)) {
176 memcmp(buf1
, buf2
, len2
) == 0 &&
177 get_codepoint((char *)(buf1
+len2
), len1
-len2
, charset
) >= (1<<20)) {
181 torture_assert_int_equal(test
, ret1
, ret2
, "ret mismatch");
183 if (errno1
!= errno2
) {
184 show_buf(" rem1:", inbuf
+(size
-size_in1
), size_in1
);
185 show_buf(" rem2:", inbuf
+(size
-size_in2
), size_in2
);
186 torture_fail(test
, talloc_asprintf(test
,
188 errno1
, strerror(errno1
),
189 errno2
, strerror(errno2
)));
192 torture_assert_int_equal(test
, outsize1
, outsize2
, "outsize mismatch");
194 torture_assert_int_equal(test
, size_in1
, size_in2
, "size_in mismatch");
197 memcmp(buf1
, buf2
, len1
) != 0) {
198 torture_comment(test
, "size=%d ret1=%d ret2=%d", (int)size
, (int)ret1
, (int)ret2
);
199 show_buf(" IN1:", inbuf
, size
-size_in1
);
200 show_buf(" IN2:", inbuf
, size
-size_in2
);
201 show_buf("OUT1:", buf1
, len1
);
202 show_buf("OUT2:", buf2
, len2
);
203 if (len2
> len1
&& memcmp(buf1
, buf2
, len1
) == 0) {
204 torture_comment(test
, "next codepoint is %u",
205 get_codepoint((char *)(buf2
+len1
), len2
-len1
, charset
));
207 if (len1
> len2
&& memcmp(buf1
, buf2
, len2
) == 0) {
208 torture_comment(test
, "next codepoint is %u",
209 get_codepoint((char *)(buf1
+len2
),len1
-len2
, charset
));
212 torture_fail(test
, "failed");
215 /* convert back to UTF-16, putting result in buf3 */
216 size
= size
- size_in1
;
217 ptr_in
= (const char *)buf1
;
218 ptr_out
= (char *)buf3
;
220 outsize3
= sizeof(buf3
);
222 memset(ptr_out
, 0, outsize3
);
223 ret3
= smb_iconv(cd3
, &ptr_in
, &size_in3
, &ptr_out
, &outsize3
);
225 /* we only internally support the first 1M codepoints */
226 if (outsize3
!= sizeof(buf3
) - size
&&
227 get_codepoint((char *)(inbuf
+sizeof(buf3
) - outsize3
),
228 size
- (sizeof(buf3
) - outsize3
),
229 "UTF-16LE") >= (1<<20)) {
233 torture_assert_int_equal(test
, ret3
, 0, talloc_asprintf(test
,
234 "pull failed - %s", strerror(errno
)));
236 if (strncmp(charset
, "UTF", 3) != 0) {
237 /* don't expect perfect mappings for non UTF charsets */
242 torture_assert_int_equal(test
, outsize3
, sizeof(buf3
) - size
,
245 if (memcmp(buf3
, inbuf
, size
) != 0) {
246 torture_comment(test
, "pull bytes mismatch:");
247 show_buf("inbuf", inbuf
, size
);
248 show_buf(" buf3", buf3
, sizeof(buf3
) - outsize3
);
249 torture_comment(test
, "next codepoint is %u\n",
250 get_codepoint((char *)(inbuf
+sizeof(buf3
) - outsize3
),
251 size
- (sizeof(buf3
) - outsize3
),
253 torture_fail(test
, "");
261 test the push_codepoint() and next_codepoint() functions for a given
264 static bool test_codepoint(struct torture_context
*tctx
, unsigned int codepoint
)
270 size
= push_codepoint((char *)buf
, codepoint
);
271 torture_assert(tctx
, size
!= -1 || (codepoint
>= 0xd800 && codepoint
<= 0x10000),
272 "Invalid Codepoint range");
274 if (size
== -1) return true;
276 buf
[size
] = random();
277 buf
[size
+1] = random();
278 buf
[size
+2] = random();
279 buf
[size
+3] = random();
281 c
= next_codepoint((char *)buf
, &size2
);
283 torture_assert(tctx
, c
== codepoint
,
284 talloc_asprintf(tctx
,
285 "next_codepoint(%u) failed - gave %u", codepoint
, c
));
287 torture_assert(tctx
, size2
== size
,
288 talloc_asprintf(tctx
, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
289 codepoint
, (int)size2
, (int)size
));
294 static bool test_next_codepoint(struct torture_context
*tctx
)
296 unsigned int codepoint
;
297 for (codepoint
=0;codepoint
<(1<<20);codepoint
++) {
298 if (!test_codepoint(tctx
, codepoint
))
304 static bool test_first_1m(struct torture_context
*tctx
)
306 unsigned int codepoint
;
308 unsigned char inbuf
[1000];
310 for (codepoint
=0;codepoint
<(1<<20);codepoint
++) {
311 if (gen_codepoint_utf16(codepoint
, (char *)inbuf
, &size
) != 0) {
315 if (codepoint
% 1000 == 0) {
316 if (torture_setting_bool(tctx
, "progress", True
)) {
317 torture_comment(tctx
, "codepoint=%u \r", codepoint
);
322 if (!test_buffer(tctx
, inbuf
, size
, "UTF-8"))
328 static bool test_random_5m(struct torture_context
*tctx
)
330 unsigned char inbuf
[1000];
332 for (i
=0;i
<500000;i
++) {
337 if (torture_setting_bool(tctx
, "progress", true)) {
338 torture_comment(tctx
, "i=%u \r", i
);
343 size
= random() % 100;
344 for (c
=0;c
<size
;c
++) {
345 if (random() % 100 < 80) {
346 inbuf
[c
] = random() % 128;
350 if (random() % 10 == 0) {
353 if (random() % 10 == 0) {
357 if (!test_buffer(tctx
, inbuf
, size
, "UTF-8")) {
358 printf("i=%d failed UTF-8\n", i
);
362 if (!test_buffer(tctx
, inbuf
, size
, "CP850")) {
363 printf("i=%d failed CP850\n", i
);
370 struct torture_suite
*torture_local_iconv(TALLOC_CTX
*mem_ctx
)
373 struct torture_suite
*suite
= torture_suite_create(mem_ctx
, "ICONV");
375 if (!lp_parm_bool(-1, "iconv", "native", True
)) {
376 printf("system iconv disabled - skipping test\n");
380 cd
= iconv_open("UTF-16LE", "UCS-4LE");
381 if (cd
== (iconv_t
)-1) {
382 printf("unable to test - system iconv library does not support UTF-16LE -> UCS-4LE\n");
387 cd
= iconv_open("UTF-16LE", "CP850");
388 if (cd
== (iconv_t
)-1) {
389 printf("unable to test - system iconv library does not support UTF-16LE -> CP850\n");
396 torture_suite_add_simple_test(suite
, "next_codepoint()",
397 test_next_codepoint
);
399 torture_suite_add_simple_test(suite
, "first 1M codepoints",
402 torture_suite_add_simple_test(suite
, "5M random UTF-16LE sequences",
409 struct torture_suite
*torture_local_iconv(TALLOC_CTX
*mem_ctx
)
411 printf("No native iconv library - can't run iconv test\n");