librpc: Shorten dcerpc_binding_handle_call a bit
[Samba/gebeck_regimport.git] / lib / util / charset / tests / iconv.c
blob670454101619ef1e13a39aa457d51312e5c11a34
/*
   Unix SMB/CIFS implementation.

   local testing of iconv routines. This tests the system iconv code against
   the built-in iconv code

   Copyright (C) Andrew Tridgell 2004

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
23 #include "includes.h"
24 #include "torture/torture.h"
25 #include "system/iconv.h"
26 #include "system/time.h"
27 #include "libcli/raw/libcliraw.h"
28 #include "param/param.h"
29 #include "torture/util.h"
30 #include "talloc.h"
32 #if HAVE_NATIVE_ICONV
/*
  check that the system iconv library offers the conversions the tests
  below rely on (UCS-4LE -> UTF-16LE and CP850 -> UTF-16LE).

  Returns false when both conversions can be opened. When one cannot be
  opened the test is reported as skipped through torture_skip().
  NOTE(review): torture_skip() is a macro defined outside this file and is
  expected to return from this function itself - confirm against torture.h.
*/
static bool iconv_untestable(struct torture_context *tctx)
{
	iconv_t probe = iconv_open("UTF-16LE", "UCS-4LE");

	if (probe == (iconv_t)-1) {
		torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
	}
	iconv_close(probe);

	probe = iconv_open("UTF-16LE", "CP850");
	if (probe == (iconv_t)-1) {
		torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
	}
	iconv_close(probe);

	return false;
}
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  The codepoint is laid out as 4 little-endian bytes (UCS-4LE) and pushed
  through the system iconv. The converter is opened on first use and kept
  in a function-local static for later calls.

  buf  - receives the UTF-16LE bytes; up to 8 bytes may be written
  size - set to the number of bytes written to buf

  Returns -1 if the converter cannot be opened (*size is not touched in
  that case), otherwise the iconv() result converted to int.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	static iconv_t cd;
	const size_t out_capacity = 8;
	uint8_t ucs4[4];
	char *src = (char *)ucs4;
	size_t src_left = sizeof(ucs4);
	size_t dst_left = out_capacity;
	size_t rc;
	unsigned int k;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* leave the cache empty so a later call tries again */
			cd = NULL;
			return -1;
		}
	}

	/* least significant byte first */
	for (k = 0; k < sizeof(ucs4); k++) {
		ucs4[k] = (codepoint >> (8 * k)) & 0xFF;
	}

	rc = iconv(cd, &src, &src_left, &buf, &dst_left);

	*size = out_capacity - dst_left;

	return (int)rc;
}
/*
  work out the unicode codepoint of the first character in the buffer

  buf     - bytes encoded in "charset"
  size    - number of bytes available in buf
  charset - name of the encoding of buf, as understood by iconv_open()

  The buffer is converted to UCS-4LE with room for exactly one character,
  so only the first character is decoded. Returns 0 when the charset
  cannot be opened or when nothing could be converted.
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4] = { 0, 0, 0, 0 };
	char *ptr_out = (char *)out;
	size_t size_in = size;
	size_t size_out = sizeof(out);

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	/*
	 * The result is deliberately ignored: an error is expected whenever
	 * buf holds more than one character (the output only has room for
	 * one), and if nothing was converted "out" simply stays zero.
	 */
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so the top byte cannot overflow a signed int */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
/*
  display a buffer with name prefix

  Prints "name", then each of the "size" bytes of "buf" as two hex digits
  followed by a space, then a newline, all on stdout.
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	size_t i;	/* same type as the bound: no signed/unsigned comparison */

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
124 given a UTF-16LE buffer, test the system and built-in iconv code to
125 make sure they do exactly the same thing in converting the buffer to
126 "charset", then convert it back again and ensure we get the same
127 buffer back
129 static bool test_buffer(struct torture_context *test,
130 uint8_t *inbuf, size_t size, const char *charset)
132 uint8_t buf1[1000], buf2[1000], buf3[1000];
133 size_t outsize1, outsize2, outsize3;
134 char *ptr_in;
135 char *ptr_out;
136 size_t size_in1, size_in2, size_in3;
137 size_t ret1, ret2, ret3, len1, len2;
138 int errno1, errno2;
139 static iconv_t cd;
140 static smb_iconv_t cd2, cd3;
141 static const char *last_charset;
143 if (cd && last_charset) {
144 iconv_close(cd);
145 smb_iconv_close(cd2);
146 smb_iconv_close(cd3);
147 cd = NULL;
150 if (!cd) {
151 cd = iconv_open(charset, "UTF-16LE");
152 if (cd == (iconv_t)-1) {
153 torture_fail(test,
154 talloc_asprintf(test,
155 "failed to open %s to UTF-16LE",
156 charset));
158 cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "use_builtin_handlers", true));
159 if (cd2 == (iconv_t)-1) {
160 torture_fail(test,
161 talloc_asprintf(test,
162 "failed to open %s to UTF-16LE via smb_iconv_open_ex",
163 charset));
165 cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "use_builtin_handlers", true));
166 if (cd3 == (iconv_t)-1) {
167 torture_fail(test,
168 talloc_asprintf(test,
169 "failed to open UTF-16LE to %s via smb_iconv_open_ex",
170 charset));
172 last_charset = charset;
175 /* internal convert to charset - placing result in buf1 */
176 ptr_in = (char *)inbuf;
177 ptr_out = (char *)buf1;
178 size_in1 = size;
179 outsize1 = sizeof(buf1);
181 memset(ptr_out, 0, outsize1);
182 errno = 0;
183 ret1 = smb_iconv(cd2, (const char **) &ptr_in, &size_in1, &ptr_out, &outsize1);
184 errno1 = errno;
186 /* system convert to charset - placing result in buf2 */
187 ptr_in = (char *)inbuf;
188 ptr_out = (char *)buf2;
189 size_in2 = size;
190 outsize2 = sizeof(buf2);
192 memset(ptr_out, 0, outsize2);
193 errno = 0;
194 ret2 = iconv(cd, &ptr_in, &size_in2, &ptr_out, &outsize2);
195 errno2 = errno;
197 len1 = sizeof(buf1) - outsize1;
198 len2 = sizeof(buf2) - outsize2;
200 /* codepoints above 1M are not interesting for now */
201 if (len2 > len1 &&
202 memcmp(buf1, buf2, len1) == 0 &&
203 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
204 return true;
206 if (len1 > len2 &&
207 memcmp(buf1, buf2, len2) == 0 &&
208 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
209 return true;
212 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
214 if (errno1 != errno2) {
215 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
216 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
217 torture_fail(test, talloc_asprintf(test,
218 "errno mismatch with %s internal=%d/%s system=%d/%s",
219 charset,
220 errno1, strerror(errno1),
221 errno2, strerror(errno2)));
224 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
226 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
228 if (len1 != len2 ||
229 memcmp(buf1, buf2, len1) != 0) {
230 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
231 show_buf(" IN1:", inbuf, size-size_in1);
232 show_buf(" IN2:", inbuf, size-size_in2);
233 show_buf("OUT1:", buf1, len1);
234 show_buf("OUT2:", buf2, len2);
235 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
236 torture_comment(test, "next codepoint is %u",
237 get_codepoint((char *)(buf2+len1), len2-len1, charset));
239 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
240 torture_comment(test, "next codepoint is %u",
241 get_codepoint((char *)(buf1+len2),len1-len2, charset));
244 torture_fail(test, "failed");
247 /* convert back to UTF-16, putting result in buf3 */
248 size = size - size_in1;
249 ptr_in = (char *)buf1;
250 ptr_out = (char *)buf3;
251 size_in3 = len1;
252 outsize3 = sizeof(buf3);
254 memset(ptr_out, 0, outsize3);
255 ret3 = smb_iconv(cd3, (const char **) &ptr_in, &size_in3, &ptr_out, &outsize3);
257 /* we only internally support the first 1M codepoints */
258 if (outsize3 != sizeof(buf3) - size &&
259 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
260 size - (sizeof(buf3) - outsize3),
261 "UTF-16LE") >= (1<<20)) {
262 return true;
265 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
266 "pull failed - %s", strerror(errno)));
268 if (strncmp(charset, "UTF", 3) != 0) {
269 /* don't expect perfect mappings for non UTF charsets */
270 return true;
274 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
275 "wrong outsize3");
277 if (memcmp(buf3, inbuf, size) != 0) {
278 torture_comment(test, "pull bytes mismatch:");
279 show_buf("inbuf", inbuf, size);
280 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
281 torture_comment(test, "next codepoint is %u\n",
282 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
283 size - (sizeof(buf3) - outsize3),
284 "UTF-16LE"));
285 torture_fail(test, "");
288 return true;
293 test the push_codepoint() and next_codepoint() functions for a given
294 codepoint
296 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
298 uint8_t buf[10];
299 size_t size, size2;
300 codepoint_t c;
302 size = push_codepoint_handle(lpcfg_iconv_handle(tctx->lp_ctx), (char *)buf, codepoint);
303 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
304 "Invalid Codepoint range");
306 if (size == -1) return true;
308 buf[size] = random();
309 buf[size+1] = random();
310 buf[size+2] = random();
311 buf[size+3] = random();
313 c = next_codepoint_handle(lpcfg_iconv_handle(tctx->lp_ctx), (char *)buf, &size2);
315 torture_assert(tctx, c == codepoint,
316 talloc_asprintf(tctx,
317 "next_codepoint(%u) failed - gave %u", codepoint, c));
319 torture_assert(tctx, size2 == size,
320 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
321 codepoint, (int)size2, (int)size));
323 return true;
/*
  run test_codepoint() over every codepoint below 1<<20, stopping at the
  first failure. Returns true straight away when iconv_untestable()
  reports that the test cannot run.
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	const unsigned int limit = (1<<20);
	unsigned int cp = 0;

	if (iconv_untestable(tctx)) {
		return true;
	}

	while (cp < limit) {
		if (!test_codepoint(tctx, cp)) {
			return false;
		}
		cp++;
	}

	return true;
}
/*
  run test_buffer() against "UTF-8" for every codepoint below 1<<20 that
  gen_codepoint_utf16() can encode; codepoints it cannot encode are
  silently skipped. Returns true straight away when iconv_untestable()
  reports that the test cannot run.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned char utf16[1000];
	size_t utf16_len;
	unsigned int cp;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (cp = 0; cp < (1<<20); cp++) {
		if (gen_codepoint_utf16(cp, (char *)utf16, &utf16_len) != 0) {
			continue;
		}

		/* progress line every 1000 codepoints, if enabled */
		if (cp % 1000 == 0 &&
		    torture_setting_bool(tctx, "progress", true)) {
			torture_comment(tctx, "codepoint=%u \r", cp);
			fflush(stdout);
		}

		if (!test_buffer(tctx, utf16, utf16_len, "UTF-8")) {
			return false;
		}
	}

	return true;
}
/*
  feed random byte buffers through test_buffer() for both "UTF-8" and
  "CP850"

  Runs 500000 iterations. Each iteration builds a buffer of 0..99 bytes:
  about 80% of the bytes are drawn from 0..127, the rest are arbitrary,
  and roughly one byte in ten has 0xd8 and/or 0xdc OR-ed in
  (NOTE(review): presumably to make UTF-16 surrogate values more likely -
  confirm).

  Returns true straight away when iconv_untestable() reports that the
  test cannot run, false on the first test_buffer() failure.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	if (iconv_untestable(tctx))
		return true;

	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		/* progress line every 1000 iterations, if enabled */
		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
				fflush(stdout);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}

		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			/* i is unsigned: print it with %u, as the progress line does */
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}
	return true;
}
413 static bool test_string2key(struct torture_context *tctx)
415 uint16_t *buf;
416 char *dest = NULL;
417 TALLOC_CTX *mem_ctx = talloc_new(tctx);
418 size_t len = (random()%1000)+1;
419 const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
420 uint8_t le1[20];
421 uint8_t *munged1;
422 uint8_t *out1;
423 size_t ret;
424 int i;
425 const char *correct = "a\357\277\275b\357\277\275c\001defg";
427 buf = talloc_size(mem_ctx, len*2);
428 generate_random_buffer((uint8_t *)buf, len*2);
430 torture_comment(tctx, "converting random buffer\n");
432 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret)) {
433 torture_fail(tctx, "Failed to convert random buffer\n");
436 for (i=0;i<10;i++) {
437 SSVAL(&le1[2*i], 0, in1[i]);
440 torture_comment(tctx, "converting fixed buffer to UTF16\n");
442 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret)) {
443 torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
446 torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
448 torture_comment(tctx, "converting fixed buffer to UTF8\n");
450 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret)) {
451 torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
454 torture_assert(tctx, strcmp(correct, (const char *) out1) == 0,
455 "conversion gave incorrect result\n");
457 talloc_free(mem_ctx);
459 return true;
462 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
464 struct torture_suite *suite = torture_suite_create(mem_ctx, "iconv");
466 torture_suite_add_simple_test(suite, "string2key",
467 test_string2key);
469 torture_suite_add_simple_test(suite, "next_codepoint()",
470 test_next_codepoint);
472 torture_suite_add_simple_test(suite, "first 1M codepoints",
473 test_first_1m);
475 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
476 test_random_5m);
478 torture_suite_add_simple_test(suite, "string2key",
479 test_string2key);
480 return suite;
483 #else
485 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
487 printf("No native iconv library - can't run iconv test\n");
488 return NULL;
491 #endif