Add allow_badcharcnv argument to all conversion function, for
[Samba/fernandojvsilva.git] / lib / util / charset / tests / iconv.c
blobfbe7b103ab5ee3c5f333399a148e469f8433969e
/*
   Unix SMB/CIFS implementation.

   local testing of iconv routines. This tests the system iconv code against
   the built-in iconv code

   Copyright (C) Andrew Tridgell 2004

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
23 #include "includes.h"
24 #include "torture/torture.h"
25 #include "system/iconv.h"
26 #include "system/time.h"
27 #include "libcli/raw/libcliraw.h"
28 #include "param/param.h"
29 #include "torture/util.h"
31 #if HAVE_NATIVE_ICONV
33 static bool iconv_untestable(struct torture_context *tctx)
35 iconv_t cd;
37 if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true))
38 torture_skip(tctx, "system iconv disabled - skipping test");
40 cd = iconv_open("UTF-16LE", "UCS-4LE");
41 if (cd == (iconv_t)-1)
42 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
43 iconv_close(cd);
45 cd = iconv_open("UTF-16LE", "CP850");
46 if (cd == (iconv_t)-1)
47 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
48 iconv_close(cd);
50 return false;
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  codepoint: the value to encode; it is fed to iconv as 4 little-endian
             bytes (UCS-4LE)
  buf:       output buffer; up to 8 bytes may be written
  size:      receives the number of bytes written to buf (set even when
             the conversion fails)

  Returns -1 if the converter cannot be opened, otherwise the iconv()
  result narrowed to int: 0 for a clean conversion, non-zero otherwise
  (an iconv() failure of (size_t)-1 becomes -1).

  The conversion descriptor is opened on first use and kept in a static
  variable for later calls; it is never closed.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	enum { OUT_CAPACITY = 8 };
	static iconv_t cd;
	uint8_t in[4];
	char *ptr_in;
	size_t size_in, size_out, ret;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* forget the failed handle so the next call retries */
			cd = NULL;
			return -1;
		}
	}

	/* serialise the codepoint as UCS-4LE */
	in[0] = codepoint & 0xFF;
	in[1] = (codepoint>>8) & 0xFF;
	in[2] = (codepoint>>16) & 0xFF;
	in[3] = (codepoint>>24) & 0xFF;

	ptr_in = (char *)in;
	size_in = sizeof(in);
	size_out = OUT_CAPACITY;

	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);

	/* iconv() decrements size_out by the number of bytes it produced */
	*size = OUT_CAPACITY - size_out;

	return (int)ret;
}
/*
  work out the unicode codepoint of the first character in the buffer

  buf/size: the encoded input
  charset:  the name of the encoding buf is in, as understood by
            iconv_open()

  The input is converted to UCS-4LE into a 4 byte buffer, so only the
  first character can ever be stored; the iconv() result is deliberately
  ignored because running out of output space after that first character
  is the expected outcome for longer inputs.

  Returns the codepoint, or 0 if no converter for charset can be opened
  or nothing could be converted (the output buffer is pre-zeroed).
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so the top byte never lands in a sign bit */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
/*
  display a buffer with name prefix

  Prints name followed by a space, then each of the size bytes of buf as
  two lower-case hex digits followed by a space, then a newline, all on
  stdout.
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	size_t i;	/* same type as size: no signed/unsigned comparison */

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
126 given a UTF-16LE buffer, test the system and built-in iconv code to
127 make sure they do exactly the same thing in converting the buffer to
128 "charset", then convert it back again and ensure we get the same
129 buffer back
131 static bool test_buffer(struct torture_context *test,
132 uint8_t *inbuf, size_t size, const char *charset)
134 uint8_t buf1[1000], buf2[1000], buf3[1000];
135 size_t outsize1, outsize2, outsize3;
136 const char *ptr_in;
137 char *ptr_out;
138 size_t size_in1, size_in2, size_in3;
139 size_t ret1, ret2, ret3, len1, len2;
140 int errno1, errno2;
141 static iconv_t cd;
142 static smb_iconv_t cd2, cd3;
143 static const char *last_charset;
145 if (cd && last_charset) {
146 iconv_close(cd);
147 smb_iconv_close(cd2);
148 smb_iconv_close(cd3);
149 cd = NULL;
152 if (!cd) {
153 cd = iconv_open(charset, "UTF-16LE");
154 if (cd == (iconv_t)-1) {
155 torture_fail(test,
156 talloc_asprintf(test,
157 "failed to open %s to UTF-16LE",
158 charset));
160 cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
161 cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
162 last_charset = charset;
165 /* internal convert to charset - placing result in buf1 */
166 ptr_in = (const char *)inbuf;
167 ptr_out = (char *)buf1;
168 size_in1 = size;
169 outsize1 = sizeof(buf1);
171 memset(ptr_out, 0, outsize1);
172 errno = 0;
173 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
174 errno1 = errno;
176 /* system convert to charset - placing result in buf2 */
177 ptr_in = (const char *)inbuf;
178 ptr_out = (char *)buf2;
179 size_in2 = size;
180 outsize2 = sizeof(buf2);
182 memset(ptr_out, 0, outsize2);
183 errno = 0;
184 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
185 errno2 = errno;
187 len1 = sizeof(buf1) - outsize1;
188 len2 = sizeof(buf2) - outsize2;
190 /* codepoints above 1M are not interesting for now */
191 if (len2 > len1 &&
192 memcmp(buf1, buf2, len1) == 0 &&
193 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
194 return true;
196 if (len1 > len2 &&
197 memcmp(buf1, buf2, len2) == 0 &&
198 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
199 return true;
202 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
204 if (errno1 != errno2) {
205 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
206 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
207 torture_fail(test, talloc_asprintf(test,
208 "e1=%d/%s e2=%d/%s",
209 errno1, strerror(errno1),
210 errno2, strerror(errno2)));
213 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
215 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
217 if (len1 != len2 ||
218 memcmp(buf1, buf2, len1) != 0) {
219 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
220 show_buf(" IN1:", inbuf, size-size_in1);
221 show_buf(" IN2:", inbuf, size-size_in2);
222 show_buf("OUT1:", buf1, len1);
223 show_buf("OUT2:", buf2, len2);
224 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
225 torture_comment(test, "next codepoint is %u",
226 get_codepoint((char *)(buf2+len1), len2-len1, charset));
228 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
229 torture_comment(test, "next codepoint is %u",
230 get_codepoint((char *)(buf1+len2),len1-len2, charset));
233 torture_fail(test, "failed");
236 /* convert back to UTF-16, putting result in buf3 */
237 size = size - size_in1;
238 ptr_in = (const char *)buf1;
239 ptr_out = (char *)buf3;
240 size_in3 = len1;
241 outsize3 = sizeof(buf3);
243 memset(ptr_out, 0, outsize3);
244 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
246 /* we only internally support the first 1M codepoints */
247 if (outsize3 != sizeof(buf3) - size &&
248 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
249 size - (sizeof(buf3) - outsize3),
250 "UTF-16LE") >= (1<<20)) {
251 return true;
254 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
255 "pull failed - %s", strerror(errno)));
257 if (strncmp(charset, "UTF", 3) != 0) {
258 /* don't expect perfect mappings for non UTF charsets */
259 return true;
263 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
264 "wrong outsize3");
266 if (memcmp(buf3, inbuf, size) != 0) {
267 torture_comment(test, "pull bytes mismatch:");
268 show_buf("inbuf", inbuf, size);
269 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
270 torture_comment(test, "next codepoint is %u\n",
271 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
272 size - (sizeof(buf3) - outsize3),
273 "UTF-16LE"));
274 torture_fail(test, "");
277 return true;
282 test the push_codepoint() and next_codepoint() functions for a given
283 codepoint
285 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
287 uint8_t buf[10];
288 size_t size, size2;
289 codepoint_t c;
291 size = push_codepoint(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint);
292 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
293 "Invalid Codepoint range");
295 if (size == -1) return true;
297 buf[size] = random();
298 buf[size+1] = random();
299 buf[size+2] = random();
300 buf[size+3] = random();
302 c = next_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2);
304 torture_assert(tctx, c == codepoint,
305 talloc_asprintf(tctx,
306 "next_codepoint(%u) failed - gave %u", codepoint, c));
308 torture_assert(tctx, size2 == size,
309 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
310 codepoint, (int)size2, (int)size));
312 return true;
/*
  run test_codepoint() on every codepoint from 0 up to (but excluding)
  1<<20, stopping at the first failure

  Returns true without testing anything when iconv_untestable() reports
  that the environment cannot run the test.
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	unsigned int codepoint;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (codepoint = 0; codepoint < (1<<20); codepoint++) {
		if (!test_codepoint(tctx, codepoint)) {
			return false;
		}
	}

	return true;
}
/*
  compare the system and built-in UTF-16LE -> UTF-8 conversions for each
  of the first 1<<20 codepoints

  Each codepoint is encoded as UTF-16LE with gen_codepoint_utf16();
  codepoints for which that returns non-zero are skipped.  Every 1000th
  codepoint a progress line is emitted if the "progress" torture setting
  (default true) is enabled.

  Returns true without testing anything when iconv_untestable() reports
  that the environment cannot run the test.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned int codepoint;
	size_t size;
	unsigned char inbuf[1000];

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (codepoint = 0; codepoint < (1<<20); codepoint++) {
		if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
			continue;
		}

		if (codepoint % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "codepoint=%u \r", codepoint);
				fflush(stdout);
			}
		}

		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			return false;
		}
	}

	return true;
}
/*
  feed randomly generated byte sequences through test_buffer() for both
  UTF-8 and CP850

  Each iteration builds a buffer of 0..99 bytes.  Roughly 80% of the
  bytes are below 128, the rest are arbitrary; each byte then has two
  independent 1-in-10 chances of having 0xd8 and 0xdc OR-ed in.

  NOTE(review): the loop runs 500000 iterations although the function
  name says "5m" - confirm which is intended.

  Returns true without testing anything when iconv_untestable() reports
  that the environment cannot run the test.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (i = 0; i < 500000; i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
				fflush(stdout);
			}
		}

		size = random() % 100;
		for (c = 0; c < size; c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}

		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			/* i is unsigned, so print it with %u */
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}

	return true;
}
402 static bool test_string2key(struct torture_context *tctx)
404 uint16_t *buf;
405 char *dest = NULL;
406 TALLOC_CTX *mem_ctx = talloc_new(tctx);
407 ssize_t ret;
408 size_t len = (random()%1000)+1;
409 const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
410 uint8_t le1[20];
411 uint8_t *munged1;
412 uint8_t *out1;
413 int i;
414 const char *correct = "a\357\277\275b\357\277\275c\001defg";
416 buf = talloc_size(mem_ctx, len*2);
417 generate_random_buffer((uint8_t *)buf, len*2);
419 torture_comment(tctx, "converting random buffer\n");
421 ret = convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, false);
422 if (ret == -1) {
423 torture_fail(tctx, "Failed to convert random buffer\n");
426 for (i=0;i<10;i++) {
427 SSVAL(&le1[2*i], 0, in1[i]);
430 torture_comment(tctx, "converting fixed buffer to UTF16\n");
432 ret = convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, false);
433 if (ret == -1) {
434 torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
437 torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
439 torture_comment(tctx, "converting fixed buffer to UTF8\n");
441 ret = convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, false);
442 if (ret == -1) {
443 torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
446 torture_assert(tctx, strcmp(correct, out1) == 0, "conversion gave incorrect result\n");
448 talloc_free(mem_ctx);
450 return true;
453 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
455 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
457 torture_suite_add_simple_test(suite, "string2key",
458 test_string2key);
460 torture_suite_add_simple_test(suite, "next_codepoint()",
461 test_next_codepoint);
463 torture_suite_add_simple_test(suite, "first 1M codepoints",
464 test_first_1m);
466 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
467 test_random_5m);
469 torture_suite_add_simple_test(suite, "string2key",
470 test_string2key);
471 return suite;
474 #else
476 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
478 printf("No native iconv library - can't run iconv test\n");
479 return NULL;
482 #endif