s4:tests/iconv - Fix a warning
[Samba/kamenim.git] / lib / util / charset / tests / iconv.c
blob3e2546dc01b65cdc60f8fa74b3e52750dc3081f4
/*
   Unix SMB/CIFS implementation.

   local testing of iconv routines. This tests the system iconv code against
   the built-in iconv code

   Copyright (C) Andrew Tridgell 2004

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
23 #include "includes.h"
24 #include "torture/torture.h"
25 #include "system/iconv.h"
26 #include "system/time.h"
27 #include "libcli/raw/libcliraw.h"
28 #include "param/param.h"
29 #include "torture/util.h"
30 #include "talloc.h"
32 #if HAVE_NATIVE_ICONV
34 static bool iconv_untestable(struct torture_context *tctx)
36 iconv_t cd;
38 if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true))
39 torture_skip(tctx, "system iconv disabled - skipping test");
41 cd = iconv_open("UTF-16LE", "UCS-4LE");
42 if (cd == (iconv_t)-1)
43 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
44 iconv_close(cd);
46 cd = iconv_open("UTF-16LE", "CP850");
47 if (cd == (iconv_t)-1)
48 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
49 iconv_close(cd);
51 return false;
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  codepoint: value to encode, fed to the system iconv as 4 bytes of UCS-4LE
  buf:       output buffer, must have room for at least 8 bytes
  size:      receives the number of bytes written to buf (not written
             when the converter cannot be opened)

  Returns -1 if the UCS-4LE -> UTF-16LE converter cannot be opened,
  otherwise the iconv() result converted to int (0 on a clean conversion,
  -1 when iconv() reports an error).
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	enum { UTF16_OUT_MAX = 8 };	/* output space offered to iconv() */
	static iconv_t cd;		/* opened on first use and then reused */
	uint8_t in[4];
	char *ptr_in;
	size_t size_in, size_out, ret;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* forget the failed handle so the next call retries */
			cd = NULL;
			return -1;
		}
	}

	/* serialise the codepoint as little-endian UCS-4 */
	in[0] = codepoint & 0xFF;
	in[1] = (codepoint>>8) & 0xFF;
	in[2] = (codepoint>>16) & 0xFF;
	in[3] = (codepoint>>24) & 0xFF;

	ptr_in = (char *)in;
	size_in = sizeof(in);
	size_out = UTF16_OUT_MAX;

	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);

	/* iconv() decrements size_out by the number of bytes it produced */
	*size = UTF16_OUT_MAX - size_out;

	return (int)ret;
}
/*
  work out the unicode codepoint of the first character in the buffer

  buf/size: input bytes encoded in "charset"
  charset:  name of the input encoding as understood by the system iconv

  The input is converted to UCS-4LE with only 4 bytes of output space, so
  at most one character is produced. Returns that codepoint, or 0 when the
  converter cannot be opened or nothing could be converted (the output
  bytes are pre-zeroed).
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	/*
	  the return value is deliberately ignored: running out of output
	  space after the first character is the expected outcome
	*/
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so out[3]<<24 cannot overflow a signed int */
	return (unsigned int)out[0] |
		((unsigned int)out[1]<<8) |
		((unsigned int)out[2]<<16) |
		((unsigned int)out[3]<<24);
}
/*
  display a buffer with name prefix

  Prints "name", then each of the "size" bytes of "buf" as two hex digits
  followed by a space, then a newline, all on stdout.
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	size_t i;	/* same type as size: no signed/unsigned comparison */

	printf("%s ", name);
	for (i=0;i<size;i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
127 given a UTF-16LE buffer, test the system and built-in iconv code to
128 make sure they do exactly the same thing in converting the buffer to
129 "charset", then convert it back again and ensure we get the same
130 buffer back
132 static bool test_buffer(struct torture_context *test,
133 uint8_t *inbuf, size_t size, const char *charset)
135 uint8_t buf1[1000], buf2[1000], buf3[1000];
136 size_t outsize1, outsize2, outsize3;
137 const char *ptr_in;
138 char *ptr_out;
139 size_t size_in1, size_in2, size_in3;
140 size_t ret1, ret2, ret3, len1, len2;
141 int errno1, errno2;
142 static iconv_t cd;
143 static smb_iconv_t cd2, cd3;
144 static const char *last_charset;
146 if (cd && last_charset) {
147 iconv_close(cd);
148 smb_iconv_close(cd2);
149 smb_iconv_close(cd3);
150 cd = NULL;
153 if (!cd) {
154 cd = iconv_open(charset, "UTF-16LE");
155 if (cd == (iconv_t)-1) {
156 torture_fail(test,
157 talloc_asprintf(test,
158 "failed to open %s to UTF-16LE",
159 charset));
161 cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
162 cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
163 last_charset = charset;
166 /* internal convert to charset - placing result in buf1 */
167 ptr_in = (const char *)inbuf;
168 ptr_out = (char *)buf1;
169 size_in1 = size;
170 outsize1 = sizeof(buf1);
172 memset(ptr_out, 0, outsize1);
173 errno = 0;
174 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
175 errno1 = errno;
177 /* system convert to charset - placing result in buf2 */
178 ptr_in = (const char *)inbuf;
179 ptr_out = (char *)buf2;
180 size_in2 = size;
181 outsize2 = sizeof(buf2);
183 memset(ptr_out, 0, outsize2);
184 errno = 0;
185 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
186 errno2 = errno;
188 len1 = sizeof(buf1) - outsize1;
189 len2 = sizeof(buf2) - outsize2;
191 /* codepoints above 1M are not interesting for now */
192 if (len2 > len1 &&
193 memcmp(buf1, buf2, len1) == 0 &&
194 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
195 return true;
197 if (len1 > len2 &&
198 memcmp(buf1, buf2, len2) == 0 &&
199 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
200 return true;
203 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
205 if (errno1 != errno2) {
206 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
207 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
208 torture_fail(test, talloc_asprintf(test,
209 "e1=%d/%s e2=%d/%s",
210 errno1, strerror(errno1),
211 errno2, strerror(errno2)));
214 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
216 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
218 if (len1 != len2 ||
219 memcmp(buf1, buf2, len1) != 0) {
220 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
221 show_buf(" IN1:", inbuf, size-size_in1);
222 show_buf(" IN2:", inbuf, size-size_in2);
223 show_buf("OUT1:", buf1, len1);
224 show_buf("OUT2:", buf2, len2);
225 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
226 torture_comment(test, "next codepoint is %u",
227 get_codepoint((char *)(buf2+len1), len2-len1, charset));
229 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
230 torture_comment(test, "next codepoint is %u",
231 get_codepoint((char *)(buf1+len2),len1-len2, charset));
234 torture_fail(test, "failed");
237 /* convert back to UTF-16, putting result in buf3 */
238 size = size - size_in1;
239 ptr_in = (const char *)buf1;
240 ptr_out = (char *)buf3;
241 size_in3 = len1;
242 outsize3 = sizeof(buf3);
244 memset(ptr_out, 0, outsize3);
245 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
247 /* we only internally support the first 1M codepoints */
248 if (outsize3 != sizeof(buf3) - size &&
249 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
250 size - (sizeof(buf3) - outsize3),
251 "UTF-16LE") >= (1<<20)) {
252 return true;
255 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
256 "pull failed - %s", strerror(errno)));
258 if (strncmp(charset, "UTF", 3) != 0) {
259 /* don't expect perfect mappings for non UTF charsets */
260 return true;
264 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
265 "wrong outsize3");
267 if (memcmp(buf3, inbuf, size) != 0) {
268 torture_comment(test, "pull bytes mismatch:");
269 show_buf("inbuf", inbuf, size);
270 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
271 torture_comment(test, "next codepoint is %u\n",
272 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
273 size - (sizeof(buf3) - outsize3),
274 "UTF-16LE"));
275 torture_fail(test, "");
278 return true;
283 test the push_codepoint() and next_codepoint() functions for a given
284 codepoint
286 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
288 uint8_t buf[10];
289 size_t size, size2;
290 codepoint_t c;
292 size = push_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint);
293 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
294 "Invalid Codepoint range");
296 if (size == -1) return true;
298 buf[size] = random();
299 buf[size+1] = random();
300 buf[size+2] = random();
301 buf[size+3] = random();
303 c = next_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2);
305 torture_assert(tctx, c == codepoint,
306 talloc_asprintf(tctx,
307 "next_codepoint(%u) failed - gave %u", codepoint, c));
309 torture_assert(tctx, size2 == size,
310 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
311 codepoint, (int)size2, (int)size));
313 return true;
/*
  run test_codepoint() over every codepoint below 1<<20

  Returns true immediately when iconv_untestable() says the test cannot
  run, false as soon as one codepoint fails, true otherwise.
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	unsigned int codepoint;

	if (iconv_untestable(tctx))
		return true;

	for (codepoint=0;codepoint<(1<<20);codepoint++) {
		if (!test_codepoint(tctx, codepoint))
			return false;
	}
	return true;
}
/*
  compare system and built-in iconv for each of the first 1<<20
  codepoints, converting UTF-16LE to UTF-8 via test_buffer()

  Codepoints for which gen_codepoint_utf16() does not return 0 are
  skipped. Returns true when the test is untestable or every codepoint
  passes, false on the first failure.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned int codepoint;
	size_t size;
	unsigned char inbuf[1000];

	if (iconv_untestable(tctx))
		return true;

	for (codepoint=0;codepoint<(1<<20);codepoint++) {
		if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
			continue;
		}

		/* progress line every 1000 codepoints when "progress" is set */
		if (codepoint % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "codepoint=%u   \r", codepoint);
				fflush(stdout);
			}
		}

		if (!test_buffer(tctx, inbuf, size, "UTF-8"))
			return false;
	}
	return true;
}
/*
  feed random byte sequences through test_buffer() for both UTF-8 and
  CP850

  Each of the 500000 iterations builds a buffer of 0..99 bytes. About 80%
  of the bytes are below 128 and the rest are arbitrary; some bytes
  additionally get 0xd8 or 0xdc or-ed in (presumably to provoke UTF-16
  surrogate handling - confirm). Returns true when untestable or when
  every buffer passes, false on the first failure.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	if (iconv_untestable(tctx))
		return true;

	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		/* progress line every 1000 iterations when "progress" is set */
		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u              \r", i);
				fflush(stdout);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			/* i is unsigned, so print it with %u */
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}
	return true;
}
403 static bool test_string2key(struct torture_context *tctx)
405 uint16_t *buf;
406 char *dest = NULL;
407 TALLOC_CTX *mem_ctx = talloc_new(tctx);
408 size_t len = (random()%1000)+1;
409 const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
410 uint8_t le1[20];
411 uint8_t *munged1;
412 uint8_t *out1;
413 size_t ret;
414 int i;
415 const char *correct = "a\357\277\275b\357\277\275c\001defg";
417 buf = talloc_size(mem_ctx, len*2);
418 generate_random_buffer((uint8_t *)buf, len*2);
420 torture_comment(tctx, "converting random buffer\n");
422 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret, false)) {
423 torture_fail(tctx, "Failed to convert random buffer\n");
426 for (i=0;i<10;i++) {
427 SSVAL(&le1[2*i], 0, in1[i]);
430 torture_comment(tctx, "converting fixed buffer to UTF16\n");
432 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret, false)) {
433 torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
436 torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
438 torture_comment(tctx, "converting fixed buffer to UTF8\n");
440 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret, false)) {
441 torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
444 torture_assert(tctx, strcmp(correct, (const char *) out1) == 0,
445 "conversion gave incorrect result\n");
447 talloc_free(mem_ctx);
449 return true;
452 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
454 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
456 torture_suite_add_simple_test(suite, "string2key",
457 test_string2key);
459 torture_suite_add_simple_test(suite, "next_codepoint()",
460 test_next_codepoint);
462 torture_suite_add_simple_test(suite, "first 1M codepoints",
463 test_first_1m);
465 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
466 test_random_5m);
468 torture_suite_add_simple_test(suite, "string2key",
469 test_string2key);
470 return suite;
473 #else
475 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
477 printf("No native iconv library - can't run iconv test\n");
478 return NULL;
481 #endif