r22579: disable progress printing in the build-farm
[Samba.git] / source / torture / local / iconv.c
blob4c96d075c096a6c3339c24a98f930c3c96394a04
/*
   Unix SMB/CIFS implementation.

   local testing of iconv routines. This tests the system iconv code against
   the built-in iconv code

   Copyright (C) Andrew Tridgell 2004

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
24 #include "includes.h"
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
31 #if HAVE_NATIVE_ICONV
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  codepoint is packed as 4 little-endian bytes (UCS-4LE) and handed to
  the system iconv. At most 8 bytes are written to buf, and *size
  receives the number of bytes actually written.

  returns 0 when the system iconv converted the codepoint cleanly,
  -1 when the converter could not be opened or iconv() reported an
  error, and otherwise whatever count iconv() returned.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	/* cached across calls; opened lazily on first use */
	static iconv_t cd;
	enum { OUT_MAX = 8 };
	uint8_t in[4];
	char *ptr_in;
	size_t size_in, size_out, ret;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* forget the invalid handle so a later call retries */
			cd = NULL;
			*size = 0;
			return -1;
		}
	}

	in[0] = codepoint & 0xFF;
	in[1] = (codepoint>>8) & 0xFF;
	in[2] = (codepoint>>16) & 0xFF;
	in[3] = (codepoint>>24) & 0xFF;

	ptr_in = (char *)in;
	size_in = sizeof(in);
	size_out = OUT_MAX;

	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);

	*size = OUT_MAX - size_out;

	/* map iconv's (size_t)-1 error sentinel explicitly instead of
	   relying on an implicit size_t -> int narrowing */
	if (ret == (size_t)-1) {
		return -1;
	}
	return (int)ret;
}
/*
  work out the unicode codepoint of the first character in the buffer

  buf holds size bytes of text encoded in "charset". The text is
  converted to UCS-4LE with the system iconv into a 4 byte output
  buffer, so only the first character fits; the iconv() result is
  deliberately ignored.

  returns the codepoint, or 0 if the converter for charset could not be
  opened or nothing was converted (the output buffer starts zeroed).
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen each byte to unsigned before shifting: a uint8_t promotes
	   to signed int, and shifting a set top bit by 24 would be
	   undefined */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
/*
  display a buffer with name prefix

  prints name, then each of the size bytes of buf as two hex digits
  followed by a space, then a newline, all on stdout.
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	size_t i;	/* same type as size: no signed/unsigned comparison */

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
105 given a UTF-16LE buffer, test the system and built-in iconv code to
106 make sure they do exactly the same thing in converting the buffer to
107 "charset", then convert it back again and ensure we get the same
108 buffer back
110 static bool test_buffer(struct torture_context *test,
111 uint8_t *inbuf, size_t size, const char *charset)
113 uint8_t buf1[1000], buf2[1000], buf3[1000];
114 size_t outsize1, outsize2, outsize3;
115 const char *ptr_in;
116 char *ptr_out;
117 size_t size_in1, size_in2, size_in3;
118 size_t ret1, ret2, ret3, len1, len2;
119 int errno1, errno2;
120 static iconv_t cd;
121 static smb_iconv_t cd2, cd3;
122 static const char *last_charset;
124 if (cd && last_charset) {
125 iconv_close(cd);
126 smb_iconv_close(cd2);
127 smb_iconv_close(cd3);
128 cd = NULL;
131 if (!cd) {
132 cd = iconv_open(charset, "UTF-16LE");
133 if (cd == (iconv_t)-1) {
134 torture_fail(test,
135 talloc_asprintf(test,
136 "failed to open %s to UTF-16LE",
137 charset));
139 cd2 = smb_iconv_open(charset, "UTF-16LE");
140 cd3 = smb_iconv_open("UTF-16LE", charset);
141 last_charset = charset;
144 /* internal convert to charset - placing result in buf1 */
145 ptr_in = (const char *)inbuf;
146 ptr_out = (char *)buf1;
147 size_in1 = size;
148 outsize1 = sizeof(buf1);
150 memset(ptr_out, 0, outsize1);
151 errno = 0;
152 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
153 errno1 = errno;
155 /* system convert to charset - placing result in buf2 */
156 ptr_in = (const char *)inbuf;
157 ptr_out = (char *)buf2;
158 size_in2 = size;
159 outsize2 = sizeof(buf2);
161 memset(ptr_out, 0, outsize2);
162 errno = 0;
163 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
164 errno2 = errno;
166 len1 = sizeof(buf1) - outsize1;
167 len2 = sizeof(buf2) - outsize2;
169 /* codepoints above 1M are not interesting for now */
170 if (len2 > len1 &&
171 memcmp(buf1, buf2, len1) == 0 &&
172 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
173 return true;
175 if (len1 > len2 &&
176 memcmp(buf1, buf2, len2) == 0 &&
177 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
178 return true;
181 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
183 if (errno1 != errno2) {
184 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
185 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
186 torture_fail(test, talloc_asprintf(test,
187 "e1=%d/%s e2=%d/%s",
188 errno1, strerror(errno1),
189 errno2, strerror(errno2)));
192 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
194 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
196 if (len1 != len2 ||
197 memcmp(buf1, buf2, len1) != 0) {
198 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
199 show_buf(" IN1:", inbuf, size-size_in1);
200 show_buf(" IN2:", inbuf, size-size_in2);
201 show_buf("OUT1:", buf1, len1);
202 show_buf("OUT2:", buf2, len2);
203 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
204 torture_comment(test, "next codepoint is %u",
205 get_codepoint((char *)(buf2+len1), len2-len1, charset));
207 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
208 torture_comment(test, "next codepoint is %u",
209 get_codepoint((char *)(buf1+len2),len1-len2, charset));
212 torture_fail(test, "failed");
215 /* convert back to UTF-16, putting result in buf3 */
216 size = size - size_in1;
217 ptr_in = (const char *)buf1;
218 ptr_out = (char *)buf3;
219 size_in3 = len1;
220 outsize3 = sizeof(buf3);
222 memset(ptr_out, 0, outsize3);
223 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
225 /* we only internally support the first 1M codepoints */
226 if (outsize3 != sizeof(buf3) - size &&
227 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
228 size - (sizeof(buf3) - outsize3),
229 "UTF-16LE") >= (1<<20)) {
230 return true;
233 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
234 "pull failed - %s", strerror(errno)));
236 if (strncmp(charset, "UTF", 3) != 0) {
237 /* don't expect perfect mappings for non UTF charsets */
238 return true;
242 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
243 "wrong outsize3");
245 if (memcmp(buf3, inbuf, size) != 0) {
246 torture_comment(test, "pull bytes mismatch:");
247 show_buf("inbuf", inbuf, size);
248 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
249 torture_comment(test, "next codepoint is %u\n",
250 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
251 size - (sizeof(buf3) - outsize3),
252 "UTF-16LE"));
253 torture_fail(test, "");
256 return true;
261 test the push_codepoint() and next_codepoint() functions for a given
262 codepoint
264 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
266 uint8_t buf[10];
267 size_t size, size2;
268 codepoint_t c;
270 size = push_codepoint((char *)buf, codepoint);
271 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
272 "Invalid Codepoint range");
274 if (size == -1) return true;
276 buf[size] = random();
277 buf[size+1] = random();
278 buf[size+2] = random();
279 buf[size+3] = random();
281 c = next_codepoint((char *)buf, &size2);
283 torture_assert(tctx, c == codepoint,
284 talloc_asprintf(tctx,
285 "next_codepoint(%u) failed - gave %u", codepoint, c));
287 torture_assert(tctx, size2 == size,
288 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
289 codepoint, (int)size2, (int)size));
291 return true;
/*
  run test_codepoint() over every codepoint below 1<<20, stopping at
  the first one that fails
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	const unsigned int limit = (1<<20);
	unsigned int cp = 0;

	while (cp < limit) {
		if (!test_codepoint(tctx, cp)) {
			return false;
		}
		cp++;
	}

	return true;
}
/*
  run test_buffer() against UTF-8 for each codepoint below 1<<20

  codepoints for which gen_codepoint_utf16() does not return 0 are
  skipped. Every 1000th codepoint a progress line is printed unless
  the "progress" torture setting is turned off.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned int codepoint;
	size_t size;
	unsigned char inbuf[1000];

	for (codepoint=0;codepoint<(1<<20);codepoint++) {
		if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
			continue;
		}

		if (codepoint % 1000 == 0) {
			/* "true", matching the bool constants used by the
			   rest of this file */
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "codepoint=%u \r", codepoint);
				fflush(stdout);
			}
		}

		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			return false;
		}
	}
	return true;
}
/*
  run test_buffer() on random input, against both UTF-8 and CP850

  each of the 500000 iterations builds a buffer of 0..99 random bytes:
  about 80% of the bytes are drawn from 0..127, the rest are fully
  random, and roughly one byte in ten gets the bits 0xd8 and/or 0xdc
  or-ed in. Every 1000th iteration a progress line is printed unless
  the "progress" torture setting is turned off.

  NOTE(review): the function name and the registered test title say
  "5M", but the loop below runs 500000 times - confirm which is meant.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
				fflush(stdout);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}

		/* i is unsigned, so it is printed with %u */
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}
	return true;
}
370 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
372 static iconv_t cd;
373 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
375 if (!lp_parm_bool(-1, "iconv", "native", True)) {
376 printf("system iconv disabled - skipping test\n");
377 return NULL;
380 cd = iconv_open("UTF-16LE", "UCS-4LE");
381 if (cd == (iconv_t)-1) {
382 printf("unable to test - system iconv library does not support UTF-16LE -> UCS-4LE\n");
383 return NULL;
385 iconv_close(cd);
387 cd = iconv_open("UTF-16LE", "CP850");
388 if (cd == (iconv_t)-1) {
389 printf("unable to test - system iconv library does not support UTF-16LE -> CP850\n");
390 return NULL;
392 iconv_close(cd);
394 srandom(time(NULL));
396 torture_suite_add_simple_test(suite, "next_codepoint()",
397 test_next_codepoint);
399 torture_suite_add_simple_test(suite, "first 1M codepoints",
400 test_first_1m);
402 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
403 test_random_5m);
404 return suite;
407 #else
409 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
411 printf("No native iconv library - can't run iconv test\n");
412 return NULL;
415 #endif