r19404: fixed the LOCAL-ICONV test
[Samba/aatanasov.git] / source4 / torture / local / iconv.c
blob8d66f4fd58230364b41f04088b07bfb3d357ea0a
1 /*
2 Unix SMB/CIFS implementation.
4 local testing of iconv routines. This tests the system iconv code against
5 the built-in iconv code
7 Copyright (C) Andrew Tridgell 2004
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "includes.h"
25 #include "torture/torture.h"
26 #include "system/iconv.h"
27 #include "system/time.h"
28 #include "libcli/raw/libcliraw.h"
29 #include "torture/util.h"
31 #if HAVE_NATIVE_ICONV
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  The codepoint is serialised as 4 little-endian bytes (UCS-4LE) and run
  through the system iconv. The conversion descriptor is opened on first
  use and kept for the lifetime of the process.

  buf must have room for at least GEN_UTF16_OUT_MAX bytes; *size receives
  the number of bytes written to it.

  Returns the value reported by iconv() (0 for a clean conversion), or -1
  if the descriptor could not be opened or the conversion failed.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	/* output space offered to iconv() for one codepoint */
	enum { GEN_UTF16_OUT_MAX = 8 };
	static iconv_t cd;
	uint8_t in[4];
	char *ptr_in;
	size_t size_in, size_out, ret;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* leave the cache empty so the next call retries */
			cd = NULL;
			return -1;
		}
	}

	in[0] = codepoint & 0xFF;
	in[1] = (codepoint>>8) & 0xFF;
	in[2] = (codepoint>>16) & 0xFF;
	in[3] = (codepoint>>24) & 0xFF;

	ptr_in = (char *)in;
	size_in = sizeof(in);
	size_out = GEN_UTF16_OUT_MAX;

	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);

	*size = GEN_UTF16_OUT_MAX - size_out;

	/* map the (size_t)-1 error value explicitly instead of relying on
	   an implementation-defined size_t -> int conversion */
	if (ret == (size_t)-1) {
		return -1;
	}
	return (int)ret;
}
/*
  work out the unicode codepoint of the first character in the buffer

  buf holds size bytes of text encoded in "charset". The text is converted
  to UCS-4LE with the system iconv into a 4 byte output buffer, so only the
  first character is kept.

  Returns the codepoint, or 0 if the charset is not supported by the system
  iconv. The output buffer is zeroed beforehand, so a conversion that
  produces no output also yields 0.
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);
	memset(out, 0, sizeof(out));

	/* the return value is deliberately ignored: running out of output
	   space after the first character is the expected outcome when the
	   input holds more than one character */
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* widen before shifting so that out[3] >= 0x80 does not shift into
	   the sign bit of a promoted int */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
/*
  display a buffer with name prefix

  Prints name, then each of the size bytes of buf as two hex digits
  followed by a space, then a newline.
*/
static void show_buf(const char *name, const uint8_t *buf, size_t size)
{
	/* size_t index: matches the type of the bound, so there is no
	   signed/unsigned comparison */
	size_t i;

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
105 given a UTF-16LE buffer, test the system and built-in iconv code to
106 make sure they do exactly the same thing in converting the buffer to
107 "charset", then convert it back again and ensure we get the same
108 buffer back
110 static bool test_buffer(struct torture_context *test,
111 uint8_t *inbuf, size_t size, const char *charset)
113 uint8_t buf1[1000], buf2[1000], buf3[1000];
114 size_t outsize1, outsize2, outsize3;
115 const char *ptr_in;
116 char *ptr_out;
117 size_t size_in1, size_in2, size_in3;
118 size_t ret1, ret2, ret3, len1, len2;
119 int errno1, errno2;
120 static iconv_t cd;
121 static smb_iconv_t cd2, cd3;
122 static const char *last_charset;
124 if (cd && last_charset) {
125 iconv_close(cd);
126 smb_iconv_close(cd2);
127 smb_iconv_close(cd3);
128 cd = NULL;
131 if (!cd) {
132 cd = iconv_open(charset, "UTF-16LE");
133 if (cd == (iconv_t)-1) {
134 cd = NULL;
135 return false;
137 cd2 = smb_iconv_open(charset, "UTF-16LE");
138 cd3 = smb_iconv_open("UTF-16LE", charset);
139 last_charset = charset;
142 /* internal convert to charset - placing result in buf1 */
143 ptr_in = (const char *)inbuf;
144 ptr_out = (char *)buf1;
145 size_in1 = size;
146 outsize1 = sizeof(buf1);
148 memset(ptr_out, 0, outsize1);
149 errno = 0;
150 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
151 errno1 = errno;
153 /* system convert to charset - placing result in buf2 */
154 ptr_in = (const char *)inbuf;
155 ptr_out = (char *)buf2;
156 size_in2 = size;
157 outsize2 = sizeof(buf2);
159 memset(ptr_out, 0, outsize2);
160 errno = 0;
161 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
162 errno2 = errno;
164 len1 = sizeof(buf1) - outsize1;
165 len2 = sizeof(buf2) - outsize2;
167 /* codepoints above 1M are not interesting for now */
168 if (len2 > len1 &&
169 memcmp(buf1, buf2, len1) == 0 &&
170 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
171 return true;
173 if (len1 > len2 &&
174 memcmp(buf1, buf2, len2) == 0 &&
175 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
176 return true;
179 torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
181 if (errno1 != errno2) {
182 show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
183 show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
184 torture_fail(test, talloc_asprintf(test,
185 "e1=%s e2=%s", strerror(errno1), strerror(errno2)));
188 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
190 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
192 if (len1 != len2 ||
193 memcmp(buf1, buf2, len1) != 0) {
194 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
195 show_buf(" IN1:", inbuf, size-size_in1);
196 show_buf(" IN2:", inbuf, size-size_in2);
197 show_buf("OUT1:", buf1, len1);
198 show_buf("OUT2:", buf2, len2);
199 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
200 torture_comment(test, "next codepoint is %u",
201 get_codepoint((char *)(buf2+len1), len2-len1, charset));
203 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
204 torture_comment(test, "next codepoint is %u",
205 get_codepoint((char *)(buf1+len2),len1-len2, charset));
208 torture_fail(test, "failed");
211 /* convert back to UTF-16, putting result in buf3 */
212 size = size - size_in1;
213 ptr_in = (const char *)buf1;
214 ptr_out = (char *)buf3;
215 size_in3 = len1;
216 outsize3 = sizeof(buf3);
218 memset(ptr_out, 0, outsize3);
219 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
221 /* we only internally support the first 1M codepoints */
222 if (outsize3 != sizeof(buf3) - size &&
223 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
224 size - (sizeof(buf3) - outsize3),
225 "UTF-16LE") >= (1<<20)) {
226 return true;
229 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
230 "pull failed - %s", strerror(errno)));
232 if (strncmp(charset, "UTF", 3) != 0) {
233 /* don't expect perfect mappings for non UTF charsets */
234 return true;
238 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
239 "wrong outsize3");
241 if (memcmp(buf3, inbuf, size) != 0) {
242 torture_comment(test, "pull bytes mismatch:");
243 show_buf("inbuf", inbuf, size);
244 show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
245 torture_fail(test, "");
246 torture_comment(test, "next codepoint is %u\n",
247 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
248 size - (sizeof(buf3) - outsize3),
249 "UTF-16LE"));
252 return true;
257 test the push_codepoint() and next_codepoint() functions for a given
258 codepoint
260 static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
262 uint8_t buf[10];
263 size_t size, size2;
264 codepoint_t c;
266 size = push_codepoint((char *)buf, codepoint);
267 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
268 "Invalid Codepoint range");
270 if (size == -1) return true;
272 buf[size] = random();
273 buf[size+1] = random();
274 buf[size+2] = random();
275 buf[size+3] = random();
277 c = next_codepoint((char *)buf, &size2);
279 torture_assert(tctx, c == codepoint,
280 talloc_asprintf(tctx,
281 "next_codepoint(%u) failed - gave %u", codepoint, c));
283 torture_assert(tctx, size2 == size,
284 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
285 codepoint, (int)size2, (int)size));
287 return true;
/*
  run test_codepoint() over every codepoint below 1<<20, stopping at the
  first one that fails
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	unsigned int cp = 0;

	while (cp < (1<<20)) {
		if (!test_codepoint(tctx, cp)) {
			return false;
		}
		cp++;
	}

	return true;
}
/*
  run test_buffer() against UTF-8 for every codepoint below 1<<20

  Codepoints that gen_codepoint_utf16() cannot turn into UTF-16LE are
  skipped. Returns false as soon as one buffer fails.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned int codepoint;
	size_t size;
	unsigned char inbuf[1000];

	for (codepoint=0;codepoint<(1<<20);codepoint++) {
		if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
			continue;
		}

		if (codepoint % 1000 == 0) {
			/* print progress when the "progress" setting is
			   enabled; the previous test was negated and so only
			   printed when it was disabled (the third argument
			   is presumably the default - confirm) */
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "codepoint=%u \r", codepoint);
			}
		}

		if (!test_buffer(tctx, inbuf, size, "UTF-8"))
			return false;
	}
	return true;
}
/*
  run test_buffer() against UTF-8 and CP850 for randomly generated input

  Each iteration builds a buffer of 0..99 random bytes: about 80% of them
  are limited to 0..127, and roughly one in ten has the 0xd8 and/or 0xdc
  bits OR-ed in (presumably to make UTF-16 surrogate units more likely).
  Returns false as soon as one buffer fails.

  NOTE(review): the loop runs 500000 iterations although the function
  name says 5m.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;
	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			/* print progress when the "progress" setting is
			   enabled; the previous test was negated and so only
			   printed when it was disabled (the third argument
			   is presumably the default - confirm) */
			if (torture_setting_bool(tctx, "progress", true)) {
				torture_comment(tctx, "i=%u \r", i);
			}
		}

		size = random() % 100;
		for (c=0;c<size;c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}
		if (!test_buffer(tctx, inbuf, size, "UTF-8"))
			return false;

		if (!test_buffer(tctx, inbuf, size, "CP850"))
			return false;
	}
	return true;
}
360 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
362 static iconv_t cd;
363 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
365 if (!lp_parm_bool(-1, "iconv", "native", True)) {
366 printf("system iconv disabled - skipping test\n");
367 return NULL;
370 cd = iconv_open("UTF-16LE", "UCS-4LE");
371 if (cd == (iconv_t)-1) {
372 printf("unable to test - system iconv library does not support UTF-16LE -> UCS-4LE\n");
373 return NULL;
375 iconv_close(cd);
377 srandom(time(NULL));
378 torture_suite_add_simple_test(suite, "next_codepoint()",
379 test_next_codepoint);
381 torture_suite_add_simple_test(suite, "first 1M codepoints",
382 test_first_1m);
384 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
385 test_random_5m);
386 return suite;
389 #else
391 struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
393 printf("No native iconv library - can't run iconv test\n");
394 return NULL;
397 #endif