[rx] add missing project file generator helper files.
[mono-project.git] / eglib / test / utf8.c
bloba924902c99f60a3aa0244a2446d4a056150b41e4
1 #include <stdlib.h>
3 #include "test.h"
5 /*
6 * g_utf16_to_utf8
7 */
9 glong
10 compare_strings_utf8_pos (const gchar *expected, const gchar *actual, glong size)
12 int i;
13 for (i = 0; i < size; i++)
14 if (expected [i] != actual [i])
15 return i;
16 return -1;
19 RESULT
20 compare_strings_utf8_RESULT (const gchar *expected, const gchar *actual, glong size)
22 glong ret;
24 ret = compare_strings_utf8_pos (expected, actual, size);
25 if (ret < 0)
26 return OK;
27 return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d\n", expected, actual, ret);
30 void
31 gchar_to_gunichar2 (gunichar2 ret[], const gchar *src)
33 int i;
35 for (i = 0; src [i]; i++)
36 ret [i] = src [i];
37 ret [i] = 0;
40 RESULT
41 compare_utf16_to_utf8_explicit (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out, glong size_spec)
43 GError *error;
44 gchar* ret;
45 RESULT result;
46 glong in_read, out_read;
48 result = NULL;
50 error = NULL;
51 ret = g_utf16_to_utf8 (utf16, size_spec, &in_read, &out_read, &error);
52 if (error) {
53 result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
54 g_error_free (error);
55 if (ret)
56 g_free (ret);
57 return result;
59 if (in_read != len_in)
60 result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
61 else if (out_read != len_out)
62 result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
63 else
64 result = compare_strings_utf8_RESULT (expected, ret, len_out);
66 g_free (ret);
67 if (result)
68 return result;
70 return OK;
73 RESULT
74 compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out)
76 RESULT result;
78 result = compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, -1);
79 if (result != OK)
80 return result;
81 return compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, len_in);
84 RESULT
85 test_utf16_to_utf8 ()
87 const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81", *src5 = "\xF0\x90\x90\x80";
88 gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}, str5 [] = {0xD801, 0xDC00, 0};
89 RESULT result;
91 gchar_to_gunichar2 (str1, src1);
93 /* empty string */
94 result = compare_utf16_to_utf8 (src0, str0, 0, 0);
95 if (result != OK)
96 return result;
98 result = compare_utf16_to_utf8 (src1, str1, 5, 5);
99 if (result != OK)
100 return result;
101 result = compare_utf16_to_utf8 (src2, str2, 2, 4);
102 if (result != OK)
103 return result;
104 result = compare_utf16_to_utf8 (src3, str3, 1, 3);
105 if (result != OK)
106 return result;
107 result = compare_utf16_to_utf8 (src4, str4, 1, 3);
108 if (result != OK)
109 return result;
110 result = compare_utf16_to_utf8 (src5, str5, 2, 4);
111 if (result != OK)
112 return result;
114 return OK;
/*
 * g_utf8_to_utf16
 */
121 glong
122 compare_strings_utf16_pos (const gunichar2 *expected, const gunichar2 *actual, glong size)
124 int i;
125 for (i = 0; i < size; i++)
126 if (expected [i] != actual [i])
127 return i;
128 return -1;
131 RESULT
132 compare_strings_utf16_RESULT (const gunichar2 *expected, const gunichar2 *actual, glong size)
134 glong ret;
136 ret = compare_strings_utf16_pos (expected, actual, size);
137 if (ret < 0)
138 return OK;
139 return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d ('%c' x '%c')\n", expected, actual, ret, expected [ret], actual [ret]);
/*
 * When EGLIB_TESTS is not defined, eg_utf8_to_utf16_with_nuls is mapped onto
 * g_utf8_to_utf16 so that the code referring to it still compiles.
 */
#ifndef EGLIB_TESTS
#define eg_utf8_to_utf16_with_nuls g_utf8_to_utf16
#endif
146 RESULT
147 compare_utf8_to_utf16_explicit (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, glong size_spec, gboolean include_nuls)
149 GError *error;
150 gunichar2* ret;
151 RESULT result;
152 glong in_read, out_read;
154 result = NULL;
156 error = NULL;
157 if (include_nuls)
158 ret = eg_utf8_to_utf16_with_nuls (utf8, size_spec, &in_read, &out_read, &error);
159 else
160 ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &error);
162 if (error) {
163 result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
164 g_error_free (error);
165 if (ret)
166 g_free (ret);
167 return result;
169 if (in_read != len_in)
170 result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
171 else if (out_read != len_out)
172 result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
173 else
174 result = compare_strings_utf16_RESULT (expected, ret, len_out);
176 g_free (ret);
177 if (result)
178 return result;
180 return OK;
183 RESULT
184 compare_utf8_to_utf16_general (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, gboolean include_nuls)
186 RESULT result;
188 result = compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, -1, include_nuls);
189 if (result != OK)
190 return result;
191 return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in, include_nuls);
194 RESULT
195 compare_utf8_to_utf16 (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out)
197 return compare_utf8_to_utf16_general (expected, utf8, len_in, len_out, FALSE);
200 RESULT
201 compare_utf8_to_utf16_with_nuls (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out)
203 return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in, TRUE);
207 RESULT
208 test_utf8_seq ()
210 const gchar *src = "\xE5\xB9\xB4\x27";
211 glong in_read, out_read;
212 //gunichar2 expected [6];
213 GError *error = NULL;
214 gunichar2 *dst;
216 //printf ("got: %s\n", src);
217 dst = g_utf8_to_utf16 (src, (glong)strlen (src), &in_read, &out_read, &error);
218 if (error != NULL){
219 return error->message;
222 if (in_read != 4) {
223 return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
225 if (out_read != 2) {
226 return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
228 g_free (dst);
230 return OK;
233 RESULT
234 test_utf8_to_utf16 ()
236 const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81";
237 gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0};
238 RESULT result;
240 gchar_to_gunichar2 (str1, src1);
242 /* empty string */
243 result = compare_utf8_to_utf16 (str0, src0, 0, 0);
244 if (result != OK)
245 return result;
247 result = compare_utf8_to_utf16 (str1, src1, 5, 5);
248 if (result != OK)
249 return result;
250 result = compare_utf8_to_utf16 (str2, src2, 4, 2);
251 if (result != OK)
252 return result;
253 result = compare_utf8_to_utf16 (str3, src3, 3, 1);
254 if (result != OK)
255 return result;
256 result = compare_utf8_to_utf16 (str4, src4, 3, 1);
257 if (result != OK)
258 return result;
260 return OK;
263 RESULT
264 test_utf8_to_utf16_with_nuls ()
266 const gchar *src0 = "", *src1 = "AB\0DE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81";
267 gunichar2 str0 [] = {0}, str1 [] = {'A', 'B', 0, 'D', 'E', 0}, str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0};
268 RESULT result;
270 #if !defined(EGLIB_TESTS)
271 return OK;
272 #endif
274 /* implicit length is forbidden */
275 if (eg_utf8_to_utf16_with_nuls (src1, -1, NULL, NULL, NULL) != NULL)
276 return FAILED ("explicit nulls must fail with -1 length\n");
278 /* empty string */
279 result = compare_utf8_to_utf16_with_nuls (str0, src0, 0, 0);
280 if (result != OK)
281 return result;
283 result = compare_utf8_to_utf16_with_nuls (str1, src1, 5, 5);
284 if (result != OK)
285 return result;
286 result = compare_utf8_to_utf16_with_nuls (str2, src2, 4, 2);
287 if (result != OK)
288 return result;
289 result = compare_utf8_to_utf16_with_nuls (str3, src3, 3, 1);
290 if (result != OK)
291 return result;
292 result = compare_utf8_to_utf16_with_nuls (str4, src4, 3, 1);
293 if (result != OK)
294 return result;
296 return OK;
/* One loaded test sample: a heap buffer together with its size in bytes. */
typedef struct {
	char   *content;	/* sample bytes; released with g_free by test_convert */
	size_t  length;		/* number of bytes stored in content */
} convert_result_t;
304 RESULT
305 test_convert ()
307 static const char *charsets[] = { "UTF-8", "UTF-16LE", "UTF-16BE", "UTF-32LE", "UTF-32BE" };
308 gsize length, converted_length, n;
309 char *content, *converted, *path;
310 convert_result_t **expected;
311 GError *err = NULL;
312 const char *srcdir;
313 gboolean loaded;
314 guint i, j, k;
315 char c;
317 if (!(srcdir = getenv ("srcdir")) && !(srcdir = getenv ("PWD")))
318 return FAILED ("srcdir not defined!");
320 expected = g_malloc (sizeof (convert_result_t *) * G_N_ELEMENTS (charsets));
322 /* first load all our test samples... */
323 for (i = 0; i < G_N_ELEMENTS (charsets); i++) {
324 path = g_strdup_printf ("%s%c%s.txt", srcdir, G_DIR_SEPARATOR, charsets[i]);
325 loaded = g_file_get_contents (path, &content, &length, &err);
326 g_free (path);
328 if (!loaded) {
329 for (j = 0; j < i; j++) {
330 g_free (expected[j]->content);
331 g_free (expected[j]);
334 g_free (expected);
336 return FAILED ("Failed to load content for %s: %s", charsets[i], err->message);
339 expected[i] = g_malloc (sizeof (convert_result_t));
340 expected[i]->content = content;
341 expected[i]->length = length;
344 /* test conversion from every charset to every other charset */
345 for (i = 0; i < G_N_ELEMENTS (charsets); i++) {
346 for (j = 0; j < G_N_ELEMENTS (charsets); j++) {
347 converted = g_convert (expected[i]->content, expected[i]->length, charsets[j],
348 charsets[i], NULL, &converted_length, NULL);
350 if (converted == NULL) {
351 for (k = 0; k < G_N_ELEMENTS (charsets); k++) {
352 g_free (expected[k]->content);
353 g_free (expected[k]);
356 g_free (expected);
358 return FAILED ("Failed to convert from %s to %s: NULL", charsets[i], charsets[j]);
361 if (converted_length != expected[j]->length) {
362 length = expected[j]->length;
364 for (k = 0; k < G_N_ELEMENTS (charsets); k++) {
365 g_free (expected[k]->content);
366 g_free (expected[k]);
369 g_free (converted);
370 g_free (expected);
372 return FAILED ("Failed to convert from %s to %s: expected %u bytes, got %u",
373 charsets[i], charsets[j], length, converted_length);
376 for (n = 0; n < converted_length; n++) {
377 if (converted[n] != expected[j]->content[n]) {
378 c = expected[j]->content[n];
380 for (k = 0; k < G_N_ELEMENTS (charsets); k++) {
381 g_free (expected[k]->content);
382 g_free (expected[k]);
385 g_free (converted);
386 g_free (expected);
388 return FAILED ("Failed to convert from %s to %s: expected 0x%x at offset %u, got 0x%x",
389 charsets[i], charsets[j], c, n, converted[n]);
393 g_free (converted);
397 for (k = 0; k < G_N_ELEMENTS (charsets); k++) {
398 g_free (expected[k]->content);
399 g_free (expected[k]);
402 g_free (expected);
404 return OK;
408 RESULT
409 test_xdigit ()
411 static char test_chars[] = {
412 '0', '1', '2', '3', '4',
413 '5', '6', '7', '8', '9',
414 'a', 'b', 'c', 'd', 'e', 'f', 'g',
415 'A', 'B', 'C', 'D', 'E', 'F', 'G'};
416 static gint32 test_values[] = {
417 0, 1, 2, 3, 4,
418 5, 6, 7, 8, 9,
419 10, 11, 12, 13, 14, 15, -1,
420 10, 11, 12, 13, 14, 15, -1};
422 int i =0;
424 for (i = 0; i < sizeof(test_chars); i++)
425 if (g_unichar_xdigit_value ((gunichar)test_chars[i]) != test_values[i])
426 return FAILED("Incorrect value %d at index %d", test_values[i], i);
428 return OK;
431 static RESULT
432 ucs4_to_utf16_check_result (const gunichar2 *result_str, const gunichar2 *expected_str,
433 glong result_items_read, glong expected_items_read,
434 glong result_items_written, glong expected_items_written,
435 GError* result_error, gboolean expect_error)
437 glong i;
438 if (result_items_read != expected_items_read)
439 return FAILED("Incorrect number of items read; expected %d, got %d", expected_items_read, result_items_read);
440 if (result_items_written != expected_items_written)
441 return FAILED("Incorrect number of items written; expected %d, got %d", expected_items_written, result_items_written);
442 if (result_error && !expect_error)
443 return FAILED("There should not be an error code.");
444 if (!result_error && expect_error)
445 return FAILED("Unexpected error object.");
446 if (expect_error && result_str)
447 return FAILED("NULL should be returned when an error occurs.");
448 if (!expect_error && !result_str)
449 return FAILED("When no error occurs NULL should not be returned.");
450 for (i=0; i<expected_items_written;i++) {
451 if (result_str [i] != expected_str [i])
452 return FAILED("Incorrect value %d at index %d", result_str [i], i);
454 if (result_str && result_str[expected_items_written] != '\0')
455 return FAILED("Null termination not found at the end of the string.");
457 return OK;
460 RESULT
461 test_ucs4_to_utf16 ()
463 static gunichar str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
464 static gunichar2 exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
465 static gunichar str2[3] = {'h',0x80000000,'\0'};
466 static gunichar2 exp2[2] = {'h','\0'};
467 static gunichar str3[3] = {'h',0xDA00,'\0'};
468 static gunichar str4[3] = {'h',0x10FFFF,'\0'};
469 static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'};
470 static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'};
471 static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'};
472 static gunichar str6[2] = {0x10400, '\0'};
473 static gunichar2 exp6[3] = {0xD801, 0xDC00, '\0'};
474 static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2};
475 gunichar2* res;
476 glong items_read, items_written, current_write_index;
477 GError* err=0;
478 RESULT check_result;
479 glong i;
481 res = g_ucs4_to_utf16 (str1, 12, &items_read, &items_written, &err);
482 check_result = ucs4_to_utf16_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
483 if (check_result) return check_result;
484 g_free (res);
486 items_read = items_written = 0;
487 res = g_ucs4_to_utf16 (str2, 0, &items_read, &items_written, &err);
488 check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
489 if (check_result) return check_result;
490 g_free (res);
492 items_read = items_written = 0;
493 res = g_ucs4_to_utf16 (str2, 1, &items_read, &items_written, &err);
494 check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
495 if (check_result) return check_result;
496 g_free (res);
498 items_read = items_written = 0;
499 res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err);
500 check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
501 g_free (res);
502 if (check_result) return check_result;
504 items_read = items_written = 0;
505 err = 0;
506 res = g_ucs4_to_utf16 (str3, 2, &items_read, &items_written, &err);
507 check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
508 if (check_result) return check_result;
509 g_free (res);
511 items_read = items_written = 0;
512 err = 0;
513 res = g_ucs4_to_utf16 (str4, 5, &items_read, &items_written, &err);
514 check_result = ucs4_to_utf16_check_result (res, exp4, items_read, 2, items_written, 3, err, FALSE);
515 if (check_result) return check_result;
516 g_free (res);
518 // This loop tests the bounds of the conversion algorithm
519 current_write_index = 0;
520 for (i=0;i<6;i++) {
521 items_read = items_written = 0;
522 err = 0;
523 res = g_ucs4_to_utf16 (&str5[i], 1, &items_read, &items_written, &err);
524 check_result = ucs4_to_utf16_check_result (res, &exp5[current_write_index],
525 items_read, read_write[i*2], items_written, read_write[(i*2)+1], err, !read_write[(i*2)+1]);
526 if (check_result) return check_result;
527 g_free (res);
528 current_write_index += items_written;
531 items_read = items_written = 0;
532 err = 0;
533 res = g_ucs4_to_utf16 (str6, 1, &items_read, &items_written, &err);
534 check_result = ucs4_to_utf16_check_result (res, exp6, items_read, 1, items_written, 2, err, FALSE);
535 if (check_result) return check_result;
536 g_free (res);
538 return OK;
541 static RESULT
542 utf16_to_ucs4_check_result (const gunichar *result_str, const gunichar *expected_str,
543 glong result_items_read, glong expected_items_read,
544 glong result_items_written, glong expected_items_written,
545 GError* result_error, gboolean expect_error)
547 glong i;
548 if (result_items_read != expected_items_read)
549 return FAILED("Incorrect number of items read; expected %d, got %d", expected_items_read, result_items_read);
550 if (result_items_written != expected_items_written)
551 return FAILED("Incorrect number of items written; expected %d, got %d", expected_items_written, result_items_written);
552 if (result_error && !expect_error)
553 return FAILED("There should not be an error code.");
554 if (!result_error && expect_error)
555 return FAILED("Unexpected error object.");
556 if (expect_error && result_str)
557 return FAILED("NULL should be returned when an error occurs.");
558 if (!expect_error && !result_str)
559 return FAILED("When no error occurs NULL should not be returned.");
560 for (i=0; i<expected_items_written;i++) {
561 if (result_str [i] != expected_str [i])
562 return FAILED("Incorrect value %d at index %d", result_str [i], i);
564 if (result_str && result_str[expected_items_written] != '\0')
565 return FAILED("Null termination not found at the end of the string.");
567 return OK;
570 RESULT
571 test_utf16_to_ucs4 ()
573 static gunichar2 str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
574 static gunichar exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
575 static gunichar2 str2[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'};
576 static gunichar exp2[3] = {'H',0x00010001,'\0'};
577 static gunichar2 str3[4] = {'H', 0xDC00 ,'l','\0'};
578 static gunichar exp3[2] = {'H','\0'};
579 static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
580 0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
581 static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
582 static gunichar2 str5[3] = {0xD801, 0xDC00, 0};
583 static gunichar exp5[2] = {0x10400, 0};
584 static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
585 gunichar* res;
586 glong items_read, items_written, current_read_index,current_write_index;
587 GError* err=0;
588 RESULT check_result;
589 glong i;
591 res = g_utf16_to_ucs4 (str1, 12, &items_read, &items_written, &err);
592 check_result = utf16_to_ucs4_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
593 if (check_result) return check_result;
594 g_free (res);
596 items_read = items_written = 0;
597 res = g_utf16_to_ucs4 (str2, 0, &items_read, &items_written, &err);
598 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
599 if (check_result) return check_result;
600 g_free (res);
602 items_read = items_written = 0;
603 res = g_utf16_to_ucs4 (str2, 1, &items_read, &items_written, &err);
604 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
605 if (check_result) return check_result;
606 g_free (res);
608 items_read = items_written = 0;
609 res = g_utf16_to_ucs4 (str2, 2, &items_read, &items_written, &err);
610 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
611 if (check_result) return check_result;
612 g_free (res);
614 items_read = items_written = 0;
615 res = g_utf16_to_ucs4 (str2, 3, &items_read, &items_written, &err);
616 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
617 if (check_result) return check_result;
618 g_free (res);
620 items_read = items_written = 0;
621 res = g_utf16_to_ucs4 (str2, 4, &items_read, &items_written, &err);
622 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
623 if (check_result) return check_result;
624 g_free (res);
626 items_read = items_written = 0;
627 res = g_utf16_to_ucs4 (str2, 5, &items_read, &items_written, &err);
628 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 4, items_written, 0, err, TRUE);
629 if (check_result) return check_result;
630 g_free (res);
632 items_read = items_written = 0;
633 err = 0;
634 res = g_utf16_to_ucs4 (str3, 5, &items_read, &items_written, &err);
635 check_result = utf16_to_ucs4_check_result (res, exp3, items_read, 1, items_written, 0, err, TRUE);
636 if (check_result) return check_result;
637 g_free (res);
639 // This loop tests the bounds of the conversion algorithm
640 current_read_index = current_write_index = 0;
641 for (i=0;i<11;i++) {
642 items_read = items_written = 0;
643 err = 0;
644 res = g_utf16_to_ucs4 (&str4[current_read_index], read_write[i*3], &items_read, &items_written, &err);
645 check_result = utf16_to_ucs4_check_result (res, &exp4[current_write_index], items_read,
646 read_write[(i*3)+1], items_written, read_write[(i*3)+2], err,
647 !read_write[(i*3)+2]);
648 if (check_result) return check_result;
649 g_free (res);
650 current_read_index += read_write[i*3];
651 current_write_index += items_written;
654 items_read = items_written = 0;
655 err = 0;
656 res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err);
657 check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE);
658 if (check_result) return check_result;
659 g_free (res);
661 return OK;
663 RESULT
664 test_utf8_strlen ()
666 gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'};//Valid, len = 5
667 gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'};//Valid, len = 5
668 gchar word3 [] = {'h','e',0xC2, 0x82,0x45,'\0'}; //Valid, len = 4
669 gchar word4 [] = {0x62,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
671 glong len = 0;
673 //Test word1
674 len = g_utf8_strlen (word1,-1);
675 if (len != 5)
676 return FAILED ("Word1 expected length of 5, but was %i", len);
677 //Do tests with different values for max parameter.
678 len = g_utf8_strlen (word1,1);
679 if (len != 0)
680 return FAILED ("Word1, max = 1, expected length of 0, but was %i", len);
681 len = g_utf8_strlen (word1,2);
682 if (len != 1)
683 return FAILED ("Word1, max = 1, expected length of 1, but was %i", len);
684 len = g_utf8_strlen (word1,3);
685 if (len != 2)
686 return FAILED ("Word1, max = 2, expected length of 2, but was %i", len);
688 //Test word2
689 len = g_utf8_strlen (word2,-1);
690 if (len != 5)
691 return FAILED ("Word2 expected length of 5, but was %i", len);
693 //Test word3
694 len = g_utf8_strlen (word3,-1);
695 if (len != 4)
696 return FAILED ("Word3 expected length of 4, but was %i", len);
698 //Test word4
699 len = g_utf8_strlen (word4,-1);
700 if (len != 5)
701 return FAILED ("Word4 expected length of 5, but was %i", len);
703 //Test null case
704 len = g_utf8_strlen(NULL,0);
705 if (len != 0)
706 return FAILED ("Expected passing null to result in a length of 0");
707 return OK;
710 RESULT
711 test_utf8_get_char()
713 gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
715 gunichar value = g_utf8_get_char (&word1 [0]);
716 if (value != 0x82UL)
717 return FAILED ("Expected value of 0x82, but was %x", value);
718 value = g_utf8_get_char (&word1 [2]);
719 if (value != 0x45UL)
720 return FAILED ("Expected value of 0x45, but was %x", value);
721 value = g_utf8_get_char (&word1 [3]);
722 if (value != 0x1043UL)
723 return FAILED ("Expected value of 0x1043, but was %x", value);
724 value = g_utf8_get_char (&word1 [6]);
725 if (value != 0x58UL)
726 return FAILED ("Expected value of 0x58, but was %x", value);
727 value = g_utf8_get_char (&word1 [7]);
728 if (value != 0x42082UL)
729 return FAILED ("Expected value of 0x42082, but was %x", value);
731 return OK;
734 RESULT
735 test_utf8_next_char()
737 gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
738 gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
739 gchar word1ExpectedValues [] = {0xC2, 0x45,0xE1, 0x58, 0xF1};
740 gchar word2ExpectedValues [] = {0xF1, 0xC2, 0x45, 0xE1, 0x58};
742 gchar* ptr = word1;
743 gint count = 0;
744 //Test word1
745 while (*ptr != 0) {
746 if (count > 4)
747 return FAILED ("Word1 has gone past its expected length");
748 if (*ptr != word1ExpectedValues[count])
749 return FAILED ("Word1 has an incorrect next_char at index %i", count);
750 ptr = g_utf8_next_char (ptr);
751 count++;
754 //Test word2
755 count = 0;
756 ptr = word2;
757 while (*ptr != 0) {
758 if (count > 4)
759 return FAILED ("Word2 has gone past its expected length");
760 if (*ptr != word2ExpectedValues[count])
761 return FAILED ("Word2 has an incorrect next_char at index %i", count);
762 ptr = g_utf8_next_char (ptr);
763 count++;
766 return OK;
769 RESULT
770 test_utf8_validate()
772 gchar invalidWord1 [] = {0xC3, 0x82, 0xC1,0x90,'\0'}; //Invalid, 1nd oct Can't be 0xC0 or 0xC1
773 gchar invalidWord2 [] = {0xC1, 0x89, 0x60, '\0'}; //Invalid, 1st oct can not be 0xC1
774 gchar invalidWord3 [] = {0xC2, 0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Invalid, oct after 0xC2 must be > 0x80
776 gchar validWord1 [] = {0xC2, 0x82, 0xC3,0xA0,'\0'}; //Valid
777 gchar validWord2 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid
779 const gchar* end;
780 gboolean retVal = g_utf8_validate (invalidWord1, -1, &end);
781 if (retVal != FALSE)
782 return FAILED ("Expected invalidWord1 to be invalid");
783 if (end != &invalidWord1 [2])
784 return FAILED ("Expected end parameter to be pointing to invalidWord1[2]");
786 end = NULL;
787 retVal = g_utf8_validate (invalidWord2, -1, &end);
788 if (retVal != FALSE)
789 return FAILED ("Expected invalidWord2 to be invalid");
790 if (end != &invalidWord2 [0])
791 return FAILED ("Expected end parameter to be pointing to invalidWord2[0]");
793 end = NULL;
794 retVal = g_utf8_validate (invalidWord3, -1, &end);
795 if (retVal != FALSE)
796 return FAILED ("Expected invalidWord3 to be invalid");
797 if (end != &invalidWord3 [0])
798 return FAILED ("Expected end parameter to be pointing to invalidWord3[1]");
800 end = NULL;
801 retVal = g_utf8_validate (validWord1, -1, &end);
802 if (retVal != TRUE)
803 return FAILED ("Expected validWord1 to be valid");
804 if (end != &validWord1 [4])
805 return FAILED ("Expected end parameter to be pointing to validWord1[4]");
807 end = NULL;
808 retVal = g_utf8_validate (validWord2, -1, &end);
809 if (retVal != TRUE)
810 return FAILED ("Expected validWord2 to be valid");
811 if (end != &validWord2 [11])
812 return FAILED ("Expected end parameter to be pointing to validWord2[11]");
813 return OK;
816 glong
817 utf8_byteslen (const gchar *src)
819 int i = 0;
820 do {
821 if (src [i] == '\0')
822 return i;
823 i++;
824 } while (TRUE);
827 RESULT
828 test_utf8_strcase_each (const gchar *src, const gchar *expected, gboolean strup)
830 gchar *tmp;
831 glong len, len2;
832 RESULT r;
834 len = utf8_byteslen (src);
835 tmp = strup ? g_utf8_strup (src, len) : g_utf8_strdown (src, len);
836 len2 = utf8_byteslen (tmp);
837 r = compare_strings_utf8_RESULT (expected, tmp, len < len2 ? len2 : len);
838 g_free (tmp);
839 return r;
842 RESULT
843 test_utf8_strup_each (const gchar *src, const gchar *expected)
845 return test_utf8_strcase_each (src, expected, TRUE);
848 RESULT
849 test_utf8_strdown_each (const gchar *src, const gchar *expected)
851 return test_utf8_strcase_each (src, expected, FALSE);
/*
 * g_utf8_strup
 */
857 RESULT
858 test_utf8_strup ()
860 RESULT r;
862 if ((r = test_utf8_strup_each ("aBc", "ABC")) != OK)
863 return r;
864 if ((r = test_utf8_strup_each ("x86-64", "X86-64")) != OK)
865 return r;
866 // U+3B1 U+392 -> U+391 U+392
867 if ((r = test_utf8_strup_each ("\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92")) != OK)
868 return r;
869 // U+FF21 -> U+FF21
870 if ((r = test_utf8_strup_each ("\xEF\xBC\xA1", "\xEF\xBC\xA1")) != OK)
871 return r;
872 // U+FF41 -> U+FF21
873 if ((r = test_utf8_strup_each ("\xEF\xBD\x81", "\xEF\xBC\xA1")) != OK)
874 return r;
875 // U+10428 -> U+10400
876 if ((r = test_utf8_strup_each ("\xF0\x90\x90\xA8", "\xF0\x90\x90\x80")) != OK)
877 return r;
879 return OK;
/*
 * g_utf8_strdown
 */
885 RESULT
886 test_utf8_strdown ()
888 RESULT r;
890 if ((r = test_utf8_strdown_each ("aBc", "abc")) != OK)
891 return r;
892 if ((r = test_utf8_strdown_each ("X86-64", "x86-64")) != OK)
893 return r;
894 // U+391 U+3B2 -> U+3B1 U+3B2
895 if ((r = test_utf8_strdown_each ("\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2")) != OK)
896 return r;
898 // U+FF41 -> U+FF41
899 if ((r = test_utf8_strdown_each ("\xEF\xBC\x81", "\xEF\xBC\x81")) != OK)
900 return r;
901 // U+FF21 -> U+FF41
902 if ((r = test_utf8_strdown_each ("\xEF\xBC\xA1", "\xEF\xBD\x81")) != OK)
903 return r;
904 // U+10400 -> U+10428
905 if ((r = test_utf8_strdown_each ("\xF0\x90\x90\x80", "\xF0\x90\x90\xA8")) != OK)
906 return r;
908 return OK;
/*
 * test initialization
 */
915 static Test utf8_tests [] = {
916 {"g_utf16_to_utf8", test_utf16_to_utf8},
917 {"g_utf8_to_utf16", test_utf8_to_utf16},
918 {"g_utf8_to_utf16_with_nuls", test_utf8_to_utf16_with_nuls},
919 {"g_utf8_seq", test_utf8_seq},
920 {"g_convert", test_convert },
921 {"g_unichar_xdigit_value", test_xdigit },
922 {"g_ucs4_to_utf16", test_ucs4_to_utf16 },
923 {"g_utf16_to_ucs4", test_utf16_to_ucs4 },
924 {"g_utf8_strlen", test_utf8_strlen },
925 {"g_utf8_get_char", test_utf8_get_char },
926 {"g_utf8_next_char", test_utf8_next_char },
927 {"g_utf8_validate", test_utf8_validate },
928 {"g_utf8_strup", test_utf8_strup},
929 {"g_utf8_strdown", test_utf8_strdown},
930 {NULL, NULL}
933 DEFINE_TEST_GROUP_INIT(utf8_tests_init, utf8_tests)