2010-05-14 Rodrigo Kumpera <rkumpera@novell.com>
[mono.git] / eglib / test / utf8.c
blobe7f04059d5b6670e7b5b802e838386bd26ef6c69
1 #include "test.h"
3 /*
4 * g_utf16_to_utf8
5 */
7 glong
8 compare_strings_utf8_pos (const gchar *expected, const gchar *actual, glong size)
10 int i;
11 for (i = 0; i < size; i++)
12 if (expected [i] != actual [i])
13 return i;
14 return -1;
17 RESULT
18 compare_strings_utf8_RESULT (const gchar *expected, const gchar *actual, glong size)
20 glong ret;
22 ret = compare_strings_utf8_pos (expected, actual, size);
23 if (ret < 0)
24 return OK;
25 return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d\n", expected, actual, ret);
28 void
29 gchar_to_gunichar2 (gunichar2 ret[], const gchar *src)
31 int i;
33 for (i = 0; src [i]; i++)
34 ret [i] = src [i];
35 ret [i] = 0;
38 RESULT
39 compare_utf16_to_utf8_explicit (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out, glong size_spec)
41 GError *error;
42 gchar* ret;
43 RESULT result;
44 glong in_read, out_read;
46 result = NULL;
48 error = NULL;
49 ret = g_utf16_to_utf8 (utf16, size_spec, &in_read, &out_read, &error);
50 if (error) {
51 result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
52 g_error_free (error);
53 if (ret)
54 g_free (ret);
55 return result;
57 if (in_read != len_in)
58 result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
59 else if (out_read != len_out)
60 result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
61 else
62 result = compare_strings_utf8_RESULT (expected, ret, len_out);
64 g_free (ret);
65 if (result)
66 return result;
68 return OK;
71 RESULT
72 compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out)
74 RESULT result;
76 result = compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, -1);
77 if (result != OK)
78 return result;
79 return compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, len_in);
82 RESULT
83 test_utf16_to_utf8 ()
85 const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81", *src5 = "\xF0\x90\x90\x80";
86 gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}, str5 [] = {0xD801, 0xDC00, 0};
87 RESULT result;
89 gchar_to_gunichar2 (str1, src1);
91 /* empty string */
92 result = compare_utf16_to_utf8 (src0, str0, 0, 0);
93 if (result != OK)
94 return result;
96 result = compare_utf16_to_utf8 (src1, str1, 5, 5);
97 if (result != OK)
98 return result;
99 result = compare_utf16_to_utf8 (src2, str2, 2, 4);
100 if (result != OK)
101 return result;
102 result = compare_utf16_to_utf8 (src3, str3, 1, 3);
103 if (result != OK)
104 return result;
105 result = compare_utf16_to_utf8 (src4, str4, 1, 3);
106 if (result != OK)
107 return result;
108 result = compare_utf16_to_utf8 (src5, str5, 2, 4);
109 if (result != OK)
110 return result;
112 return OK;
/*
 * g_utf8_to_utf16
 */
119 glong
120 compare_strings_utf16_pos (const gunichar2 *expected, const gunichar2 *actual, glong size)
122 int i;
123 for (i = 0; i < size; i++)
124 if (expected [i] != actual [i])
125 return i;
126 return -1;
129 RESULT
130 compare_strings_utf16_RESULT (const gunichar2 *expected, const gunichar2 *actual, glong size)
132 glong ret;
134 ret = compare_strings_utf16_pos (expected, actual, size);
135 if (ret < 0)
136 return OK;
137 return FAILED ("Incorrect output: expected '%s' but was '%s'\n", expected, actual);
140 RESULT
141 compare_utf8_to_utf16_explicit (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, glong size_spec)
143 GError *error;
144 gunichar2* ret;
145 RESULT result;
146 glong in_read, out_read;
148 result = NULL;
150 error = NULL;
151 ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &error);
152 if (error) {
153 result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
154 g_error_free (error);
155 if (ret)
156 g_free (ret);
157 return result;
159 if (in_read != len_in)
160 result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
161 else if (out_read != len_out)
162 result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
163 else
164 result = compare_strings_utf16_RESULT (expected, ret, len_out);
166 g_free (ret);
167 if (result)
168 return result;
170 return OK;
174 RESULT
175 compare_utf8_to_utf16 (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out)
177 RESULT result;
179 result = compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, -1);
180 if (result != OK)
181 return result;
182 return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in);
185 RESULT
186 test_utf8_seq ()
188 const gchar *src = "\xE5\xB9\xB4\x27";
189 glong in_read, out_read;
190 //gunichar2 expected [6];
191 GError *error = NULL;
192 gunichar2 *dst;
194 printf ("got: %s\n", src);
195 dst = g_utf8_to_utf16 (src, (glong)strlen (src), &in_read, &out_read, &error);
196 if (error != NULL){
197 return error->message;
200 if (in_read != 4) {
201 return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
203 if (out_read != 2) {
204 return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
206 g_free (dst);
208 return OK;
211 RESULT
212 test_utf8_to_utf16 ()
214 const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81";
215 gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0};
216 RESULT result;
218 gchar_to_gunichar2 (str1, src1);
220 /* empty string */
221 result = compare_utf8_to_utf16 (str0, src0, 0, 0);
222 if (result != OK)
223 return result;
225 result = compare_utf8_to_utf16 (str1, src1, 5, 5);
226 if (result != OK)
227 return result;
228 result = compare_utf8_to_utf16 (str2, src2, 4, 2);
229 if (result != OK)
230 return result;
231 result = compare_utf8_to_utf16 (str3, src3, 3, 1);
232 if (result != OK)
233 return result;
234 result = compare_utf8_to_utf16 (str4, src4, 3, 1);
235 if (result != OK)
236 return result;
238 return OK;
241 RESULT
242 test_convert ()
244 gsize n;
245 char *s = g_convert ("\242\241\243\242\241\243\242\241\243\242\241\243", -1, "UTF-8", "ISO-8859-1", NULL, &n, NULL);
246 guchar *u = (guchar *) s;
248 if (!s)
249 return FAILED ("Expected 24 bytes, got: NULL");
251 if (strlen (s) != 24)
252 return FAILED ("Expected 24 bytes, got: %d", strlen (s));
254 if (u [1] != 162 || u [2] != 194 ||
255 u [3] != 161 || u [4] != 194 ||
256 u [5] != 163 || u [6] != 194)
257 return FAILED ("Incorrect conversion");
259 g_free (s);
261 return OK;
265 RESULT
266 test_xdigit ()
268 static char test_chars[] = {
269 '0', '1', '2', '3', '4',
270 '5', '6', '7', '8', '9',
271 'a', 'b', 'c', 'd', 'e', 'f', 'g',
272 'A', 'B', 'C', 'D', 'E', 'F', 'G'};
273 static gint32 test_values[] = {
274 0, 1, 2, 3, 4,
275 5, 6, 7, 8, 9,
276 10, 11, 12, 13, 14, 15, -1,
277 10, 11, 12, 13, 14, 15, -1};
279 int i =0;
281 for (i = 0; i < sizeof(test_chars); i++)
282 if (g_unichar_xdigit_value ((gunichar)test_chars[i]) != test_values[i])
283 return FAILED("Incorrect value %d at index %d", test_values[i], i);
285 return OK;
288 static RESULT
289 ucs4_to_utf16_check_result (const gunichar2 *result_str, const gunichar2 *expected_str,
290 glong result_items_read, glong expected_items_read,
291 glong result_items_written, glong expected_items_written,
292 GError* result_error, gboolean expect_error)
294 glong i;
295 if (result_items_read != expected_items_read)
296 return FAILED("Incorrect number of items read %d", result_items_read);
297 if (result_items_written != expected_items_written)
298 return FAILED("Incorrect number of items written %d", result_items_written);
299 if (result_error && !expect_error)
300 return FAILED("There should not be an error code.");
301 if (!result_error && expect_error)
302 return FAILED("Unexpected error object.");
303 if (expect_error && result_str)
304 return FAILED("NULL should be returned when an error occurs.");
305 if (!expect_error && !result_str)
306 return FAILED("When no error occurs NULL should not be returned.");
307 for (i=0; i<expected_items_written;i++) {
308 if (result_str [i] != expected_str [i])
309 return FAILED("Incorrect value %d at index %d", result_str [i], i);
311 if (result_str && result_str[expected_items_written] != '\0')
312 return FAILED("Null termination not found at the end of the string.");
314 return OK;
317 RESULT
318 test_ucs4_to_utf16 ()
320 static gunichar str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
321 static gunichar2 exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
322 static gunichar str2[3] = {'h',0x80000000,'\0'};
323 static gunichar2 exp2[2] = {'h','\0'};
324 static gunichar str3[3] = {'h',0xDA00,'\0'};
325 static gunichar str4[3] = {'h',0x10FFFF,'\0'};
326 static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'};
327 static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'};
328 static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'};
329 static gunichar str6[2] = {0x10400, '\0'};
330 static gunichar2 exp6[3] = {0xD801, 0xDC00, '\0'};
331 static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2};
332 gunichar2* res;
333 glong items_read, items_written, current_write_index;
334 GError* err=0;
335 RESULT check_result;
336 glong i;
338 res = g_ucs4_to_utf16 (str1, 12, &items_read, &items_written, &err);
339 check_result = ucs4_to_utf16_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
340 if (check_result) return check_result;
341 g_free (res);
343 items_read = items_written = 0;
344 res = g_ucs4_to_utf16 (str2, 0, &items_read, &items_written, &err);
345 check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
346 if (check_result) return check_result;
347 g_free (res);
349 items_read = items_written = 0;
350 res = g_ucs4_to_utf16 (str2, 1, &items_read, &items_written, &err);
351 check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
352 if (check_result) return check_result;
353 g_free (res);
355 items_read = items_written = 0;
356 res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err);
357 check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
358 g_free (res);
359 if (check_result) return check_result;
361 items_read = items_written = 0;
362 err = 0;
363 res = g_ucs4_to_utf16 (str3, 2, &items_read, &items_written, &err);
364 check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
365 if (check_result) return check_result;
366 g_free (res);
368 items_read = items_written = 0;
369 err = 0;
370 res = g_ucs4_to_utf16 (str4, 5, &items_read, &items_written, &err);
371 check_result = ucs4_to_utf16_check_result (res, exp4, items_read, 2, items_written, 3, err, FALSE);
372 if (check_result) return check_result;
373 g_free (res);
375 // This loop tests the bounds of the conversion algorithm
376 current_write_index = 0;
377 for (i=0;i<6;i++) {
378 items_read = items_written = 0;
379 err = 0;
380 res = g_ucs4_to_utf16 (&str5[i], 1, &items_read, &items_written, &err);
381 check_result = ucs4_to_utf16_check_result (res, &exp5[current_write_index],
382 items_read, read_write[i*2], items_written, read_write[(i*2)+1], err, !read_write[(i*2)+1]);
383 if (check_result) return check_result;
384 g_free (res);
385 current_write_index += items_written;
388 items_read = items_written = 0;
389 err = 0;
390 res = g_ucs4_to_utf16 (str6, 1, &items_read, &items_written, &err);
391 check_result = ucs4_to_utf16_check_result (res, exp6, items_read, 1, items_written, 2, err, FALSE);
392 if (check_result) return check_result;
393 g_free (res);
395 return OK;
398 static RESULT
399 utf16_to_ucs4_check_result (const gunichar *result_str, const gunichar *expected_str,
400 glong result_items_read, glong expected_items_read,
401 glong result_items_written, glong expected_items_written,
402 GError* result_error, gboolean expect_error)
404 glong i;
405 if (result_items_read != expected_items_read)
406 return FAILED("Incorrect number of items read %d", result_items_read);
407 if (result_items_written != expected_items_written)
408 return FAILED("Incorrect number of items written %d", result_items_written);
409 if (result_error && !expect_error)
410 return FAILED("There should not be an error code.");
411 if (!result_error && expect_error)
412 return FAILED("Unexpected error object.");
413 if (expect_error && result_str)
414 return FAILED("NULL should be returned when an error occurs.");
415 if (!expect_error && !result_str)
416 return FAILED("When no error occurs NULL should not be returned.");
417 for (i=0; i<expected_items_written;i++) {
418 if (result_str [i] != expected_str [i])
419 return FAILED("Incorrect value %d at index %d", result_str [i], i);
421 if (result_str && result_str[expected_items_written] != '\0')
422 return FAILED("Null termination not found at the end of the string.");
424 return OK;
427 RESULT
428 test_utf16_to_ucs4 ()
430 static gunichar2 str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
431 static gunichar exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
432 static gunichar2 str2[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'};
433 static gunichar exp2[3] = {'H',0x00010001,'\0'};
434 static gunichar2 str3[4] = {'H', 0xDC00 ,'l','\0'};
435 static gunichar exp3[2] = {'H','\0'};
436 static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
437 0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
438 static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
439 static gunichar2 str5[3] = {0xD801, 0xDC00, 0};
440 static gunichar exp5[2] = {0x10400, 0};
441 static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
442 gunichar* res;
443 glong items_read, items_written, current_read_index,current_write_index;
444 GError* err=0;
445 RESULT check_result;
446 glong i;
448 res = g_utf16_to_ucs4 (str1, 12, &items_read, &items_written, &err);
449 check_result = utf16_to_ucs4_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
450 if (check_result) return check_result;
451 g_free (res);
453 items_read = items_written = 0;
454 res = g_utf16_to_ucs4 (str2, 0, &items_read, &items_written, &err);
455 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
456 if (check_result) return check_result;
457 g_free (res);
459 items_read = items_written = 0;
460 res = g_utf16_to_ucs4 (str2, 1, &items_read, &items_written, &err);
461 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
462 if (check_result) return check_result;
463 g_free (res);
465 items_read = items_written = 0;
466 res = g_utf16_to_ucs4 (str2, 2, &items_read, &items_written, &err);
467 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
468 if (check_result) return check_result;
469 g_free (res);
471 items_read = items_written = 0;
472 res = g_utf16_to_ucs4 (str2, 3, &items_read, &items_written, &err);
473 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
474 if (check_result) return check_result;
475 g_free (res);
477 items_read = items_written = 0;
478 res = g_utf16_to_ucs4 (str2, 4, &items_read, &items_written, &err);
479 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
480 if (check_result) return check_result;
481 g_free (res);
483 items_read = items_written = 0;
484 res = g_utf16_to_ucs4 (str2, 5, &items_read, &items_written, &err);
485 check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 4, items_written, 0, err, TRUE);
486 if (check_result) return check_result;
487 g_free (res);
489 items_read = items_written = 0;
490 err = 0;
491 res = g_utf16_to_ucs4 (str3, 5, &items_read, &items_written, &err);
492 check_result = utf16_to_ucs4_check_result (res, exp3, items_read, 1, items_written, 0, err, TRUE);
493 if (check_result) return check_result;
494 g_free (res);
496 // This loop tests the bounds of the conversion algorithm
497 current_read_index = current_write_index = 0;
498 for (i=0;i<11;i++) {
499 items_read = items_written = 0;
500 err = 0;
501 res = g_utf16_to_ucs4 (&str4[current_read_index], read_write[i*3], &items_read, &items_written, &err);
502 check_result = utf16_to_ucs4_check_result (res, &exp4[current_write_index], items_read,
503 read_write[(i*3)+1], items_written, read_write[(i*3)+2], err,
504 !read_write[(i*3)+2]);
505 if (check_result) return check_result;
506 g_free (res);
507 current_read_index += read_write[i*3];
508 current_write_index += items_written;
511 items_read = items_written = 0;
512 err = 0;
513 res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err);
514 check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE);
515 if (check_result) return check_result;
516 g_free (res);
518 return OK;
520 RESULT
521 test_utf8_strlen ()
523 gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'};//Valid, len = 5
524 gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'};//Valid, len = 5
525 gchar word3 [] = {'h','e',0xC2, 0x82,0x45,'\0'}; //Valid, len = 4
526 gchar word4 [] = {0x62,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
528 glong len = 0;
530 //Test word1
531 len = g_utf8_strlen (word1,-1);
532 if (len != 5)
533 return FAILED ("Word1 expected length of 5, but was %i", len);
534 //Do tests with different values for max parameter.
535 len = g_utf8_strlen (word1,1);
536 if (len != 0)
537 return FAILED ("Word1, max = 1, expected length of 0, but was %i", len);
538 len = g_utf8_strlen (word1,2);
539 if (len != 1)
540 return FAILED ("Word1, max = 1, expected length of 1, but was %i", len);
541 len = g_utf8_strlen (word1,3);
542 if (len != 2)
543 return FAILED ("Word1, max = 2, expected length of 2, but was %i", len);
545 //Test word2
546 len = g_utf8_strlen (word2,-1);
547 if (len != 5)
548 return FAILED ("Word2 expected length of 5, but was %i", len);
550 //Test word3
551 len = g_utf8_strlen (word3,-1);
552 if (len != 4)
553 return FAILED ("Word3 expected length of 4, but was %i", len);
555 //Test word4
556 len = g_utf8_strlen (word4,-1);
557 if (len != 5)
558 return FAILED ("Word4 expected length of 5, but was %i", len);
560 //Test null case
561 len = g_utf8_strlen(NULL,0);
562 if (len != 0)
563 return FAILED ("Expected passing null to result in a length of 0");
564 return OK;
567 RESULT
568 test_utf8_get_char()
570 gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
572 gunichar value = g_utf8_get_char (&word1 [0]);
573 if (value != 0x82UL)
574 return FAILED ("Expected value of 0x82, but was %x", value);
575 value = g_utf8_get_char (&word1 [2]);
576 if (value != 0x45UL)
577 return FAILED ("Expected value of 0x45, but was %x", value);
578 value = g_utf8_get_char (&word1 [3]);
579 if (value != 0x1043UL)
580 return FAILED ("Expected value of 0x1043, but was %x", value);
581 value = g_utf8_get_char (&word1 [6]);
582 if (value != 0x58UL)
583 return FAILED ("Expected value of 0x58, but was %x", value);
584 value = g_utf8_get_char (&word1 [7]);
585 if (value != 0x42082UL)
586 return FAILED ("Expected value of 0x42082, but was %x", value);
588 return OK;
591 RESULT
592 test_utf8_next_char()
594 gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
595 gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
596 gchar word1ExpectedValues [] = {0xC2, 0x45,0xE1, 0x58, 0xF1};
597 gchar word2ExpectedValues [] = {0xF1, 0xC2, 0x45, 0xE1, 0x58};
599 gchar* ptr = word1;
600 gint count = 0;
601 //Test word1
602 while (*ptr != 0) {
603 if (count > 4)
604 return FAILED ("Word1 has gone past its expected length");
605 if (*ptr != word1ExpectedValues[count])
606 return FAILED ("Word1 has an incorrect next_char at index %i", count);
607 ptr = g_utf8_next_char (ptr);
608 count++;
611 //Test word2
612 count = 0;
613 ptr = word2;
614 while (*ptr != 0) {
615 if (count > 4)
616 return FAILED ("Word2 has gone past its expected length");
617 if (*ptr != word2ExpectedValues[count])
618 return FAILED ("Word2 has an incorrect next_char at index %i", count);
619 ptr = g_utf8_next_char (ptr);
620 count++;
623 return OK;
626 RESULT
627 test_utf8_validate()
629 gchar invalidWord1 [] = {0xC3, 0x82, 0xC1,0x90,'\0'}; //Invalid, 1nd oct Can't be 0xC0 or 0xC1
630 gchar invalidWord2 [] = {0xC1, 0x89, 0x60, '\0'}; //Invalid, 1st oct can not be 0xC1
631 gchar invalidWord3 [] = {0xC2, 0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Invalid, oct after 0xC2 must be > 0x80
633 gchar validWord1 [] = {0xC2, 0x82, 0xC3,0xA0,'\0'}; //Valid
634 gchar validWord2 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid
636 const gchar* end;
637 gboolean retVal = g_utf8_validate (invalidWord1, -1, &end);
638 if (retVal != FALSE)
639 return FAILED ("Expected invalidWord1 to be invalid");
640 if (end != &invalidWord1 [2])
641 return FAILED ("Expected end parameter to be pointing to invalidWord1[2]");
643 end = NULL;
644 retVal = g_utf8_validate (invalidWord2, -1, &end);
645 if (retVal != FALSE)
646 return FAILED ("Expected invalidWord2 to be invalid");
647 if (end != &invalidWord2 [0])
648 return FAILED ("Expected end parameter to be pointing to invalidWord2[0]");
650 end = NULL;
651 retVal = g_utf8_validate (invalidWord3, -1, &end);
652 if (retVal != FALSE)
653 return FAILED ("Expected invalidWord3 to be invalid");
654 if (end != &invalidWord3 [0])
655 return FAILED ("Expected end parameter to be pointing to invalidWord3[1]");
657 end = NULL;
658 retVal = g_utf8_validate (validWord1, -1, &end);
659 if (retVal != TRUE)
660 return FAILED ("Expected validWord1 to be valid");
661 if (end != &validWord1 [4])
662 return FAILED ("Expected end parameter to be pointing to validWord1[4]");
664 end = NULL;
665 retVal = g_utf8_validate (validWord2, -1, &end);
666 if (retVal != TRUE)
667 return FAILED ("Expected validWord2 to be valid");
668 if (end != &validWord2 [11])
669 return FAILED ("Expected end parameter to be pointing to validWord2[11]");
670 return OK;
673 glong
674 utf8_byteslen (const gchar *src)
676 int i = 0;
677 do {
678 if (src [i] == '\0')
679 return i;
680 i++;
681 } while (TRUE);
684 RESULT
685 test_utf8_strcase_each (const gchar *src, const gchar *expected, gboolean strup)
687 gchar *tmp;
688 glong len, len2;
689 RESULT r;
691 len = utf8_byteslen (src);
692 tmp = strup ? g_utf8_strup (src, len) : g_utf8_strdown (src, len);
693 len2 = utf8_byteslen (tmp);
694 r = compare_strings_utf8_RESULT (expected, tmp, len < len2 ? len2 : len);
695 g_free (tmp);
696 return r;
699 RESULT
700 test_utf8_strup_each (const gchar *src, const gchar *expected)
702 return test_utf8_strcase_each (src, expected, TRUE);
705 RESULT
706 test_utf8_strdown_each (const gchar *src, const gchar *expected)
708 return test_utf8_strcase_each (src, expected, FALSE);
/*
 * g_utf8_strup
 */
714 RESULT
715 test_utf8_strup ()
717 RESULT r;
719 if ((r = test_utf8_strup_each ("aBc", "ABC")) != OK)
720 return r;
721 if ((r = test_utf8_strup_each ("x86-64", "X86-64")) != OK)
722 return r;
723 // U+3B1 U+392 -> U+391 U+392
724 if ((r = test_utf8_strup_each ("\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92")) != OK)
725 return r;
726 // U+FF21 -> U+FF21
727 if ((r = test_utf8_strup_each ("\xEF\xBC\xA1", "\xEF\xBC\xA1")) != OK)
728 return r;
729 // U+FF41 -> U+FF21
730 if ((r = test_utf8_strup_each ("\xEF\xBD\x81", "\xEF\xBC\xA1")) != OK)
731 return r;
732 // U+10428 -> U+10400
733 if ((r = test_utf8_strup_each ("\xF0\x90\x90\xA8", "\xF0\x90\x90\x80")) != OK)
734 return r;
736 return OK;
/*
 * g_utf8_strdown
 */
742 RESULT
743 test_utf8_strdown ()
745 RESULT r;
747 if ((r = test_utf8_strdown_each ("aBc", "abc")) != OK)
748 return r;
749 if ((r = test_utf8_strdown_each ("X86-64", "x86-64")) != OK)
750 return r;
751 // U+391 U+3B2 -> U+3B1 U+3B2
752 if ((r = test_utf8_strdown_each ("\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2")) != OK)
753 return r;
755 // U+FF41 -> U+FF41
756 if ((r = test_utf8_strdown_each ("\xEF\xBC\x81", "\xEF\xBC\x81")) != OK)
757 return r;
758 // U+FF21 -> U+FF41
759 if ((r = test_utf8_strdown_each ("\xEF\xBC\xA1", "\xEF\xBD\x81")) != OK)
760 return r;
761 // U+10400 -> U+10428
762 if ((r = test_utf8_strdown_each ("\xF0\x90\x90\x80", "\xF0\x90\x90\xA8")) != OK)
763 return r;
765 return OK;
/*
 * test initialization
 */
772 static Test utf8_tests [] = {
773 {"g_utf16_to_utf8", test_utf16_to_utf8},
774 {"g_utf8_to_utf16", test_utf8_to_utf16},
775 {"g_utf8_seq", test_utf8_seq},
776 {"g_convert", test_convert },
777 {"g_unichar_xdigit_value", test_xdigit },
778 {"g_ucs4_to_utf16", test_ucs4_to_utf16 },
779 {"g_utf16_to_ucs4", test_utf16_to_ucs4 },
780 {"g_utf8_strlen", test_utf8_strlen },
781 {"g_utf8_get_char", test_utf8_get_char },
782 {"g_utf8_next_char", test_utf8_next_char },
783 {"g_utf8_validate", test_utf8_validate },
784 {"g_utf8_strup", test_utf8_strup},
785 {"g_utf8_strdown", test_utf8_strdown},
786 {NULL, NULL}
789 DEFINE_TEST_GROUP_INIT(utf8_tests_init, utf8_tests)