10 compare_strings_utf8_pos (const gchar
*expected
, const gchar
*actual
, glong size
)
13 for (i
= 0; i
< size
; i
++)
14 if (expected
[i
] != actual
[i
])
20 compare_strings_utf8_RESULT (const gchar
*expected
, const gchar
*actual
, glong size
)
24 ret
= compare_strings_utf8_pos (expected
, actual
, size
);
27 return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d\n", expected
, actual
, ret
);
31 gchar_to_gunichar2 (gunichar2 ret
[], const gchar
*src
)
35 for (i
= 0; src
[i
]; i
++)
41 compare_utf16_to_utf8_explicit (const gchar
*expected
, const gunichar2
*utf16
, glong len_in
, glong len_out
, glong size_spec
)
46 glong in_read
, out_read
;
51 ret
= g_utf16_to_utf8 (utf16
, size_spec
, &in_read
, &out_read
, &error
);
53 result
= FAILED ("The error is %d %s\n", (error
)->code
, (error
)->message
);
59 if (in_read
!= len_in
)
60 result
= FAILED ("Read size is incorrect: expected %d but was %d\n", len_in
, in_read
);
61 else if (out_read
!= len_out
)
62 result
= FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out
, out_read
);
64 result
= compare_strings_utf8_RESULT (expected
, ret
, len_out
);
74 compare_utf16_to_utf8 (const gchar
*expected
, const gunichar2
*utf16
, glong len_in
, glong len_out
)
78 result
= compare_utf16_to_utf8_explicit (expected
, utf16
, len_in
, len_out
, -1);
81 return compare_utf16_to_utf8_explicit (expected
, utf16
, len_in
, len_out
, len_in
);
87 const gchar
*src0
= "", *src1
= "ABCDE", *src2
= "\xE5\xB9\xB4\x27", *src3
= "\xEF\xBC\xA1", *src4
= "\xEF\xBD\x81", *src5
= "\xF0\x90\x90\x80";
88 gunichar2 str0
[] = {0}, str1
[6], str2
[] = {0x5E74, 39, 0}, str3
[] = {0xFF21, 0}, str4
[] = {0xFF41, 0}, str5
[] = {0xD801, 0xDC00, 0};
91 gchar_to_gunichar2 (str1
, src1
);
94 result
= compare_utf16_to_utf8 (src0
, str0
, 0, 0);
98 result
= compare_utf16_to_utf8 (src1
, str1
, 5, 5);
101 result
= compare_utf16_to_utf8 (src2
, str2
, 2, 4);
104 result
= compare_utf16_to_utf8 (src3
, str3
, 1, 3);
107 result
= compare_utf16_to_utf8 (src4
, str4
, 1, 3);
110 result
= compare_utf16_to_utf8 (src5
, str5
, 2, 4);
122 compare_strings_utf16_pos (const gunichar2
*expected
, const gunichar2
*actual
, glong size
)
125 for (i
= 0; i
< size
; i
++)
126 if (expected
[i
] != actual
[i
])
132 compare_strings_utf16_RESULT (const gunichar2
*expected
, const gunichar2
*actual
, glong size
)
136 ret
= compare_strings_utf16_pos (expected
, actual
, size
);
139 return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d ('%c' x '%c')\n", expected
, actual
, ret
, expected
[ret
], actual
[ret
]);
142 #if !defined(EGLIB_TESTS)
143 #define eg_utf8_to_utf16_with_nuls g_utf8_to_utf16
147 compare_utf8_to_utf16_explicit (const gunichar2
*expected
, const gchar
*utf8
, glong len_in
, glong len_out
, glong size_spec
, gboolean include_nuls
)
152 glong in_read
, out_read
;
158 ret
= eg_utf8_to_utf16_with_nuls (utf8
, size_spec
, &in_read
, &out_read
, &error
);
160 ret
= g_utf8_to_utf16 (utf8
, size_spec
, &in_read
, &out_read
, &error
);
163 result
= FAILED ("The error is %d %s\n", (error
)->code
, (error
)->message
);
164 g_error_free (error
);
169 if (in_read
!= len_in
)
170 result
= FAILED ("Read size is incorrect: expected %d but was %d\n", len_in
, in_read
);
171 else if (out_read
!= len_out
)
172 result
= FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out
, out_read
);
174 result
= compare_strings_utf16_RESULT (expected
, ret
, len_out
);
184 compare_utf8_to_utf16_general (const gunichar2
*expected
, const gchar
*utf8
, glong len_in
, glong len_out
, gboolean include_nuls
)
188 result
= compare_utf8_to_utf16_explicit (expected
, utf8
, len_in
, len_out
, -1, include_nuls
);
191 return compare_utf8_to_utf16_explicit (expected
, utf8
, len_in
, len_out
, len_in
, include_nuls
);
195 compare_utf8_to_utf16 (const gunichar2
*expected
, const gchar
*utf8
, glong len_in
, glong len_out
)
197 return compare_utf8_to_utf16_general (expected
, utf8
, len_in
, len_out
, FALSE
);
201 compare_utf8_to_utf16_with_nuls (const gunichar2
*expected
, const gchar
*utf8
, glong len_in
, glong len_out
)
203 return compare_utf8_to_utf16_explicit (expected
, utf8
, len_in
, len_out
, len_in
, TRUE
);
210 const gchar
*src
= "\xE5\xB9\xB4\x27";
211 glong in_read
, out_read
;
212 //gunichar2 expected [6];
213 GError
*error
= NULL
;
216 //printf ("got: %s\n", src);
217 dst
= g_utf8_to_utf16 (src
, (glong
)strlen (src
), &in_read
, &out_read
, &error
);
219 return error
->message
;
223 return FAILED ("in_read is expected to be 4 but was %d\n", in_read
);
226 return FAILED ("out_read is expected to be 2 but was %d\n", out_read
);
234 test_utf8_to_utf16 ()
236 const gchar
*src0
= "", *src1
= "ABCDE", *src2
= "\xE5\xB9\xB4\x27", *src3
= "\xEF\xBC\xA1", *src4
= "\xEF\xBD\x81";
237 gunichar2 str0
[] = {0}, str1
[6], str2
[] = {0x5E74, 39, 0}, str3
[] = {0xFF21, 0}, str4
[] = {0xFF41, 0};
240 gchar_to_gunichar2 (str1
, src1
);
243 result
= compare_utf8_to_utf16 (str0
, src0
, 0, 0);
247 result
= compare_utf8_to_utf16 (str1
, src1
, 5, 5);
250 result
= compare_utf8_to_utf16 (str2
, src2
, 4, 2);
253 result
= compare_utf8_to_utf16 (str3
, src3
, 3, 1);
256 result
= compare_utf8_to_utf16 (str4
, src4
, 3, 1);
264 test_utf8_to_utf16_with_nuls ()
266 const gchar
*src0
= "", *src1
= "AB\0DE", *src2
= "\xE5\xB9\xB4\x27", *src3
= "\xEF\xBC\xA1", *src4
= "\xEF\xBD\x81";
267 gunichar2 str0
[] = {0}, str1
[] = {'A', 'B', 0, 'D', 'E', 0}, str2
[] = {0x5E74, 39, 0}, str3
[] = {0xFF21, 0}, str4
[] = {0xFF41, 0};
270 #if !defined(EGLIB_TESTS)
274 /* implicit length is forbidden */
275 if (eg_utf8_to_utf16_with_nuls (src1
, -1, NULL
, NULL
, NULL
) != NULL
)
276 return FAILED ("explicit nulls must fail with -1 length\n");
279 result
= compare_utf8_to_utf16_with_nuls (str0
, src0
, 0, 0);
283 result
= compare_utf8_to_utf16_with_nuls (str1
, src1
, 5, 5);
286 result
= compare_utf8_to_utf16_with_nuls (str2
, src2
, 4, 2);
289 result
= compare_utf8_to_utf16_with_nuls (str3
, src3
, 3, 1);
292 result
= compare_utf8_to_utf16_with_nuls (str4
, src4
, 3, 1);
307 static const char *charsets
[] = { "UTF-8", "UTF-16LE", "UTF-16BE", "UTF-32LE", "UTF-32BE" };
308 gsize length
, converted_length
, n
;
309 char *content
, *converted
, *path
;
310 convert_result_t
**expected
;
317 if (!(srcdir
= getenv ("srcdir")) && !(srcdir
= getenv ("PWD")))
318 return FAILED ("srcdir not defined!");
320 expected
= g_malloc (sizeof (convert_result_t
*) * G_N_ELEMENTS (charsets
));
322 /* first load all our test samples... */
323 for (i
= 0; i
< G_N_ELEMENTS (charsets
); i
++) {
324 path
= g_strdup_printf ("%s%c%s.txt", srcdir
, G_DIR_SEPARATOR
, charsets
[i
]);
325 loaded
= g_file_get_contents (path
, &content
, &length
, &err
);
329 for (j
= 0; j
< i
; j
++) {
330 g_free (expected
[j
]->content
);
331 g_free (expected
[j
]);
336 return FAILED ("Failed to load content for %s: %s", charsets
[i
], err
->message
);
339 expected
[i
] = g_malloc (sizeof (convert_result_t
));
340 expected
[i
]->content
= content
;
341 expected
[i
]->length
= length
;
344 /* test conversion from every charset to every other charset */
345 for (i
= 0; i
< G_N_ELEMENTS (charsets
); i
++) {
346 for (j
= 0; j
< G_N_ELEMENTS (charsets
); j
++) {
347 converted
= g_convert (expected
[i
]->content
, expected
[i
]->length
, charsets
[j
],
348 charsets
[i
], NULL
, &converted_length
, NULL
);
350 if (converted
== NULL
) {
351 for (k
= 0; k
< G_N_ELEMENTS (charsets
); k
++) {
352 g_free (expected
[k
]->content
);
353 g_free (expected
[k
]);
358 return FAILED ("Failed to convert from %s to %s: NULL", charsets
[i
], charsets
[j
]);
361 if (converted_length
!= expected
[j
]->length
) {
362 length
= expected
[j
]->length
;
364 for (k
= 0; k
< G_N_ELEMENTS (charsets
); k
++) {
365 g_free (expected
[k
]->content
);
366 g_free (expected
[k
]);
372 return FAILED ("Failed to convert from %s to %s: expected %u bytes, got %u",
373 charsets
[i
], charsets
[j
], length
, converted_length
);
376 for (n
= 0; n
< converted_length
; n
++) {
377 if (converted
[n
] != expected
[j
]->content
[n
]) {
378 c
= expected
[j
]->content
[n
];
380 for (k
= 0; k
< G_N_ELEMENTS (charsets
); k
++) {
381 g_free (expected
[k
]->content
);
382 g_free (expected
[k
]);
388 return FAILED ("Failed to convert from %s to %s: expected 0x%x at offset %u, got 0x%x",
389 charsets
[i
], charsets
[j
], c
, n
, converted
[n
]);
397 for (k
= 0; k
< G_N_ELEMENTS (charsets
); k
++) {
398 g_free (expected
[k
]->content
);
399 g_free (expected
[k
]);
411 static char test_chars
[] = {
412 '0', '1', '2', '3', '4',
413 '5', '6', '7', '8', '9',
414 'a', 'b', 'c', 'd', 'e', 'f', 'g',
415 'A', 'B', 'C', 'D', 'E', 'F', 'G'};
416 static gint32 test_values
[] = {
419 10, 11, 12, 13, 14, 15, -1,
420 10, 11, 12, 13, 14, 15, -1};
424 for (i
= 0; i
< sizeof(test_chars
); i
++)
425 if (g_unichar_xdigit_value ((gunichar
)test_chars
[i
]) != test_values
[i
])
426 return FAILED("Incorrect value %d at index %d", test_values
[i
], i
);
432 ucs4_to_utf16_check_result (const gunichar2
*result_str
, const gunichar2
*expected_str
,
433 glong result_items_read
, glong expected_items_read
,
434 glong result_items_written
, glong expected_items_written
,
435 GError
* result_error
, gboolean expect_error
)
438 if (result_items_read
!= expected_items_read
)
439 return FAILED("Incorrect number of items read; expected %d, got %d", expected_items_read
, result_items_read
);
440 if (result_items_written
!= expected_items_written
)
441 return FAILED("Incorrect number of items written; expected %d, got %d", expected_items_written
, result_items_written
);
442 if (result_error
&& !expect_error
)
443 return FAILED("There should not be an error code.");
444 if (!result_error
&& expect_error
)
445 return FAILED("Unexpected error object.");
446 if (expect_error
&& result_str
)
447 return FAILED("NULL should be returned when an error occurs.");
448 if (!expect_error
&& !result_str
)
449 return FAILED("When no error occurs NULL should not be returned.");
450 for (i
=0; i
<expected_items_written
;i
++) {
451 if (result_str
[i
] != expected_str
[i
])
452 return FAILED("Incorrect value %d at index %d", result_str
[i
], i
);
454 if (result_str
&& result_str
[expected_items_written
] != '\0')
455 return FAILED("Null termination not found at the end of the string.");
461 test_ucs4_to_utf16 ()
463 static gunichar str1
[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
464 static gunichar2 exp1
[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
465 static gunichar str2
[3] = {'h',0x80000000,'\0'};
466 static gunichar2 exp2
[2] = {'h','\0'};
467 static gunichar str3
[3] = {'h',0xDA00,'\0'};
468 static gunichar str4
[3] = {'h',0x10FFFF,'\0'};
469 static gunichar2 exp4
[4] = {'h',0xdbff,0xdfff,'\0'};
470 static gunichar str5
[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'};
471 static gunichar2 exp5
[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'};
472 static gunichar str6
[2] = {0x10400, '\0'};
473 static gunichar2 exp6
[3] = {0xD801, 0xDC00, '\0'};
474 static glong read_write
[12] = {1,1,0,0,0,0,1,1,0,0,1,2};
476 glong items_read
, items_written
, current_write_index
;
481 res
= g_ucs4_to_utf16 (str1
, 12, &items_read
, &items_written
, &err
);
482 check_result
= ucs4_to_utf16_check_result (res
, exp1
, items_read
, 11, items_written
, 11, err
, FALSE
);
483 if (check_result
) return check_result
;
486 items_read
= items_written
= 0;
487 res
= g_ucs4_to_utf16 (str2
, 0, &items_read
, &items_written
, &err
);
488 check_result
= ucs4_to_utf16_check_result (res
, exp2
, items_read
, 0, items_written
, 0, err
, FALSE
);
489 if (check_result
) return check_result
;
492 items_read
= items_written
= 0;
493 res
= g_ucs4_to_utf16 (str2
, 1, &items_read
, &items_written
, &err
);
494 check_result
= ucs4_to_utf16_check_result (res
, exp2
, items_read
, 1, items_written
, 1, err
, FALSE
);
495 if (check_result
) return check_result
;
498 items_read
= items_written
= 0;
499 res
= g_ucs4_to_utf16 (str2
, 2, &items_read
, &items_written
, &err
);
500 check_result
= ucs4_to_utf16_check_result (res
, 0, items_read
, 1, items_written
, 0, err
, TRUE
);
502 if (check_result
) return check_result
;
504 items_read
= items_written
= 0;
506 res
= g_ucs4_to_utf16 (str3
, 2, &items_read
, &items_written
, &err
);
507 check_result
= ucs4_to_utf16_check_result (res
, 0, items_read
, 1, items_written
, 0, err
, TRUE
);
508 if (check_result
) return check_result
;
511 items_read
= items_written
= 0;
513 res
= g_ucs4_to_utf16 (str4
, 5, &items_read
, &items_written
, &err
);
514 check_result
= ucs4_to_utf16_check_result (res
, exp4
, items_read
, 2, items_written
, 3, err
, FALSE
);
515 if (check_result
) return check_result
;
518 // This loop tests the bounds of the conversion algorithm
519 current_write_index
= 0;
521 items_read
= items_written
= 0;
523 res
= g_ucs4_to_utf16 (&str5
[i
], 1, &items_read
, &items_written
, &err
);
524 check_result
= ucs4_to_utf16_check_result (res
, &exp5
[current_write_index
],
525 items_read
, read_write
[i
*2], items_written
, read_write
[(i
*2)+1], err
, !read_write
[(i
*2)+1]);
526 if (check_result
) return check_result
;
528 current_write_index
+= items_written
;
531 items_read
= items_written
= 0;
533 res
= g_ucs4_to_utf16 (str6
, 1, &items_read
, &items_written
, &err
);
534 check_result
= ucs4_to_utf16_check_result (res
, exp6
, items_read
, 1, items_written
, 2, err
, FALSE
);
535 if (check_result
) return check_result
;
542 utf16_to_ucs4_check_result (const gunichar
*result_str
, const gunichar
*expected_str
,
543 glong result_items_read
, glong expected_items_read
,
544 glong result_items_written
, glong expected_items_written
,
545 GError
* result_error
, gboolean expect_error
)
548 if (result_items_read
!= expected_items_read
)
549 return FAILED("Incorrect number of items read; expected %d, got %d", expected_items_read
, result_items_read
);
550 if (result_items_written
!= expected_items_written
)
551 return FAILED("Incorrect number of items written; expected %d, got %d", expected_items_written
, result_items_written
);
552 if (result_error
&& !expect_error
)
553 return FAILED("There should not be an error code.");
554 if (!result_error
&& expect_error
)
555 return FAILED("Unexpected error object.");
556 if (expect_error
&& result_str
)
557 return FAILED("NULL should be returned when an error occurs.");
558 if (!expect_error
&& !result_str
)
559 return FAILED("When no error occurs NULL should not be returned.");
560 for (i
=0; i
<expected_items_written
;i
++) {
561 if (result_str
[i
] != expected_str
[i
])
562 return FAILED("Incorrect value %d at index %d", result_str
[i
], i
);
564 if (result_str
&& result_str
[expected_items_written
] != '\0')
565 return FAILED("Null termination not found at the end of the string.");
571 test_utf16_to_ucs4 ()
573 static gunichar2 str1
[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
574 static gunichar exp1
[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
575 static gunichar2 str2
[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'};
576 static gunichar exp2
[3] = {'H',0x00010001,'\0'};
577 static gunichar2 str3
[4] = {'H', 0xDC00 ,'l','\0'};
578 static gunichar exp3
[2] = {'H','\0'};
579 static gunichar2 str4
[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
580 0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
581 static gunichar exp4
[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
582 static gunichar2 str5
[3] = {0xD801, 0xDC00, 0};
583 static gunichar exp5
[2] = {0x10400, 0};
584 static glong read_write
[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
586 glong items_read
, items_written
, current_read_index
,current_write_index
;
591 res
= g_utf16_to_ucs4 (str1
, 12, &items_read
, &items_written
, &err
);
592 check_result
= utf16_to_ucs4_check_result (res
, exp1
, items_read
, 11, items_written
, 11, err
, FALSE
);
593 if (check_result
) return check_result
;
596 items_read
= items_written
= 0;
597 res
= g_utf16_to_ucs4 (str2
, 0, &items_read
, &items_written
, &err
);
598 check_result
= utf16_to_ucs4_check_result (res
, exp2
, items_read
, 0, items_written
, 0, err
, FALSE
);
599 if (check_result
) return check_result
;
602 items_read
= items_written
= 0;
603 res
= g_utf16_to_ucs4 (str2
, 1, &items_read
, &items_written
, &err
);
604 check_result
= utf16_to_ucs4_check_result (res
, exp2
, items_read
, 1, items_written
, 1, err
, FALSE
);
605 if (check_result
) return check_result
;
608 items_read
= items_written
= 0;
609 res
= g_utf16_to_ucs4 (str2
, 2, &items_read
, &items_written
, &err
);
610 check_result
= utf16_to_ucs4_check_result (res
, exp2
, items_read
, 1, items_written
, 1, err
, FALSE
);
611 if (check_result
) return check_result
;
614 items_read
= items_written
= 0;
615 res
= g_utf16_to_ucs4 (str2
, 3, &items_read
, &items_written
, &err
);
616 check_result
= utf16_to_ucs4_check_result (res
, exp2
, items_read
, 3, items_written
, 2, err
, FALSE
);
617 if (check_result
) return check_result
;
620 items_read
= items_written
= 0;
621 res
= g_utf16_to_ucs4 (str2
, 4, &items_read
, &items_written
, &err
);
622 check_result
= utf16_to_ucs4_check_result (res
, exp2
, items_read
, 3, items_written
, 2, err
, FALSE
);
623 if (check_result
) return check_result
;
626 items_read
= items_written
= 0;
627 res
= g_utf16_to_ucs4 (str2
, 5, &items_read
, &items_written
, &err
);
628 check_result
= utf16_to_ucs4_check_result (res
, exp2
, items_read
, 4, items_written
, 0, err
, TRUE
);
629 if (check_result
) return check_result
;
632 items_read
= items_written
= 0;
634 res
= g_utf16_to_ucs4 (str3
, 5, &items_read
, &items_written
, &err
);
635 check_result
= utf16_to_ucs4_check_result (res
, exp3
, items_read
, 1, items_written
, 0, err
, TRUE
);
636 if (check_result
) return check_result
;
639 // This loop tests the bounds of the conversion algorithm
640 current_read_index
= current_write_index
= 0;
642 items_read
= items_written
= 0;
644 res
= g_utf16_to_ucs4 (&str4
[current_read_index
], read_write
[i
*3], &items_read
, &items_written
, &err
);
645 check_result
= utf16_to_ucs4_check_result (res
, &exp4
[current_write_index
], items_read
,
646 read_write
[(i
*3)+1], items_written
, read_write
[(i
*3)+2], err
,
647 !read_write
[(i
*3)+2]);
648 if (check_result
) return check_result
;
650 current_read_index
+= read_write
[i
*3];
651 current_write_index
+= items_written
;
654 items_read
= items_written
= 0;
656 res
= g_utf16_to_ucs4 (str5
, 2, &items_read
, &items_written
, &err
);
657 check_result
= utf16_to_ucs4_check_result (res
, exp5
, items_read
, 2, items_written
, 1, err
, FALSE
);
658 if (check_result
) return check_result
;
666 gchar word1
[] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'};//Valid, len = 5
667 gchar word2
[] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'};//Valid, len = 5
668 gchar word3
[] = {'h','e',0xC2, 0x82,0x45,'\0'}; //Valid, len = 4
669 gchar word4
[] = {0x62,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
674 len
= g_utf8_strlen (word1
,-1);
676 return FAILED ("Word1 expected length of 5, but was %i", len
);
677 //Do tests with different values for max parameter.
678 len
= g_utf8_strlen (word1
,1);
680 return FAILED ("Word1, max = 1, expected length of 0, but was %i", len
);
681 len
= g_utf8_strlen (word1
,2);
683 return FAILED ("Word1, max = 1, expected length of 1, but was %i", len
);
684 len
= g_utf8_strlen (word1
,3);
686 return FAILED ("Word1, max = 2, expected length of 2, but was %i", len
);
689 len
= g_utf8_strlen (word2
,-1);
691 return FAILED ("Word2 expected length of 5, but was %i", len
);
694 len
= g_utf8_strlen (word3
,-1);
696 return FAILED ("Word3 expected length of 4, but was %i", len
);
699 len
= g_utf8_strlen (word4
,-1);
701 return FAILED ("Word4 expected length of 5, but was %i", len
);
704 len
= g_utf8_strlen(NULL
,0);
706 return FAILED ("Expected passing null to result in a length of 0");
713 gchar word1
[] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
715 gunichar value
= g_utf8_get_char (&word1
[0]);
717 return FAILED ("Expected value of 0x82, but was %x", value
);
718 value
= g_utf8_get_char (&word1
[2]);
720 return FAILED ("Expected value of 0x45, but was %x", value
);
721 value
= g_utf8_get_char (&word1
[3]);
722 if (value
!= 0x1043UL
)
723 return FAILED ("Expected value of 0x1043, but was %x", value
);
724 value
= g_utf8_get_char (&word1
[6]);
726 return FAILED ("Expected value of 0x58, but was %x", value
);
727 value
= g_utf8_get_char (&word1
[7]);
728 if (value
!= 0x42082UL
)
729 return FAILED ("Expected value of 0x42082, but was %x", value
);
735 test_utf8_next_char()
737 gchar word1
[] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
738 gchar word2
[] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
739 gchar word1ExpectedValues
[] = {0xC2, 0x45,0xE1, 0x58, 0xF1};
740 gchar word2ExpectedValues
[] = {0xF1, 0xC2, 0x45, 0xE1, 0x58};
747 return FAILED ("Word1 has gone past its expected length");
748 if (*ptr
!= word1ExpectedValues
[count
])
749 return FAILED ("Word1 has an incorrect next_char at index %i", count
);
750 ptr
= g_utf8_next_char (ptr
);
759 return FAILED ("Word2 has gone past its expected length");
760 if (*ptr
!= word2ExpectedValues
[count
])
761 return FAILED ("Word2 has an incorrect next_char at index %i", count
);
762 ptr
= g_utf8_next_char (ptr
);
772 gchar invalidWord1
[] = {0xC3, 0x82, 0xC1,0x90,'\0'}; //Invalid, 1nd oct Can't be 0xC0 or 0xC1
773 gchar invalidWord2
[] = {0xC1, 0x89, 0x60, '\0'}; //Invalid, 1st oct can not be 0xC1
774 gchar invalidWord3
[] = {0xC2, 0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Invalid, oct after 0xC2 must be > 0x80
776 gchar validWord1
[] = {0xC2, 0x82, 0xC3,0xA0,'\0'}; //Valid
777 gchar validWord2
[] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid
780 gboolean retVal
= g_utf8_validate (invalidWord1
, -1, &end
);
782 return FAILED ("Expected invalidWord1 to be invalid");
783 if (end
!= &invalidWord1
[2])
784 return FAILED ("Expected end parameter to be pointing to invalidWord1[2]");
787 retVal
= g_utf8_validate (invalidWord2
, -1, &end
);
789 return FAILED ("Expected invalidWord2 to be invalid");
790 if (end
!= &invalidWord2
[0])
791 return FAILED ("Expected end parameter to be pointing to invalidWord2[0]");
794 retVal
= g_utf8_validate (invalidWord3
, -1, &end
);
796 return FAILED ("Expected invalidWord3 to be invalid");
797 if (end
!= &invalidWord3
[0])
798 return FAILED ("Expected end parameter to be pointing to invalidWord3[1]");
801 retVal
= g_utf8_validate (validWord1
, -1, &end
);
803 return FAILED ("Expected validWord1 to be valid");
804 if (end
!= &validWord1
[4])
805 return FAILED ("Expected end parameter to be pointing to validWord1[4]");
808 retVal
= g_utf8_validate (validWord2
, -1, &end
);
810 return FAILED ("Expected validWord2 to be valid");
811 if (end
!= &validWord2
[11])
812 return FAILED ("Expected end parameter to be pointing to validWord2[11]");
817 utf8_byteslen (const gchar
*src
)
828 test_utf8_strcase_each (const gchar
*src
, const gchar
*expected
, gboolean strup
)
834 len
= utf8_byteslen (src
);
835 tmp
= strup
? g_utf8_strup (src
, len
) : g_utf8_strdown (src
, len
);
836 len2
= utf8_byteslen (tmp
);
837 r
= compare_strings_utf8_RESULT (expected
, tmp
, len
< len2
? len2
: len
);
843 test_utf8_strup_each (const gchar
*src
, const gchar
*expected
)
845 return test_utf8_strcase_each (src
, expected
, TRUE
);
849 test_utf8_strdown_each (const gchar
*src
, const gchar
*expected
)
851 return test_utf8_strcase_each (src
, expected
, FALSE
);
862 if ((r
= test_utf8_strup_each ("aBc", "ABC")) != OK
)
864 if ((r
= test_utf8_strup_each ("x86-64", "X86-64")) != OK
)
866 // U+3B1 U+392 -> U+391 U+392
867 if ((r
= test_utf8_strup_each ("\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92")) != OK
)
870 if ((r
= test_utf8_strup_each ("\xEF\xBC\xA1", "\xEF\xBC\xA1")) != OK
)
873 if ((r
= test_utf8_strup_each ("\xEF\xBD\x81", "\xEF\xBC\xA1")) != OK
)
875 // U+10428 -> U+10400
876 if ((r
= test_utf8_strup_each ("\xF0\x90\x90\xA8", "\xF0\x90\x90\x80")) != OK
)
890 if ((r
= test_utf8_strdown_each ("aBc", "abc")) != OK
)
892 if ((r
= test_utf8_strdown_each ("X86-64", "x86-64")) != OK
)
894 // U+391 U+3B2 -> U+3B1 U+3B2
895 if ((r
= test_utf8_strdown_each ("\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2")) != OK
)
899 if ((r = test_utf8_strdown_each ("\xEF\xBC\x81", "\xEF\xBC\x81")) != OK)
902 if ((r = test_utf8_strdown_each ("\xEF\xBC\xA1", "\xEF\xBD\x81")) != OK)
904 // U+10400 -> U+10428
905 if ((r = test_utf8_strdown_each ("\xF0\x90\x90\x80", "\xF0\x90\x90\xA8")) != OK)
912 * test initialization
915 static Test utf8_tests
[] = {
916 {"g_utf16_to_utf8", test_utf16_to_utf8
},
917 {"g_utf8_to_utf16", test_utf8_to_utf16
},
918 {"g_utf8_to_utf16_with_nuls", test_utf8_to_utf16_with_nuls
},
919 {"g_utf8_seq", test_utf8_seq
},
920 {"g_convert", test_convert
},
921 {"g_unichar_xdigit_value", test_xdigit
},
922 {"g_ucs4_to_utf16", test_ucs4_to_utf16
},
923 {"g_utf16_to_ucs4", test_utf16_to_ucs4
},
924 {"g_utf8_strlen", test_utf8_strlen
},
925 {"g_utf8_get_char", test_utf8_get_char
},
926 {"g_utf8_next_char", test_utf8_next_char
},
927 {"g_utf8_validate", test_utf8_validate
},
928 {"g_utf8_strup", test_utf8_strup
},
929 {"g_utf8_strdown", test_utf8_strdown
},
933 DEFINE_TEST_GROUP_INIT(utf8_tests_init
, utf8_tests
)