eglib/test/utf8.c

   1 #include <stdlib.h>
   2
   3 #include "test.h"
   4
   5 /*
   6  * g_utf16_to_utf8
   7  */
   8
   9 glong
  10 compare_strings_utf8_pos (const gchar *expected, const gchar *actual, glong size)
  11 {
  12         int i;
  13         for (i = 0; i < size; i++)
  14                 if (expected [i] != actual [i])
  15                         return i;
  16         return -1;
  17 }
  18
  19 RESULT
  20 compare_strings_utf8_RESULT (const gchar *expected, const gchar *actual, glong size)
  21 {
  22         glong ret;
  23
  24         ret = compare_strings_utf8_pos (expected, actual, size);
  25         if (ret < 0)
  26                 return OK;
  27         return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d\n", expected, actual, ret);
  28 }
  29
  30 void
  31 gchar_to_gunichar2 (gunichar2 ret[], const gchar *src)
  32 {
  33         int i;
  34
  35         for (i = 0; src [i]; i++)
  36                 ret [i] = src [i];
  37         ret [i] = 0;
  38 }
  39
  40 RESULT
  41 compare_utf16_to_utf8_explicit (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out, glong size_spec)
  42 {
  43         GError *error;
  44         gchar* ret;
  45         RESULT result;
  46         glong in_read, out_read;
  47
  48         result = NULL;
  49
  50         error = NULL;
  51         ret = g_utf16_to_utf8 (utf16, size_spec, &in_read, &out_read, &error);
  52         if (error) {
  53                 result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
  54                 g_error_free (error);
  55                 if (ret)
  56                         g_free (ret);
  57                 return result;
  58         }
  59         if (in_read != len_in)
  60                 result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
  61         else if (out_read != len_out)
  62                 result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
  63         else
  64                 result = compare_strings_utf8_RESULT (expected, ret, len_out);
  65
  66         g_free (ret);
  67         if (result)
  68                 return result;
  69
  70         return OK;
  71 }
  72
  73 RESULT
  74 compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out)
  75 {
  76         RESULT result;
  77
  78         result = compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, -1);
  79         if (result != OK)
  80                 return result;
  81         return compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, len_in);
  82 }
  83
  84 RESULT
  85 test_utf16_to_utf8 ()
  86 {
  87         const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81", *src5 = "\xF0\x90\x90\x80";
  88         gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}, str5 [] = {0xD801, 0xDC00, 0};
  89         RESULT result;
  90
  91         gchar_to_gunichar2 (str1, src1);
  92
  93         /* empty string */
  94         result = compare_utf16_to_utf8 (src0, str0, 0, 0);
  95         if (result != OK)
  96                 return result;
  97
  98         result = compare_utf16_to_utf8 (src1, str1, 5, 5);
  99         if (result != OK)
 100                 return result;
 101         result = compare_utf16_to_utf8 (src2, str2, 2, 4);
 102         if (result != OK)
 103                 return result;
 104         result = compare_utf16_to_utf8 (src3, str3, 1, 3);
 105         if (result != OK)
 106                 return result;
 107         result = compare_utf16_to_utf8 (src4, str4, 1, 3);
 108         if (result != OK)
 109                 return result;
 110         result = compare_utf16_to_utf8 (src5, str5, 2, 4);
 111         if (result != OK)
 112                 return result;
 113
 114         return OK;
 115 }
 116
 117 /*
 118  * g_utf8_to_utf16
 119  */
 120
 121 glong
 122 compare_strings_utf16_pos (const gunichar2 *expected, const gunichar2 *actual, glong size)
 123 {
 124         int i;
 125         for (i = 0; i < size; i++)
 126                 if (expected [i] != actual [i])
 127                         return i;
 128         return -1;
 129 }
 130
 131 RESULT
 132 compare_strings_utf16_RESULT (const gunichar2 *expected, const gunichar2 *actual, glong size)
 133 {
 134         glong ret;
 135
 136         ret = compare_strings_utf16_pos (expected, actual, size);
 137         if (ret < 0)
 138                 return OK;
 139         return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d ('%c' x '%c')\n", expected, actual, ret, expected [ret], actual [ret]);
 140 }
 141
/*
 * When EGLIB_TESTS is not defined, alias eg_utf8_to_utf16_with_nuls to
 * g_utf8_to_utf16, presumably so the code below that names it still builds.
 */
#if !defined(EGLIB_TESTS)
#define eg_utf8_to_utf16_with_nuls g_utf8_to_utf16
#endif
 145
 146 RESULT
 147 compare_utf8_to_utf16_explicit (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, glong size_spec, gboolean include_nuls)
 148 {
 149         GError *error;
 150         gunichar2* ret;
 151         RESULT result;
 152         glong in_read, out_read;
 153
 154         result = NULL;
 155
 156         error = NULL;
 157         if (include_nuls)
 158                 ret = eg_utf8_to_utf16_with_nuls (utf8, size_spec, &in_read, &out_read, &error);
 159         else
 160                 ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &error);
 161
 162         if (error) {
 163                 result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
 164                 g_error_free (error);
 165                 if (ret)
 166                         g_free (ret);
 167                 return result;
 168         }
 169         if (in_read != len_in)
 170                 result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
 171         else if (out_read != len_out)
 172                 result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
 173         else
 174                 result = compare_strings_utf16_RESULT (expected, ret, len_out);
 175
 176         g_free (ret);
 177         if (result)
 178                 return result;
 179
 180         return OK;
 181 }
 182
 183 RESULT
 184 compare_utf8_to_utf16_general (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, gboolean include_nuls)
 185 {
 186         RESULT result;
 187
 188         result = compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, -1, include_nuls);
 189         if (result != OK)
 190                 return result;
 191         return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in, include_nuls);
 192 }
 193
 194 RESULT
 195 compare_utf8_to_utf16 (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out)
 196 {
 197         return compare_utf8_to_utf16_general (expected, utf8, len_in, len_out, FALSE);
 198 }
 199
 200 RESULT
 201 compare_utf8_to_utf16_with_nuls (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out)
 202 {
 203         return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in, TRUE);
 204 }
 205
 206
 207 RESULT
 208 test_utf8_seq ()
 209 {
 210         const gchar *src = "\xE5\xB9\xB4\x27";
 211         glong in_read, out_read;
 212         //gunichar2 expected [6];
 213         GError *error = NULL;
 214         gunichar2 *dst;
 215
 216         //printf ("got: %s\n", src);
 217         dst = g_utf8_to_utf16 (src, (glong)strlen (src), &in_read, &out_read, &error);
 218         if (error != NULL){
 219                 return error->message;
 220         }
 221
 222         if (in_read != 4) {
 223                 return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
 224         }
 225         if (out_read != 2) {
 226                 return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
 227         }
 228         g_free (dst);
 229
 230         return OK;
 231 }
 232
 233 RESULT
 234 test_utf8_to_utf16 ()
 235 {
 236         const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81";
 237         gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0};
 238         RESULT result;
 239
 240         gchar_to_gunichar2 (str1, src1);
 241
 242         /* empty string */
 243         result = compare_utf8_to_utf16 (str0, src0, 0, 0);
 244         if (result != OK)
 245                 return result;
 246
 247         result = compare_utf8_to_utf16 (str1, src1, 5, 5);
 248         if (result != OK)
 249                 return result;
 250         result = compare_utf8_to_utf16 (str2, src2, 4, 2);
 251         if (result != OK)
 252                 return result;
 253         result = compare_utf8_to_utf16 (str3, src3, 3, 1);
 254         if (result != OK)
 255                 return result;
 256         result = compare_utf8_to_utf16 (str4, src4, 3, 1);
 257         if (result != OK)
 258                 return result;
 259
 260         return OK;
 261 }
 262
 263 RESULT
 264 test_utf8_to_utf16_with_nuls ()
 265 {
 266         const gchar *src0 = "", *src1 = "AB\0DE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81";
 267         gunichar2 str0 [] = {0}, str1 [] = {'A', 'B', 0, 'D', 'E', 0}, str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0};
 268         RESULT result;
 269
 270 #if !defined(EGLIB_TESTS)
 271         return OK;
 272 #endif
 273
 274         /* implicit length is forbidden */
 275                 if (eg_utf8_to_utf16_with_nuls (src1, -1, NULL, NULL, NULL) != NULL)
 276                 return FAILED ("explicit nulls must fail with -1 length\n");
 277
 278         /* empty string */
 279         result = compare_utf8_to_utf16_with_nuls (str0, src0, 0, 0);
 280         if (result != OK)
 281                 return result;
 282
 283         result = compare_utf8_to_utf16_with_nuls  (str1, src1, 5, 5);
 284         if (result != OK)
 285                 return result;
 286         result = compare_utf8_to_utf16_with_nuls  (str2, src2, 4, 2);
 287         if (result != OK)
 288                 return result;
 289         result = compare_utf8_to_utf16_with_nuls  (str3, src3, 3, 1);
 290         if (result != OK)
 291                 return result;
 292         result = compare_utf8_to_utf16_with_nuls  (str4, src4, 3, 1);
 293         if (result != OK)
 294                 return result;
 295
 296         return OK;
 297 }
 298
/* One loaded conversion sample, as filled in by test_convert. */
typedef struct {
        char *content;  /* file bytes obtained from g_file_get_contents () */
        size_t length;  /* length reported for content by that same call */
} convert_result_t;
 303
 304 RESULT
 305 test_convert ()
 306 {
 307         static const char *charsets[] = { "UTF-8", "UTF-16LE", "UTF-16BE", "UTF-32LE", "UTF-32BE" };
 308         gsize length, converted_length, n;
 309         char *content, *converted, *path;
 310         convert_result_t **expected;
 311         GError *err = NULL;
 312         const char *srcdir;
 313         gboolean loaded;
 314         guint i, j, k;
 315         char c;
 316
 317         if (!(srcdir = getenv ("srcdir")) && !(srcdir = getenv ("PWD")))
 318                 return FAILED ("srcdir not defined!");
 319
 320         expected = g_malloc (sizeof (convert_result_t *) * G_N_ELEMENTS (charsets));
 321
 322         /* first load all our test samples... */
 323         for (i = 0; i < G_N_ELEMENTS (charsets); i++) {
 324                 path = g_strdup_printf ("%s%c%s.txt", srcdir, G_DIR_SEPARATOR, charsets[i]);
 325                 loaded = g_file_get_contents (path, &content, &length, &err);
 326                 g_free (path);
 327
 328                 if (!loaded) {
 329                         for (j = 0; j < i; j++) {
 330                                 g_free (expected[j]->content);
 331                                 g_free (expected[j]);
 332                         }
 333
 334                         g_free (expected);
 335
 336                         return FAILED ("Failed to load content for %s: %s", charsets[i], err->message);
 337                 }
 338
 339                 expected[i] = g_malloc (sizeof (convert_result_t));
 340                 expected[i]->content = content;
 341                 expected[i]->length = length;
 342         }
 343
 344         /* test conversion from every charset to every other charset */
 345         for (i = 0; i < G_N_ELEMENTS (charsets); i++) {
 346                 for (j = 0; j < G_N_ELEMENTS (charsets); j++) {
 347                         converted = g_convert (expected[i]->content, expected[i]->length, charsets[j],
 348                                                charsets[i], NULL, &converted_length, NULL);
 349
 350                         if (converted == NULL) {
 351                                 for (k = 0; k < G_N_ELEMENTS (charsets); k++) {
 352                                         g_free (expected[k]->content);
 353                                         g_free (expected[k]);
 354                                 }
 355
 356                                 g_free (expected);
 357
 358                                 return FAILED ("Failed to convert from %s to %s: NULL", charsets[i], charsets[j]);
 359                         }
 360
 361                         if (converted_length != expected[j]->length) {
 362                                 length = expected[j]->length;
 363
 364                                 for (k = 0; k < G_N_ELEMENTS (charsets); k++) {
 365                                         g_free (expected[k]->content);
 366                                         g_free (expected[k]);
 367                                 }
 368
 369                                 g_free (converted);
 370                                 g_free (expected);
 371
 372                                 return FAILED ("Failed to convert from %s to %s: expected %u bytes, got %u",
 373                                                charsets[i], charsets[j], length, converted_length);
 374                         }
 375
 376                         for (n = 0; n < converted_length; n++) {
 377                                 if (converted[n] != expected[j]->content[n]) {
 378                                         c = expected[j]->content[n];
 379
 380                                         for (k = 0; k < G_N_ELEMENTS (charsets); k++) {
 381                                                 g_free (expected[k]->content);
 382                                                 g_free (expected[k]);
 383                                         }
 384
 385                                         g_free (converted);
 386                                         g_free (expected);
 387
 388                                         return FAILED ("Failed to convert from %s to %s: expected 0x%x at offset %u, got 0x%x",
 389                                                        charsets[i], charsets[j], c, n, converted[n]);
 390                                 }
 391                         }
 392
 393                         g_free (converted);
 394                 }
 395         }
 396
 397         for (k = 0; k < G_N_ELEMENTS (charsets); k++) {
 398                 g_free (expected[k]->content);
 399                 g_free (expected[k]);
 400         }
 401
 402         g_free (expected);
 403
 404         return OK;
 405 }
 406
 407
 408 RESULT
 409 test_xdigit ()
 410 {
 411         static char test_chars[] = {
 412                 '0', '1', '2', '3', '4',
 413                 '5', '6', '7', '8', '9',
 414                 'a', 'b', 'c', 'd', 'e', 'f', 'g',
 415                 'A', 'B', 'C', 'D', 'E', 'F', 'G'};
 416         static gint32 test_values[] = {
 417                 0, 1, 2, 3, 4,
 418                 5, 6, 7, 8, 9,
 419                 10, 11, 12, 13, 14, 15, -1,
 420                 10, 11, 12, 13, 14, 15, -1};
 421
 422                 int i =0;
 423
 424                 for (i = 0; i < sizeof(test_chars); i++)
 425                         if (g_unichar_xdigit_value ((gunichar)test_chars[i]) != test_values[i])
 426                                 return FAILED("Incorrect value %d at index %d", test_values[i], i);
 427
 428                 return OK;
 429 }
 430
 431 static RESULT
 432 ucs4_to_utf16_check_result (const gunichar2 *result_str, const gunichar2 *expected_str,
 433                             glong result_items_read, glong expected_items_read,
 434                             glong result_items_written, glong expected_items_written,
 435                             GError* result_error, gboolean expect_error)
 436 {
 437         glong i;
 438         if (result_items_read != expected_items_read)
 439                 return FAILED("Incorrect number of items read; expected %d, got %d", expected_items_read, result_items_read);
 440         if (result_items_written != expected_items_written)
 441                 return FAILED("Incorrect number of items written; expected %d, got %d", expected_items_written, result_items_written);
 442         if (result_error && !expect_error)
 443                 return FAILED("There should not be an error code.");
 444         if (!result_error && expect_error)
 445                 return FAILED("Unexpected error object.");
 446         if (expect_error && result_str)
 447                 return FAILED("NULL should be returned when an error occurs.");
 448         if (!expect_error && !result_str)
 449                 return FAILED("When no error occurs NULL should not be returned.");
 450         for (i=0; i<expected_items_written;i++) {
 451                 if (result_str [i] != expected_str [i])
 452                         return FAILED("Incorrect value %d at index %d", result_str [i], i);
 453         }
 454         if (result_str && result_str[expected_items_written] != '\0')
 455                 return FAILED("Null termination not found at the end of the string.");
 456
 457         return OK;
 458 }
 459
 460 RESULT
 461 test_ucs4_to_utf16 ()
 462 {
 463         static gunichar str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
 464         static gunichar2 exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
 465         static gunichar str2[3] = {'h',0x80000000,'\0'};
 466         static gunichar2 exp2[2] = {'h','\0'};
 467         static gunichar str3[3] = {'h',0xDA00,'\0'};
 468         static gunichar str4[3] = {'h',0x10FFFF,'\0'};
 469         static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'};
 470         static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'};
 471         static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'};
 472         static gunichar str6[2] = {0x10400, '\0'};
 473         static gunichar2 exp6[3] = {0xD801, 0xDC00, '\0'};
 474         static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2};
 475         gunichar2* res;
 476         glong items_read, items_written, current_write_index;
 477         GError* err=0;
 478         RESULT check_result;
 479         glong i;
 480
 481         res = g_ucs4_to_utf16 (str1, 12, &items_read, &items_written, &err);
 482         check_result = ucs4_to_utf16_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
 483         if (check_result) return check_result;
 484         g_free (res);
 485
 486         items_read = items_written = 0;
 487         res = g_ucs4_to_utf16 (str2, 0, &items_read, &items_written, &err);
 488         check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
 489         if (check_result) return check_result;
 490         g_free (res);
 491
 492         items_read = items_written = 0;
 493         res = g_ucs4_to_utf16 (str2, 1, &items_read, &items_written, &err);
 494         check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
 495         if (check_result) return check_result;
 496         g_free (res);
 497
 498         items_read = items_written = 0;
 499         res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err);
 500         check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
 501         g_free (res);
 502         if (check_result) return check_result;
 503
 504         items_read = items_written = 0;
 505         err = 0;
 506         res = g_ucs4_to_utf16 (str3, 2, &items_read, &items_written, &err);
 507         check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
 508         if (check_result) return check_result;
 509         g_free (res);
 510
 511         items_read = items_written = 0;
 512         err = 0;
 513         res = g_ucs4_to_utf16 (str4, 5, &items_read, &items_written, &err);
 514         check_result = ucs4_to_utf16_check_result (res, exp4, items_read, 2, items_written, 3, err, FALSE);
 515         if (check_result) return check_result;
 516         g_free (res);
 517
 518         // This loop tests the bounds of the conversion algorithm
 519         current_write_index = 0;
 520         for (i=0;i<6;i++) {
 521                 items_read = items_written = 0;
 522                 err = 0;
 523                 res = g_ucs4_to_utf16 (&str5[i], 1, &items_read, &items_written, &err);
 524                 check_result = ucs4_to_utf16_check_result (res, &exp5[current_write_index],
 525                                         items_read, read_write[i*2], items_written, read_write[(i*2)+1], err, !read_write[(i*2)+1]);
 526                 if (check_result) return check_result;
 527                 g_free (res);
 528                 current_write_index += items_written;
 529         }
 530
 531         items_read = items_written = 0;
 532         err = 0;
 533         res = g_ucs4_to_utf16 (str6, 1, &items_read, &items_written, &err);
 534         check_result = ucs4_to_utf16_check_result (res, exp6, items_read, 1, items_written, 2, err, FALSE);
 535         if (check_result) return check_result;
 536         g_free (res);
 537
 538         return OK;
 539 }
 540
 541 static RESULT
 542 utf16_to_ucs4_check_result (const gunichar *result_str, const gunichar *expected_str,
 543                             glong result_items_read, glong expected_items_read,
 544                             glong result_items_written, glong expected_items_written,
 545                             GError* result_error, gboolean expect_error)
 546 {
 547         glong i;
 548         if (result_items_read != expected_items_read)
 549                 return FAILED("Incorrect number of items read; expected %d, got %d", expected_items_read, result_items_read);
 550         if (result_items_written != expected_items_written)
 551                 return FAILED("Incorrect number of items written; expected %d, got %d", expected_items_written, result_items_written);
 552         if (result_error && !expect_error)
 553                 return FAILED("There should not be an error code.");
 554         if (!result_error && expect_error)
 555                 return FAILED("Unexpected error object.");
 556         if (expect_error && result_str)
 557                 return FAILED("NULL should be returned when an error occurs.");
 558         if (!expect_error && !result_str)
 559                 return FAILED("When no error occurs NULL should not be returned.");
 560         for (i=0; i<expected_items_written;i++) {
 561                 if (result_str [i] != expected_str [i])
 562                         return FAILED("Incorrect value %d at index %d", result_str [i], i);
 563         }
 564         if (result_str && result_str[expected_items_written] != '\0')
 565                 return FAILED("Null termination not found at the end of the string.");
 566
 567         return OK;
 568 }
 569
 570 RESULT
 571 test_utf16_to_ucs4 ()
 572 {
 573         static gunichar2 str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
 574         static gunichar exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
 575         static gunichar2 str2[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'};
 576         static gunichar exp2[3] = {'H',0x00010001,'\0'};
 577         static gunichar2 str3[4] = {'H', 0xDC00 ,'l','\0'};
 578         static gunichar exp3[2] = {'H','\0'};
 579         static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
 580                                      0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
 581         static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
 582         static gunichar2 str5[3] = {0xD801, 0xDC00, 0};
 583         static gunichar exp5[2] = {0x10400, 0};
 584         static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
 585         gunichar* res;
 586         glong items_read, items_written, current_read_index,current_write_index;
 587         GError* err=0;
 588         RESULT check_result;
 589         glong i;
 590
 591         res = g_utf16_to_ucs4 (str1, 12, &items_read, &items_written, &err);
 592         check_result = utf16_to_ucs4_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
 593         if (check_result) return check_result;
 594         g_free (res);
 595
 596         items_read = items_written = 0;
 597         res = g_utf16_to_ucs4 (str2, 0, &items_read, &items_written, &err);
 598         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
 599         if (check_result) return check_result;
 600         g_free (res);
 601
 602         items_read = items_written = 0;
 603         res = g_utf16_to_ucs4 (str2, 1, &items_read, &items_written, &err);
 604         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
 605         if (check_result) return check_result;
 606         g_free (res);
 607
 608         items_read = items_written = 0;
 609         res = g_utf16_to_ucs4 (str2, 2, &items_read, &items_written, &err);
 610         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
 611         if (check_result) return check_result;
 612         g_free (res);
 613
 614         items_read = items_written = 0;
 615         res = g_utf16_to_ucs4 (str2, 3, &items_read, &items_written, &err);
 616         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
 617         if (check_result) return check_result;
 618         g_free (res);
 619
 620         items_read = items_written = 0;
 621         res = g_utf16_to_ucs4 (str2, 4, &items_read, &items_written, &err);
 622         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
 623         if (check_result) return check_result;
 624         g_free (res);
 625
 626         items_read = items_written = 0;
 627         res = g_utf16_to_ucs4 (str2, 5, &items_read, &items_written, &err);
 628         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 4, items_written, 0, err, TRUE);
 629         if (check_result) return check_result;
 630         g_free (res);
 631
 632         items_read = items_written = 0;
 633         err = 0;
 634         res = g_utf16_to_ucs4 (str3, 5, &items_read, &items_written, &err);
 635         check_result = utf16_to_ucs4_check_result (res, exp3, items_read, 1, items_written, 0, err, TRUE);
 636         if (check_result) return check_result;
 637         g_free (res);
 638
 639         // This loop tests the bounds of the conversion algorithm
 640         current_read_index = current_write_index = 0;
 641         for (i=0;i<11;i++) {
 642                 items_read = items_written = 0;
 643                 err = 0;
 644                 res = g_utf16_to_ucs4 (&str4[current_read_index], read_write[i*3], &items_read, &items_written, &err);
 645                 check_result = utf16_to_ucs4_check_result (res, &exp4[current_write_index], items_read,
 646                                              read_write[(i*3)+1], items_written, read_write[(i*3)+2], err,
 647                                              !read_write[(i*3)+2]);
 648                 if (check_result) return check_result;
 649                 g_free (res);
 650                 current_read_index += read_write[i*3];
 651                 current_write_index += items_written;
 652         }
 653
 654         items_read = items_written = 0;
 655         err = 0;
 656         res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err);
 657         check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE);
 658         if (check_result) return check_result;
 659         g_free (res);
 660
 661         return OK;
 662 }
 663 RESULT
 664 test_utf8_strlen ()
 665 {
 666         gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'};//Valid, len = 5
 667         gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'};//Valid, len = 5
 668         gchar word3 [] = {'h','e',0xC2, 0x82,0x45,'\0'};                                                                                //Valid, len = 4
 669         gchar word4 [] = {0x62,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'};                                     //Valid, len = 5
 670
 671         glong len = 0;
 672
 673         //Test word1
 674         len = g_utf8_strlen (word1,-1);
 675         if (len != 5)
 676                 return FAILED ("Word1 expected length of 5, but was %i", len);
 677         //Do tests with different values for max parameter.
 678         len = g_utf8_strlen (word1,1);
 679         if (len != 0)
 680                 return FAILED ("Word1, max = 1, expected length of 0, but was %i", len);
 681         len = g_utf8_strlen (word1,2);
 682         if (len != 1)
 683                 return FAILED ("Word1, max = 1, expected length of 1, but was %i", len);
 684         len = g_utf8_strlen (word1,3);
 685         if (len != 2)
 686                 return FAILED ("Word1, max = 2, expected length of 2, but was %i", len);
 687
 688         //Test word2
 689         len = g_utf8_strlen (word2,-1);
 690         if (len != 5)
 691                 return FAILED ("Word2 expected length of 5, but was %i", len);
 692
 693         //Test word3
 694         len = g_utf8_strlen (word3,-1);
 695         if (len != 4)
 696                 return FAILED ("Word3 expected length of 4, but was %i", len);
 697
 698         //Test word4
 699         len = g_utf8_strlen (word4,-1);
 700         if (len != 5)
 701                 return FAILED ("Word4 expected length of 5, but was %i", len);
 702
 703         //Test null case
 704         len = g_utf8_strlen(NULL,0);
 705         if (len != 0)
 706                 return FAILED ("Expected passing null to result in a length of 0");
 707         return OK;
 708 }
 709
 710 RESULT
 711 test_utf8_get_char()
 712 {
 713         gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
 714
 715         gunichar value = g_utf8_get_char (&word1 [0]);
 716         if (value != 0x82UL)
 717                 return FAILED ("Expected value of 0x82, but was %x", value);
 718         value = g_utf8_get_char (&word1 [2]);
 719         if (value != 0x45UL)
 720                 return FAILED ("Expected value of 0x45, but was %x", value);
 721         value = g_utf8_get_char (&word1 [3]);
 722         if (value != 0x1043UL)
 723                 return FAILED ("Expected value of 0x1043, but was %x", value);
 724         value = g_utf8_get_char (&word1 [6]);
 725         if (value != 0x58UL)
 726                 return FAILED ("Expected value of 0x58, but was %x", value);
 727         value = g_utf8_get_char (&word1 [7]);
 728         if (value != 0x42082UL)
 729                 return FAILED ("Expected value of 0x42082, but was %x", value);
 730
 731         return OK;
 732 }
 733
 734 RESULT
 735 test_utf8_next_char()
 736 {
 737         gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
 738         gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
 739         gchar word1ExpectedValues [] = {0xC2, 0x45,0xE1, 0x58, 0xF1};
 740         gchar word2ExpectedValues [] = {0xF1, 0xC2, 0x45, 0xE1, 0x58};
 741
 742         gchar* ptr = word1;
 743         gint count = 0;
 744         //Test word1
 745         while (*ptr != 0) {
 746                 if (count > 4)
 747                         return FAILED ("Word1 has gone past its expected length");
 748                 if (*ptr != word1ExpectedValues[count])
 749                         return FAILED ("Word1 has an incorrect next_char at index %i", count);
 750                 ptr = g_utf8_next_char (ptr);
 751                 count++;
 752         }
 753
 754         //Test word2
 755         count = 0;
 756         ptr = word2;
 757         while (*ptr != 0) {
 758                 if (count > 4)
 759                         return FAILED ("Word2 has gone past its expected length");
 760                 if (*ptr != word2ExpectedValues[count])
 761                         return FAILED ("Word2 has an incorrect next_char at index %i", count);
 762                 ptr = g_utf8_next_char (ptr);
 763                 count++;
 764         }
 765
 766         return OK;
 767 }
 768
 769 RESULT
 770 test_utf8_validate()
 771 {
 772         gchar invalidWord1 [] = {0xC3, 0x82, 0xC1,0x90,'\0'}; //Invalid, 1nd oct Can't be 0xC0 or 0xC1
 773         gchar invalidWord2 [] = {0xC1, 0x89, 0x60, '\0'}; //Invalid, 1st oct can not be 0xC1
 774         gchar invalidWord3 [] = {0xC2, 0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Invalid, oct after 0xC2 must be > 0x80
 775
 776         gchar validWord1 [] = {0xC2, 0x82, 0xC3,0xA0,'\0'}; //Valid
 777         gchar validWord2 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid
 778
 779         const gchar* end;
 780         gboolean retVal = g_utf8_validate (invalidWord1, -1, &end);
 781         if (retVal != FALSE)
 782                 return FAILED ("Expected invalidWord1 to be invalid");
 783         if (end != &invalidWord1 [2])
 784                 return FAILED ("Expected end parameter to be pointing to invalidWord1[2]");
 785
 786         end = NULL;
 787         retVal = g_utf8_validate (invalidWord2, -1, &end);
 788         if (retVal != FALSE)
 789                 return FAILED ("Expected invalidWord2 to be invalid");
 790         if (end != &invalidWord2 [0])
 791                 return FAILED ("Expected end parameter to be pointing to invalidWord2[0]");
 792
 793         end = NULL;
 794         retVal = g_utf8_validate (invalidWord3, -1, &end);
 795         if (retVal != FALSE)
 796                 return FAILED ("Expected invalidWord3 to be invalid");
 797         if (end != &invalidWord3 [0])
 798                 return FAILED ("Expected end parameter to be pointing to invalidWord3[1]");
 799
 800         end = NULL;
 801         retVal = g_utf8_validate (validWord1, -1, &end);
 802         if (retVal != TRUE)
 803                 return FAILED ("Expected validWord1 to be valid");
 804         if (end != &validWord1 [4])
 805                 return FAILED ("Expected end parameter to be pointing to validWord1[4]");
 806
 807         end = NULL;
 808         retVal = g_utf8_validate (validWord2, -1, &end);
 809         if (retVal != TRUE)
 810                 return FAILED ("Expected validWord2 to be valid");
 811         if (end != &validWord2 [11])
 812                 return FAILED ("Expected end parameter to be pointing to validWord2[11]");
 813         return OK;
 814 }
 815
 816 glong
 817 utf8_byteslen (const gchar *src)
 818 {
 819         int i = 0;
 820         do {
 821                 if (src [i] == '\0')
 822                         return i;
 823                 i++;
 824         } while (TRUE);
 825 }
 826
 827 RESULT
 828 test_utf8_strcase_each (const gchar *src, const gchar *expected, gboolean strup)
 829 {
 830         gchar *tmp;
 831         glong len, len2;
 832         RESULT r;
 833
 834         len = utf8_byteslen (src);
 835         tmp = strup ? g_utf8_strup (src, len) : g_utf8_strdown (src, len);
 836         len2 = utf8_byteslen (tmp);
 837         r = compare_strings_utf8_RESULT (expected, tmp, len < len2 ? len2 : len);
 838         g_free (tmp);
 839         return r;
 840 }
 841
 842 RESULT
 843 test_utf8_strup_each (const gchar *src, const gchar *expected)
 844 {
 845         return test_utf8_strcase_each (src, expected, TRUE);
 846 }
 847
 848 RESULT
 849 test_utf8_strdown_each (const gchar *src, const gchar *expected)
 850 {
 851         return test_utf8_strcase_each (src, expected, FALSE);
 852 }
 853
 854 /*
 855  * g_utf8_strup
 856  */
 857 RESULT
 858 test_utf8_strup ()
 859 {
 860         RESULT r;
 861
 862         if ((r = test_utf8_strup_each ("aBc", "ABC")) != OK)
 863                 return r;
 864         if ((r = test_utf8_strup_each ("x86-64", "X86-64")) != OK)
 865                 return r;
 866         // U+3B1 U+392 -> U+391 U+392
 867         if ((r = test_utf8_strup_each ("\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92")) != OK)
 868                 return r;
 869         // U+FF21 -> U+FF21
 870         if ((r = test_utf8_strup_each ("\xEF\xBC\xA1", "\xEF\xBC\xA1")) != OK)
 871                 return r;
 872         // U+FF41 -> U+FF21
 873         if ((r = test_utf8_strup_each ("\xEF\xBD\x81", "\xEF\xBC\xA1")) != OK)
 874                 return r;
 875         // U+10428 -> U+10400
 876         if ((r = test_utf8_strup_each ("\xF0\x90\x90\xA8", "\xF0\x90\x90\x80")) != OK)
 877                 return r;
 878
 879         return OK;
 880 }
 881
 882 /*
 883  * g_utf8_strdown
 884  */
 885 RESULT
 886 test_utf8_strdown ()
 887 {
 888         RESULT r;
 889
 890         if ((r = test_utf8_strdown_each ("aBc", "abc")) != OK)
 891                 return r;
 892         if ((r = test_utf8_strdown_each ("X86-64", "x86-64")) != OK)
 893                 return r;
 894         // U+391 U+3B2 -> U+3B1 U+3B2
 895         if ((r = test_utf8_strdown_each ("\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2")) != OK)
 896                 return r;
 897 /*
 898         // U+FF41 -> U+FF41
 899         if ((r = test_utf8_strdown_each ("\xEF\xBC\x81", "\xEF\xBC\x81")) != OK)
 900                 return r;
 901         // U+FF21 -> U+FF41
 902         if ((r = test_utf8_strdown_each ("\xEF\xBC\xA1", "\xEF\xBD\x81")) != OK)
 903                 return r;
 904         // U+10400 -> U+10428
 905         if ((r = test_utf8_strdown_each ("\xF0\x90\x90\x80", "\xF0\x90\x90\xA8")) != OK)
 906                 return r;
 907 */
 908         return OK;
 909 }
 910
 911 /*
 912  * test initialization
 913  */
 914
 915 static Test utf8_tests [] = {
 916         {"g_utf16_to_utf8", test_utf16_to_utf8},
 917         {"g_utf8_to_utf16", test_utf8_to_utf16},
 918         {"g_utf8_to_utf16_with_nuls", test_utf8_to_utf16_with_nuls},
 919         {"g_utf8_seq", test_utf8_seq},
 920         {"g_convert", test_convert },
 921         {"g_unichar_xdigit_value", test_xdigit },
 922         {"g_ucs4_to_utf16", test_ucs4_to_utf16 },
 923         {"g_utf16_to_ucs4", test_utf16_to_ucs4 },
 924         {"g_utf8_strlen", test_utf8_strlen },
 925         {"g_utf8_get_char", test_utf8_get_char },
 926         {"g_utf8_next_char", test_utf8_next_char },
 927         {"g_utf8_validate", test_utf8_validate },
 928         {"g_utf8_strup", test_utf8_strup},
 929         {"g_utf8_strdown", test_utf8_strdown},
 930         {NULL, NULL}
 931 };
 932
 933 DEFINE_TEST_GROUP_INIT(utf8_tests_init, utf8_tests)
 934
 935