eglib/test/utf8.c

   1 #include "test.h"
   2
   3 /*
   4  * g_utf16_to_utf8
   5  */
   6
   7 glong
   8 compare_strings_utf8_pos (const gchar *expected, const gchar *actual, glong size)
   9 {
  10         int i;
  11         for (i = 0; i < size; i++)
  12                 if (expected [i] != actual [i])
  13                         return i;
  14         return -1;
  15 }
  16
  17 RESULT
  18 compare_strings_utf8_RESULT (const gchar *expected, const gchar *actual, glong size)
  19 {
  20         glong ret;
  21
  22         ret = compare_strings_utf8_pos (expected, actual, size);
  23         if (ret < 0)
  24                 return OK;
  25         return FAILED ("Incorrect output: expected '%s' but was '%s', differ at %d\n", expected, actual, ret);
  26 }
  27
  28 void
  29 gchar_to_gunichar2 (gunichar2 ret[], const gchar *src)
  30 {
  31         int i;
  32
  33         for (i = 0; src [i]; i++)
  34                 ret [i] = src [i];
  35         ret [i] = 0;
  36 }
  37
  38 RESULT
  39 compare_utf16_to_utf8_explicit (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out, glong size_spec)
  40 {
  41         GError *error;
  42         gchar* ret;
  43         RESULT result;
  44         glong in_read, out_read;
  45
  46         result = NULL;
  47
  48         error = NULL;
  49         ret = g_utf16_to_utf8 (utf16, size_spec, &in_read, &out_read, &error);
  50         if (error) {
  51                 result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
  52                 g_error_free (error);
  53                 if (ret)
  54                         g_free (ret);
  55                 return result;
  56         }
  57         if (in_read != len_in)
  58                 result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
  59         else if (out_read != len_out)
  60                 result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
  61         else
  62                 result = compare_strings_utf8_RESULT (expected, ret, len_out);
  63
  64         g_free (ret);
  65         if (result)
  66                 return result;
  67
  68         return OK;
  69 }
  70
  71 RESULT
  72 compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_in, glong len_out)
  73 {
  74         RESULT result;
  75
  76         result = compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, -1);
  77         if (result != OK)
  78                 return result;
  79         return compare_utf16_to_utf8_explicit (expected, utf16, len_in, len_out, len_in);
  80 }
  81
  82 RESULT
  83 test_utf16_to_utf8 ()
  84 {
  85         const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81", *src5 = "\xF0\x90\x90\x80";
  86         gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}, str5 [] = {0xD801, 0xDC00, 0};
  87         RESULT result;
  88
  89         gchar_to_gunichar2 (str1, src1);
  90
  91         /* empty string */
  92         result = compare_utf16_to_utf8 (src0, str0, 0, 0);
  93         if (result != OK)
  94                 return result;
  95
  96         result = compare_utf16_to_utf8 (src1, str1, 5, 5);
  97         if (result != OK)
  98                 return result;
  99         result = compare_utf16_to_utf8 (src2, str2, 2, 4);
 100         if (result != OK)
 101                 return result;
 102         result = compare_utf16_to_utf8 (src3, str3, 1, 3);
 103         if (result != OK)
 104                 return result;
 105         result = compare_utf16_to_utf8 (src4, str4, 1, 3);
 106         if (result != OK)
 107                 return result;
 108         result = compare_utf16_to_utf8 (src5, str5, 2, 4);
 109         if (result != OK)
 110                 return result;
 111
 112         return OK;
 113 }
 114
 115 /*
 116  * g_utf8_to_utf16
 117  */
 118
 119 glong
 120 compare_strings_utf16_pos (const gunichar2 *expected, const gunichar2 *actual, glong size)
 121 {
 122         int i;
 123         for (i = 0; i < size; i++)
 124                 if (expected [i] != actual [i])
 125                         return i;
 126         return -1;
 127 }
 128
 129 RESULT
 130 compare_strings_utf16_RESULT (const gunichar2 *expected, const gunichar2 *actual, glong size)
 131 {
 132         glong ret;
 133
 134         ret = compare_strings_utf16_pos (expected, actual, size);
 135         if (ret < 0)
 136                 return OK;
 137         return FAILED ("Incorrect output: expected '%s' but was '%s'\n", expected, actual);
 138 }
 139
 140 RESULT
 141 compare_utf8_to_utf16_explicit (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out, glong size_spec)
 142 {
 143         GError *error;
 144         gunichar2* ret;
 145         RESULT result;
 146         glong in_read, out_read;
 147
 148         result = NULL;
 149
 150         error = NULL;
 151         ret = g_utf8_to_utf16 (utf8, size_spec, &in_read, &out_read, &error);
 152         if (error) {
 153                 result = FAILED ("The error is %d %s\n", (error)->code, (error)->message);
 154                 g_error_free (error);
 155                 if (ret)
 156                         g_free (ret);
 157                 return result;
 158         }
 159         if (in_read != len_in)
 160                 result = FAILED ("Read size is incorrect: expected %d but was %d\n", len_in, in_read);
 161         else if (out_read != len_out)
 162                 result = FAILED ("Converted size is incorrect: expected %d but was %d\n", len_out, out_read);
 163         else
 164                 result = compare_strings_utf16_RESULT (expected, ret, len_out);
 165
 166         g_free (ret);
 167         if (result)
 168                 return result;
 169
 170         return OK;
 171 }
 172
 173
 174 RESULT
 175 compare_utf8_to_utf16 (const gunichar2 *expected, const gchar *utf8, glong len_in, glong len_out)
 176 {
 177         RESULT result;
 178
 179         result = compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, -1);
 180         if (result != OK)
 181                 return result;
 182         return compare_utf8_to_utf16_explicit (expected, utf8, len_in, len_out, len_in);
 183 }
 184
 185 RESULT
 186 test_utf8_seq ()
 187 {
 188         const gchar *src = "\xE5\xB9\xB4\x27";
 189         glong in_read, out_read;
 190         //gunichar2 expected [6];
 191         GError *error = NULL;
 192         gunichar2 *dst;
 193
 194         printf ("got: %s\n", src);
 195         dst = g_utf8_to_utf16 (src, (glong)strlen (src), &in_read, &out_read, &error);
 196         if (error != NULL){
 197                 return error->message;
 198         }
 199
 200         if (in_read != 4) {
 201                 return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
 202         }
 203         if (out_read != 2) {
 204                 return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
 205         }
 206         g_free (dst);
 207
 208         return OK;
 209 }
 210
 211 RESULT
 212 test_utf8_to_utf16 ()
 213 {
 214         const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81";
 215         gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0};
 216         RESULT result;
 217
 218         gchar_to_gunichar2 (str1, src1);
 219
 220         /* empty string */
 221         result = compare_utf8_to_utf16 (str0, src0, 0, 0);
 222         if (result != OK)
 223                 return result;
 224
 225         result = compare_utf8_to_utf16 (str1, src1, 5, 5);
 226         if (result != OK)
 227                 return result;
 228         result = compare_utf8_to_utf16 (str2, src2, 4, 2);
 229         if (result != OK)
 230                 return result;
 231         result = compare_utf8_to_utf16 (str3, src3, 3, 1);
 232         if (result != OK)
 233                 return result;
 234         result = compare_utf8_to_utf16 (str4, src4, 3, 1);
 235         if (result != OK)
 236                 return result;
 237
 238         return OK;
 239 }
 240
 241 RESULT
 242 test_convert ()
 243 {
 244         gsize n;
 245         char *s = g_convert ("\242\241\243\242\241\243\242\241\243\242\241\243", -1, "UTF-8", "ISO-8859-1", NULL, &n, NULL);
 246         guchar *u = (guchar *) s;
 247
 248         if (!s)
 249                 return FAILED ("Expected 24 bytes, got: NULL");
 250
 251         if (strlen (s) != 24)
 252                 return FAILED ("Expected 24 bytes, got: %d", strlen (s));
 253
 254         if (u [1] != 162 || u [2] != 194 ||
 255             u [3] != 161 || u [4] != 194 ||
 256             u [5] != 163 || u [6] != 194)
 257                 return FAILED ("Incorrect conversion");
 258
 259         g_free (s);
 260
 261         return OK;
 262 }
 263
 264
 265 RESULT
 266 test_xdigit ()
 267 {
 268         static char test_chars[] = {
 269                 '0', '1', '2', '3', '4',
 270                 '5', '6', '7', '8', '9',
 271                 'a', 'b', 'c', 'd', 'e', 'f', 'g',
 272                 'A', 'B', 'C', 'D', 'E', 'F', 'G'};
 273         static gint32 test_values[] = {
 274                 0, 1, 2, 3, 4,
 275                 5, 6, 7, 8, 9,
 276                 10, 11, 12, 13, 14, 15, -1,
 277                 10, 11, 12, 13, 14, 15, -1};
 278
 279                 int i =0;
 280
 281                 for (i = 0; i < sizeof(test_chars); i++)
 282                         if (g_unichar_xdigit_value ((gunichar)test_chars[i]) != test_values[i])
 283                                 return FAILED("Incorrect value %d at index %d", test_values[i], i);
 284
 285                 return OK;
 286 }
 287
 288 static RESULT
 289 ucs4_to_utf16_check_result (const gunichar2 *result_str, const gunichar2 *expected_str,
 290                             glong result_items_read, glong expected_items_read,
 291                             glong result_items_written, glong expected_items_written,
 292                             GError* result_error, gboolean expect_error)
 293 {
 294         glong i;
 295         if (result_items_read != expected_items_read)
 296                 return FAILED("Incorrect number of items read %d", result_items_read);
 297         if (result_items_written != expected_items_written)
 298                 return FAILED("Incorrect number of items written %d", result_items_written);
 299         if (result_error && !expect_error)
 300                 return FAILED("There should not be an error code.");
 301         if (!result_error && expect_error)
 302                 return FAILED("Unexpected error object.");
 303         if (expect_error && result_str)
 304                 return FAILED("NULL should be returned when an error occurs.");
 305         if (!expect_error && !result_str)
 306                 return FAILED("When no error occurs NULL should not be returned.");
 307         for (i=0; i<expected_items_written;i++) {
 308                 if (result_str [i] != expected_str [i])
 309                         return FAILED("Incorrect value %d at index %d", result_str [i], i);
 310         }
 311         if (result_str && result_str[expected_items_written] != '\0')
 312                 return FAILED("Null termination not found at the end of the string.");
 313
 314         return OK;
 315 }
 316
 317 RESULT
 318 test_ucs4_to_utf16 ()
 319 {
 320         static gunichar str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
 321         static gunichar2 exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
 322         static gunichar str2[3] = {'h',0x80000000,'\0'};
 323         static gunichar2 exp2[2] = {'h','\0'};
 324         static gunichar str3[3] = {'h',0xDA00,'\0'};
 325         static gunichar str4[3] = {'h',0x10FFFF,'\0'};
 326         static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'};
 327         static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'};
 328         static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'};
 329         static gunichar str6[2] = {0x10400, '\0'};
 330         static gunichar2 exp6[3] = {0xD801, 0xDC00, '\0'};
 331         static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2};
 332         gunichar2* res;
 333         glong items_read, items_written, current_write_index;
 334         GError* err=0;
 335         RESULT check_result;
 336         glong i;
 337
 338         res = g_ucs4_to_utf16 (str1, 12, &items_read, &items_written, &err);
 339         check_result = ucs4_to_utf16_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
 340         if (check_result) return check_result;
 341         g_free (res);
 342
 343         items_read = items_written = 0;
 344         res = g_ucs4_to_utf16 (str2, 0, &items_read, &items_written, &err);
 345         check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
 346         if (check_result) return check_result;
 347         g_free (res);
 348
 349         items_read = items_written = 0;
 350         res = g_ucs4_to_utf16 (str2, 1, &items_read, &items_written, &err);
 351         check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
 352         if (check_result) return check_result;
 353         g_free (res);
 354
 355         items_read = items_written = 0;
 356         res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err);
 357         check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
 358         g_free (res);
 359         if (check_result) return check_result;
 360
 361         items_read = items_written = 0;
 362         err = 0;
 363         res = g_ucs4_to_utf16 (str3, 2, &items_read, &items_written, &err);
 364         check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE);
 365         if (check_result) return check_result;
 366         g_free (res);
 367
 368         items_read = items_written = 0;
 369         err = 0;
 370         res = g_ucs4_to_utf16 (str4, 5, &items_read, &items_written, &err);
 371         check_result = ucs4_to_utf16_check_result (res, exp4, items_read, 2, items_written, 3, err, FALSE);
 372         if (check_result) return check_result;
 373         g_free (res);
 374
 375         // This loop tests the bounds of the conversion algorithm
 376         current_write_index = 0;
 377         for (i=0;i<6;i++) {
 378                 items_read = items_written = 0;
 379                 err = 0;
 380                 res = g_ucs4_to_utf16 (&str5[i], 1, &items_read, &items_written, &err);
 381                 check_result = ucs4_to_utf16_check_result (res, &exp5[current_write_index],
 382                                         items_read, read_write[i*2], items_written, read_write[(i*2)+1], err, !read_write[(i*2)+1]);
 383                 if (check_result) return check_result;
 384                 g_free (res);
 385                 current_write_index += items_written;
 386         }
 387
 388         items_read = items_written = 0;
 389         err = 0;
 390         res = g_ucs4_to_utf16 (str6, 1, &items_read, &items_written, &err);
 391         check_result = ucs4_to_utf16_check_result (res, exp6, items_read, 1, items_written, 2, err, FALSE);
 392         if (check_result) return check_result;
 393         g_free (res);
 394
 395         return OK;
 396 }
 397
 398 static RESULT
 399 utf16_to_ucs4_check_result (const gunichar *result_str, const gunichar *expected_str,
 400                             glong result_items_read, glong expected_items_read,
 401                             glong result_items_written, glong expected_items_written,
 402                             GError* result_error, gboolean expect_error)
 403 {
 404         glong i;
 405         if (result_items_read != expected_items_read)
 406                 return FAILED("Incorrect number of items read %d", result_items_read);
 407         if (result_items_written != expected_items_written)
 408                 return FAILED("Incorrect number of items written %d", result_items_written);
 409         if (result_error && !expect_error)
 410                 return FAILED("There should not be an error code.");
 411         if (!result_error && expect_error)
 412                 return FAILED("Unexpected error object.");
 413         if (expect_error && result_str)
 414                 return FAILED("NULL should be returned when an error occurs.");
 415         if (!expect_error && !result_str)
 416                 return FAILED("When no error occurs NULL should not be returned.");
 417         for (i=0; i<expected_items_written;i++) {
 418                 if (result_str [i] != expected_str [i])
 419                         return FAILED("Incorrect value %d at index %d", result_str [i], i);
 420         }
 421         if (result_str && result_str[expected_items_written] != '\0')
 422                 return FAILED("Null termination not found at the end of the string.");
 423
 424         return OK;
 425 }
 426
 427 RESULT
 428 test_utf16_to_ucs4 ()
 429 {
 430         static gunichar2 str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
 431         static gunichar exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
 432         static gunichar2 str2[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'};
 433         static gunichar exp2[3] = {'H',0x00010001,'\0'};
 434         static gunichar2 str3[4] = {'H', 0xDC00 ,'l','\0'};
 435         static gunichar exp3[2] = {'H','\0'};
 436         static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
 437                                      0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
 438         static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
 439         static gunichar2 str5[3] = {0xD801, 0xDC00, 0};
 440         static gunichar exp5[2] = {0x10400, 0};
 441         static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
 442         gunichar* res;
 443         glong items_read, items_written, current_read_index,current_write_index;
 444         GError* err=0;
 445         RESULT check_result;
 446         glong i;
 447
 448         res = g_utf16_to_ucs4 (str1, 12, &items_read, &items_written, &err);
 449         check_result = utf16_to_ucs4_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
 450         if (check_result) return check_result;
 451         g_free (res);
 452
 453         items_read = items_written = 0;
 454         res = g_utf16_to_ucs4 (str2, 0, &items_read, &items_written, &err);
 455         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
 456         if (check_result) return check_result;
 457         g_free (res);
 458
 459         items_read = items_written = 0;
 460         res = g_utf16_to_ucs4 (str2, 1, &items_read, &items_written, &err);
 461         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
 462         if (check_result) return check_result;
 463         g_free (res);
 464
 465         items_read = items_written = 0;
 466         res = g_utf16_to_ucs4 (str2, 2, &items_read, &items_written, &err);
 467         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
 468         if (check_result) return check_result;
 469         g_free (res);
 470
 471         items_read = items_written = 0;
 472         res = g_utf16_to_ucs4 (str2, 3, &items_read, &items_written, &err);
 473         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
 474         if (check_result) return check_result;
 475         g_free (res);
 476
 477         items_read = items_written = 0;
 478         res = g_utf16_to_ucs4 (str2, 4, &items_read, &items_written, &err);
 479         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
 480         if (check_result) return check_result;
 481         g_free (res);
 482
 483         items_read = items_written = 0;
 484         res = g_utf16_to_ucs4 (str2, 5, &items_read, &items_written, &err);
 485         check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 4, items_written, 0, err, TRUE);
 486         if (check_result) return check_result;
 487         g_free (res);
 488
 489         items_read = items_written = 0;
 490         err = 0;
 491         res = g_utf16_to_ucs4 (str3, 5, &items_read, &items_written, &err);
 492         check_result = utf16_to_ucs4_check_result (res, exp3, items_read, 1, items_written, 0, err, TRUE);
 493         if (check_result) return check_result;
 494         g_free (res);
 495
 496         // This loop tests the bounds of the conversion algorithm
 497         current_read_index = current_write_index = 0;
 498         for (i=0;i<11;i++) {
 499                 items_read = items_written = 0;
 500                 err = 0;
 501                 res = g_utf16_to_ucs4 (&str4[current_read_index], read_write[i*3], &items_read, &items_written, &err);
 502                 check_result = utf16_to_ucs4_check_result (res, &exp4[current_write_index], items_read,
 503                                              read_write[(i*3)+1], items_written, read_write[(i*3)+2], err,
 504                                              !read_write[(i*3)+2]);
 505                 if (check_result) return check_result;
 506                 g_free (res);
 507                 current_read_index += read_write[i*3];
 508                 current_write_index += items_written;
 509         }
 510
 511         items_read = items_written = 0;
 512         err = 0;
 513         res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err);
 514         check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE);
 515         if (check_result) return check_result;
 516         g_free (res);
 517
 518         return OK;
 519 }
 520 RESULT
 521 test_utf8_strlen ()
 522 {
 523         gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'};//Valid, len = 5
 524         gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'};//Valid, len = 5
 525         gchar word3 [] = {'h','e',0xC2, 0x82,0x45,'\0'};                                                                                //Valid, len = 4
 526         gchar word4 [] = {0x62,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'};                                     //Valid, len = 5
 527
 528         glong len = 0;
 529
 530         //Test word1
 531         len = g_utf8_strlen (word1,-1);
 532         if (len != 5)
 533                 return FAILED ("Word1 expected length of 5, but was %i", len);
 534         //Do tests with different values for max parameter.
 535         len = g_utf8_strlen (word1,1);
 536         if (len != 0)
 537                 return FAILED ("Word1, max = 1, expected length of 0, but was %i", len);
 538         len = g_utf8_strlen (word1,2);
 539         if (len != 1)
 540                 return FAILED ("Word1, max = 1, expected length of 1, but was %i", len);
 541         len = g_utf8_strlen (word1,3);
 542         if (len != 2)
 543                 return FAILED ("Word1, max = 2, expected length of 2, but was %i", len);
 544
 545         //Test word2
 546         len = g_utf8_strlen (word2,-1);
 547         if (len != 5)
 548                 return FAILED ("Word2 expected length of 5, but was %i", len);
 549
 550         //Test word3
 551         len = g_utf8_strlen (word3,-1);
 552         if (len != 4)
 553                 return FAILED ("Word3 expected length of 4, but was %i", len);
 554
 555         //Test word4
 556         len = g_utf8_strlen (word4,-1);
 557         if (len != 5)
 558                 return FAILED ("Word4 expected length of 5, but was %i", len);
 559
 560         //Test null case
 561         len = g_utf8_strlen(NULL,0);
 562         if (len != 0)
 563                 return FAILED ("Expected passing null to result in a length of 0");
 564         return OK;
 565 }
 566
 567 RESULT
 568 test_utf8_get_char()
 569 {
 570         gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
 571
 572         gunichar value = g_utf8_get_char (&word1 [0]);
 573         if (value != 0x82UL)
 574                 return FAILED ("Expected value of 0x82, but was %x", value);
 575         value = g_utf8_get_char (&word1 [2]);
 576         if (value != 0x45UL)
 577                 return FAILED ("Expected value of 0x45, but was %x", value);
 578         value = g_utf8_get_char (&word1 [3]);
 579         if (value != 0x1043UL)
 580                 return FAILED ("Expected value of 0x1043, but was %x", value);
 581         value = g_utf8_get_char (&word1 [6]);
 582         if (value != 0x58UL)
 583                 return FAILED ("Expected value of 0x58, but was %x", value);
 584         value = g_utf8_get_char (&word1 [7]);
 585         if (value != 0x42082UL)
 586                 return FAILED ("Expected value of 0x42082, but was %x", value);
 587
 588         return OK;
 589 }
 590
 591 RESULT
 592 test_utf8_next_char()
 593 {
 594         gchar word1 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid, len = 5
 595         gchar word2 [] = {0xF1, 0x82, 0x82, 0x82,0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Valid, len = 5
 596         gchar word1ExpectedValues [] = {0xC2, 0x45,0xE1, 0x58, 0xF1};
 597         gchar word2ExpectedValues [] = {0xF1, 0xC2, 0x45, 0xE1, 0x58};
 598
 599         gchar* ptr = word1;
 600         gint count = 0;
 601         //Test word1
 602         while (*ptr != 0) {
 603                 if (count > 4)
 604                         return FAILED ("Word1 has gone past its expected length");
 605                 if (*ptr != word1ExpectedValues[count])
 606                         return FAILED ("Word1 has an incorrect next_char at index %i", count);
 607                 ptr = g_utf8_next_char (ptr);
 608                 count++;
 609         }
 610
 611         //Test word2
 612         count = 0;
 613         ptr = word2;
 614         while (*ptr != 0) {
 615                 if (count > 4)
 616                         return FAILED ("Word2 has gone past its expected length");
 617                 if (*ptr != word2ExpectedValues[count])
 618                         return FAILED ("Word2 has an incorrect next_char at index %i", count);
 619                 ptr = g_utf8_next_char (ptr);
 620                 count++;
 621         }
 622
 623         return OK;
 624 }
 625
 626 RESULT
 627 test_utf8_validate()
 628 {
 629         gchar invalidWord1 [] = {0xC3, 0x82, 0xC1,0x90,'\0'}; //Invalid, 1nd oct Can't be 0xC0 or 0xC1
 630         gchar invalidWord2 [] = {0xC1, 0x89, 0x60, '\0'}; //Invalid, 1st oct can not be 0xC1
 631         gchar invalidWord3 [] = {0xC2, 0x45,0xE1, 0x81, 0x83,0x58,'\0'}; //Invalid, oct after 0xC2 must be > 0x80
 632
 633         gchar validWord1 [] = {0xC2, 0x82, 0xC3,0xA0,'\0'}; //Valid
 634         gchar validWord2 [] = {0xC2, 0x82,0x45,0xE1, 0x81, 0x83,0x58,0xF1, 0x82, 0x82, 0x82,'\0'}; //Valid
 635
 636         const gchar* end;
 637         gboolean retVal = g_utf8_validate (invalidWord1, -1, &end);
 638         if (retVal != FALSE)
 639                 return FAILED ("Expected invalidWord1 to be invalid");
 640         if (end != &invalidWord1 [2])
 641                 return FAILED ("Expected end parameter to be pointing to invalidWord1[2]");
 642
 643         end = NULL;
 644         retVal = g_utf8_validate (invalidWord2, -1, &end);
 645         if (retVal != FALSE)
 646                 return FAILED ("Expected invalidWord2 to be invalid");
 647         if (end != &invalidWord2 [0])
 648                 return FAILED ("Expected end parameter to be pointing to invalidWord2[0]");
 649
 650         end = NULL;
 651         retVal = g_utf8_validate (invalidWord3, -1, &end);
 652         if (retVal != FALSE)
 653                 return FAILED ("Expected invalidWord3 to be invalid");
 654         if (end != &invalidWord3 [0])
 655                 return FAILED ("Expected end parameter to be pointing to invalidWord3[1]");
 656
 657         end = NULL;
 658         retVal = g_utf8_validate (validWord1, -1, &end);
 659         if (retVal != TRUE)
 660                 return FAILED ("Expected validWord1 to be valid");
 661         if (end != &validWord1 [4])
 662                 return FAILED ("Expected end parameter to be pointing to validWord1[4]");
 663
 664         end = NULL;
 665         retVal = g_utf8_validate (validWord2, -1, &end);
 666         if (retVal != TRUE)
 667                 return FAILED ("Expected validWord2 to be valid");
 668         if (end != &validWord2 [11])
 669                 return FAILED ("Expected end parameter to be pointing to validWord2[11]");
 670         return OK;
 671 }
 672
 673 glong
 674 utf8_byteslen (const gchar *src)
 675 {
 676         int i = 0;
 677         do {
 678                 if (src [i] == '\0')
 679                         return i;
 680                 i++;
 681         } while (TRUE);
 682 }
 683
 684 RESULT
 685 test_utf8_strcase_each (const gchar *src, const gchar *expected, gboolean strup)
 686 {
 687         gchar *tmp;
 688         glong len, len2;
 689         RESULT r;
 690
 691         len = utf8_byteslen (src);
 692         tmp = strup ? g_utf8_strup (src, len) : g_utf8_strdown (src, len);
 693         len2 = utf8_byteslen (tmp);
 694         r = compare_strings_utf8_RESULT (expected, tmp, len < len2 ? len2 : len);
 695         g_free (tmp);
 696         return r;
 697 }
 698
 699 RESULT
 700 test_utf8_strup_each (const gchar *src, const gchar *expected)
 701 {
 702         return test_utf8_strcase_each (src, expected, TRUE);
 703 }
 704
 705 RESULT
 706 test_utf8_strdown_each (const gchar *src, const gchar *expected)
 707 {
 708         return test_utf8_strcase_each (src, expected, FALSE);
 709 }
 710
 711 /*
 712  * g_utf8_strup
 713  */
 714 RESULT
 715 test_utf8_strup ()
 716 {
 717         RESULT r;
 718
 719         if ((r = test_utf8_strup_each ("aBc", "ABC")) != OK)
 720                 return r;
 721         if ((r = test_utf8_strup_each ("x86-64", "X86-64")) != OK)
 722                 return r;
 723         // U+3B1 U+392 -> U+391 U+392
 724         if ((r = test_utf8_strup_each ("\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92")) != OK)
 725                 return r;
 726         // U+FF21 -> U+FF21
 727         if ((r = test_utf8_strup_each ("\xEF\xBC\xA1", "\xEF\xBC\xA1")) != OK)
 728                 return r;
 729         // U+FF41 -> U+FF21
 730         if ((r = test_utf8_strup_each ("\xEF\xBD\x81", "\xEF\xBC\xA1")) != OK)
 731                 return r;
 732         // U+10428 -> U+10400
 733         if ((r = test_utf8_strup_each ("\xF0\x90\x90\xA8", "\xF0\x90\x90\x80")) != OK)
 734                 return r;
 735
 736         return OK;
 737 }
 738
 739 /*
 740  * g_utf8_strdown
 741  */
 742 RESULT
 743 test_utf8_strdown ()
 744 {
 745         RESULT r;
 746
 747         if ((r = test_utf8_strdown_each ("aBc", "abc")) != OK)
 748                 return r;
 749         if ((r = test_utf8_strdown_each ("X86-64", "x86-64")) != OK)
 750                 return r;
 751         // U+391 U+3B2 -> U+3B1 U+3B2
 752         if ((r = test_utf8_strdown_each ("\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2")) != OK)
 753                 return r;
 754 /*
 755         // U+FF41 -> U+FF41
 756         if ((r = test_utf8_strdown_each ("\xEF\xBC\x81", "\xEF\xBC\x81")) != OK)
 757                 return r;
 758         // U+FF21 -> U+FF41
 759         if ((r = test_utf8_strdown_each ("\xEF\xBC\xA1", "\xEF\xBD\x81")) != OK)
 760                 return r;
 761         // U+10400 -> U+10428
 762         if ((r = test_utf8_strdown_each ("\xF0\x90\x90\x80", "\xF0\x90\x90\xA8")) != OK)
 763                 return r;
 764 */
 765         return OK;
 766 }
 767
 768 /*
 769  * test initialization
 770  */
 771
 772 static Test utf8_tests [] = {
 773         {"g_utf16_to_utf8", test_utf16_to_utf8},
 774         {"g_utf8_to_utf16", test_utf8_to_utf16},
 775         {"g_utf8_seq", test_utf8_seq},
 776         {"g_convert", test_convert },
 777         {"g_unichar_xdigit_value", test_xdigit },
 778         {"g_ucs4_to_utf16", test_ucs4_to_utf16 },
 779         {"g_utf16_to_ucs4", test_utf16_to_ucs4 },
 780         {"g_utf8_strlen", test_utf8_strlen },
 781         {"g_utf8_get_char", test_utf8_get_char },
 782         {"g_utf8_next_char", test_utf8_next_char },
 783         {"g_utf8_validate", test_utf8_validate },
 784         {"g_utf8_strup", test_utf8_strup},
 785         {"g_utf8_strdown", test_utf8_strdown},
 786         {NULL, NULL}
 787 };
 788
 789 DEFINE_TEST_GROUP_INIT(utf8_tests_init, utf8_tests)
 790
 791