src/glib/Unicode.d

   1 /*
   2  * This file is part of duit.
   3  *
   4  * duit is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU Lesser General Public License as published by
   6  * the Free Software Foundation; either version 2.1 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * duit is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public License
  15  * along with duit; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  17  */
  18
  19 // generated automatically - do not change
  20 // find conversion definition on APILookup.txt
  21 // implement new conversion functionalities on the wrap.utils package
  22
  23 /*
  24  * Conversion parameters:
  25  * inFile  = glib-Unicode-Manipulation.html
  26  * outPack = glib
  27  * outFile = Unicode
  28  * strct   =
  29  * realStrct=
  30  * ctorStrct=
  31  * clss    = Unicode
  32  * interf  =
  33  * class Code: No
  34  * interface Code: No
  35  * template for:
  36  * extend  =
  37  * implements:
  38  * prefixes:
  39  *      - g_
  40  * omit structs:
  41  * omit prefixes:
  42  * omit code:
  43  * imports:
  44  *      - glib.ErrorG
  45  *      - glib.Str
  46  * structWrap:
  47  * local aliases:
  48  */
  49
  50 module glib.Unicode;
  51
  52 private import glib.glibtypes;
  53
  54 private import lib.glib;
  55
  56 private import glib.ErrorG;
  57 private import glib.Str;
  58
  59 /**
  60  * Description
  61  * This section describes a number of functions for dealing with
  62  * Unicode characters and strings. There are analogues of the
  63  * traditional ctype.h character classification
  64  * and case conversion functions, UTF-8 analogues of some string utility
  65  * functions, functions to perform normalization, case conversion and
  66  * collation on UTF-8 strings and finally functions to convert between
  67  * the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
  68  * The implementations of the Unicode functions in GLib are based
  69  * on the Unicode Character Data tables, which are available from
  70  * www.unicode.org.
  71  * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
  72  * GLib 2.12 supports Unicode 5.0.
  73  */
  74 public class Unicode
  75 {
  76
  77         /**
  78          */
  79
  80
  81
  82         /**
  83          * Checks whether ch is a valid Unicode character. Some possible
  84          * integer values of ch will not be valid. 0 is considered a valid
  85          * character, though it's normally a string terminator.
  86          * ch:
  87          *  a Unicode character
  88          * Returns:
  89          *  TRUE if ch is a valid Unicode character
  90          */
  91         public static int unicharValidate(gunichar ch)
  92         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
  93                 // gboolean g_unichar_validate (gunichar ch);
  94                 return g_unichar_validate(ch);
  95         }
  96
  97         /**
  98          * Determines whether a character is alphanumeric.
  99          * Given some UTF-8 text, obtain a character value
 100          * with g_utf8_get_char().
 101          * c:
 102          *  a Unicode character
 103          * Returns:
 104          *  TRUE if c is an alphanumeric character
 105          */
 106         public static int unicharIsalnum(gunichar c)
 107         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 108                 // gboolean g_unichar_isalnum (gunichar c);
 109                 return g_unichar_isalnum(c);
 110         }
 111
 112         /**
 113          * Determines whether a character is alphabetic (i.e. a letter).
 114          * Given some UTF-8 text, obtain a character value with
 115          * g_utf8_get_char().
 116          * c:
 117          *  a Unicode character
 118          * Returns:
 119          *  TRUE if c is an alphabetic character
 120          */
 121         public static int unicharIsalpha(gunichar c)
 122         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 123                 // gboolean g_unichar_isalpha (gunichar c);
 124                 return g_unichar_isalpha(c);
 125         }
 126
 127         /**
 128          * Determines whether a character is a control character.
 129          * Given some UTF-8 text, obtain a character value with
 130          * g_utf8_get_char().
 131          * c:
 132          *  a Unicode character
 133          * Returns:
 134          *  TRUE if c is a control character
 135          */
 136         public static int unicharIscntrl(gunichar c)
 137         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 138                 // gboolean g_unichar_iscntrl (gunichar c);
 139                 return g_unichar_iscntrl(c);
 140         }
 141
 142         /**
 143          * Determines whether a character is numeric (i.e. a digit). This
 144          * covers ASCII 0-9 and also digits in other languages/scripts. Given
 145          * some UTF-8 text, obtain a character value with g_utf8_get_char().
 146          * c:
 147          *  a Unicode character
 148          * Returns:
 149          *  TRUE if c is a digit
 150          */
 151         public static int unicharIsdigit(gunichar c)
 152         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 153                 // gboolean g_unichar_isdigit (gunichar c);
 154                 return g_unichar_isdigit(c);
 155         }
 156
 157         /**
 158          * Determines whether a character is printable and not a space
 159          * (returns FALSE for control characters, format characters, and
 160          * spaces). g_unichar_isprint() is similar, but returns TRUE for
 161          * spaces. Given some UTF-8 text, obtain a character value with
 162          * g_utf8_get_char().
 163          * c:
 164          *  a Unicode character
 165          * Returns:
 166          *  TRUE if c is printable unless it's a space
 167          */
 168         public static int unicharIsgraph(gunichar c)
 169         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 170                 // gboolean g_unichar_isgraph (gunichar c);
 171                 return g_unichar_isgraph(c);
 172         }
 173
 174         /**
 175          * Determines whether a character is a lowercase letter.
 176          * Given some UTF-8 text, obtain a character value with
 177          * g_utf8_get_char().
 178          * c:
 179          *  a Unicode character
 180          * Returns:
 181          *  TRUE if c is a lowercase letter
 182          */
 183         public static int unicharIslower(gunichar c)
 184         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 185                 // gboolean g_unichar_islower (gunichar c);
 186                 return g_unichar_islower(c);
 187         }
 188
 189         /**
 190          * Determines whether a character is printable.
 191          * Unlike g_unichar_isgraph(), returns TRUE for spaces.
 192          * Given some UTF-8 text, obtain a character value with
 193          * g_utf8_get_char().
 194          * c:
 195          *  a Unicode character
 196          * Returns:
 197          *  TRUE if c is printable
 198          */
 199         public static int unicharIsprint(gunichar c)
 200         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 201                 // gboolean g_unichar_isprint (gunichar c);
 202                 return g_unichar_isprint(c);
 203         }
 204
 205         /**
 206          * Determines whether a character is punctuation or a symbol.
 207          * Given some UTF-8 text, obtain a character value with
 208          * g_utf8_get_char().
 209          * c:
 210          *  a Unicode character
 211          * Returns:
 212          *  TRUE if c is a punctuation or symbol character
 213          */
 214         public static int unicharIspunct(gunichar c)
 215         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 216                 // gboolean g_unichar_ispunct (gunichar c);
 217                 return g_unichar_ispunct(c);
 218         }
 219
 220         /**
 221          * Determines whether a character is a space, tab, or line separator
 222          * (newline, carriage return, etc.). Given some UTF-8 text, obtain a
 223          * character value with g_utf8_get_char().
 224          * (Note: don't use this to do word breaking; you have to use
 225          * Pango or equivalent to get word breaking right, the algorithm
 226          * is fairly complex.)
 227          * c:
 228          *  a Unicode character
 229          * Returns:
 230          *  TRUE if c is a space character
 231          */
 232         public static int unicharIsspace(gunichar c)
 233         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 234                 // gboolean g_unichar_isspace (gunichar c);
 235                 return g_unichar_isspace(c);
 236         }
 237
 238         /**
 239          * Determines if a character is uppercase.
 240          * c:
 241          *  a Unicode character
 242          * Returns:
 243          *  TRUE if c is an uppercase character
 244          */
 245         public static int unicharIsupper(gunichar c)
 246         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 247                 // gboolean g_unichar_isupper (gunichar c);
 248                 return g_unichar_isupper(c);
 249         }
 250
 251         /**
 252          * Determines if a character is a hexadecimal digit.
 253          * c:
 254          *  a Unicode character.
 255          * Returns:
 256          *  TRUE if the character is a hexadecimal digit
 257          */
 258         public static int unicharIsxdigit(gunichar c)
 259         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 260                 // gboolean g_unichar_isxdigit (gunichar c);
 261                 return g_unichar_isxdigit(c);
 262         }
 263
 264         /**
 265          * Determines if a character is titlecase. Some characters in
 266          * Unicode which are composites, such as the DZ digraph
 267          * have three case variants instead of just two. The titlecase
 268          * form is used at the beginning of a word where only the
 269          * first letter is capitalized. The titlecase form of the DZ
 270          * digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
 271          * c:
 272          *  a Unicode character
 273          * Returns:
 274          *  TRUE if the character is titlecase
 275          */
 276         public static int unicharIstitle(gunichar c)
 277         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 278                 // gboolean g_unichar_istitle (gunichar c);
 279                 return g_unichar_istitle(c);
 280         }
 281
 282         /**
 283          * Determines if a given character is assigned in the Unicode
 284          * standard.
 285          * c:
 286          *  a Unicode character
 287          * Returns:
 288          *  TRUE if the character has an assigned value
 289          */
 290         public static int unicharIsdefined(gunichar c)
 291         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 292                 // gboolean g_unichar_isdefined (gunichar c);
 293                 return g_unichar_isdefined(c);
 294         }
 295
 296         /**
 297          * Determines if a character is typically rendered in a double-width
 298          * cell.
 299          * c:
 300          *  a Unicode character
 301          * Returns:
 302          *  TRUE if the character is wide
 303          */
 304         public static int unicharIswide(gunichar c)
 305         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 306                 // gboolean g_unichar_iswide (gunichar c);
 307                 return g_unichar_iswide(c);
 308         }
 309
 310         /**
 311          * Determines if a character is typically rendered in a double-width
 312          * cell under legacy East Asian locales. If a character is wide according to
 313          * g_unichar_iswide(), then it is also reported wide with this function, but
 314          * the converse is not necessarily true. See the
 315          * Unicode Standard
 316          * Annex 11 for details.
 317          * c:
 318          *  a Unicode character
 319          * Returns:
 320          *  TRUE if the character is wide in legacy East Asian locales
 321          * Since 2.12
 322          */
 323         public static int unicharIswideCjk(gunichar c)
 324         {
                     // Direct pass-through to GLib; the C gboolean result is returned as a D int.
 325                 // gboolean g_unichar_iswide_cjk (gunichar c);
 326                 return g_unichar_iswide_cjk(c);
 327         }
 328
 329         /**
 330          * Converts a character to uppercase.
 331          * c:
 332          *  a Unicode character
 333          * Returns:
 334          *  the result of converting c to uppercase.
 335          *  If c is not a lowercase or titlecase character,
 336          *  or has no upper case equivalent c is returned unchanged.
 337          */
 338         public static gunichar unicharToupper(gunichar c)
 339         {
                     // Direct pass-through to GLib; the gunichar it yields is returned as-is.
 340                 // gunichar g_unichar_toupper (gunichar c);
 341                 return g_unichar_toupper(c);
 342         }
 343
 344         /**
 345          * Converts a character to lower case.
 346          * c:
 347          *  a Unicode character.
 348          * Returns:
 349          *  the result of converting c to lower case.
 350          *  If c is not an uppercase or titlecase character,
 351          *  or has no lowercase equivalent c is returned unchanged.
 352          */
 353         public static gunichar unicharTolower(gunichar c)
 354         {
                     // Direct pass-through to GLib; the gunichar it yields is returned as-is.
 355                 // gunichar g_unichar_tolower (gunichar c);
 356                 return g_unichar_tolower(c);
 357         }
 358
 359         /**
 360          * Converts a character to the titlecase.
 361          * c:
 362          *  a Unicode character
 363          * Returns:
 364          *  the result of converting c to titlecase.
 365          *  If c is not an uppercase or lowercase character,
 366          *  c is returned unchanged.
 367          */
 368         public static gunichar unicharTotitle(gunichar c)
 369         {
                     // Direct pass-through to GLib; the gunichar it yields is returned as-is.
 370                 // gunichar g_unichar_totitle (gunichar c);
 371                 return g_unichar_totitle(c);
 372         }
 373
 374         /**
 375          * Determines the numeric value of a character as a decimal
 376          * digit.
 377          * c:
 378          *  a Unicode character
 379          * Returns:
 380          *  If c is a decimal digit (according to
 381          * g_unichar_isdigit()), its numeric value. Otherwise, -1.
 382          */
 383         public static int unicharDigitValue(gunichar c)
 384         {
                     // Direct pass-through to GLib; the C gint result is returned as a D int.
 385                 // gint g_unichar_digit_value (gunichar c);
 386                 return g_unichar_digit_value(c);
 387         }
 388
 389         /**
 390          * Determines the numeric value of a character as a hexadecimal
 391          * digit.
 392          * c:
 393          *  a Unicode character
 394          * Returns:
 395          *  If c is a hex digit (according to
 396          * g_unichar_isxdigit()), its numeric value. Otherwise, -1.
 397          */
 398         public static int unicharXdigitValue(gunichar c)
 399         {
                     // Direct pass-through to GLib; the C gint result is returned as a D int.
 400                 // gint g_unichar_xdigit_value (gunichar c);
 401                 return g_unichar_xdigit_value(c);
 402         }
 403
 404
 405         /**
 406          * Classifies a Unicode character by type.
 407          * c:
 408          *  a Unicode character
 409          * Returns:
 410          *  the type of the character.
 411          */
 412         public static GUnicodeType unicharType(gunichar c)
 413         {
                     // Direct pass-through to GLib; the GUnicodeType it yields is returned as-is.
 414                 // GUnicodeType g_unichar_type (gunichar c);
 415                 return g_unichar_type(c);
 416         }
 417
 418
 419         /**
 420          * Determines the break type of c. c should be a Unicode character
 421          * (to derive a character from UTF-8 encoded text, use
 422          * g_utf8_get_char()). The break type is used to find word and line
 423          * breaks ("text boundaries"), Pango implements the Unicode boundary
 424          * resolution algorithms and normally you would use a function such
 425          * as pango_break() instead of caring about break types yourself.
 426          * c:
 427          *  a Unicode character
 428          * Returns:
 429          *  the break type of c
 430          */
 431         public static GUnicodeBreakType unicharBreakType(gunichar c)
 432         {
                     // Direct pass-through to GLib; the GUnicodeBreakType it yields is returned as-is.
 433                 // GUnicodeBreakType g_unichar_break_type (gunichar c);
 434                 return g_unichar_break_type(c);
 435         }
 436
 437         /**
 438          * Computes the canonical ordering of a string in-place.
 439          * This rearranges decomposed characters in the string
 440          * according to their combining classes. See the Unicode
 441          * manual for more information.
 442          * string:
 443          *  a UCS-4 encoded string.
 444          * len:
 445          *  the maximum length of string to use.
 446          */
 447         public static void unicodeCanonicalOrdering(gunichar* string, uint len)
 448         {
                     // Forwards the caller's buffer pointer and length to GLib; nothing is returned.
                     // NOTE(review): len is a D uint here while the C prototype takes gsize —
                     // confirm the two widths agree on every target platform.
 449                 // void g_unicode_canonical_ordering (gunichar *string,  gsize len);
 450                 g_unicode_canonical_ordering(string, len);
 451         }
 452
 453         /**
 454          * Computes the canonical decomposition of a Unicode character.
 455          * ch:
 456          *  a Unicode character.
 457          * result_len:
 458          *  location to store the length of the return value.
 459          * Returns:
 460          *  a newly allocated string of Unicode characters.
 461          *  result_len is set to the resulting length of the string.
 462          */
 463         public static gunichar* unicodeCanonicalDecomposition(gunichar ch, uint* resultLen)
 464         {
                     // Returns the raw pointer produced by GLib; this wrapper neither copies nor frees it.
                     // NOTE(review): resultLen is uint* here while the C prototype takes gsize* —
                     // confirm the pointee widths agree on every target platform.
 465                 // gunichar* g_unicode_canonical_decomposition  (gunichar ch,  gsize *result_len);
 466                 return g_unicode_canonical_decomposition(ch, resultLen);
 467         }
 468
 469         /**
 470          * In Unicode, some characters are mirrored. This
 471          * means that their images are mirrored horizontally in text that is laid
 472          * out from right to left. For instance, "(" would become its mirror image,
 473          * ")", in right-to-left text.
 474          * If ch has the Unicode mirrored property and there is another unicode
 475          * character that typically has a glyph that is the mirror image of ch's
 476          * glyph and mirrored_ch is set, it puts that character in the address
 477          * pointed to by mirrored_ch. Otherwise the original character is put.
 478          * ch:
 479          *  a Unicode character
 480          * mirrored_ch:
 481          *  location to store the mirrored character
 482          * Returns:
 483          *  TRUE if ch has a mirrored character, FALSE otherwise
 484          * Since 2.4
 485          */
 486         public static int unicharGetMirrorChar(gunichar ch, gunichar* mirroredCh)
 487         {
                     // mirroredCh is forwarded as a raw pointer to GLib;
                     // the C gboolean result is returned as a D int.
 488                 // gboolean g_unichar_get_mirror_char (gunichar ch,  gunichar *mirrored_ch);
 489                 return g_unichar_get_mirror_char(ch, mirroredCh);
 490         }
 491
 492
 493         /**
 494          * Looks up the GUnicodeScript for a particular character (as defined
 495          * by Unicode Standard Annex 24). No check is made for ch being a
 496          * valid Unicode character; if you pass in an invalid character, the
 497          * result is undefined.
 498          * ch:
 499          *  a Unicode character
 500          * Returns:
 501          *  the GUnicodeScript for the character.
 502          * Since 2.14
 503          */
 504         public static GUnicodeScript unicharGetScript(gunichar ch)
 505         {
                     // Direct pass-through to GLib; the GUnicodeScript it yields is returned as-is.
 506                 // GUnicodeScript g_unichar_get_script (gunichar ch);
 507                 return g_unichar_get_script(ch);
 508         }
 509
 510
 511         /**
 512          * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 513          * If p does not point to a valid UTF-8 encoded character, results are
 514          * undefined. If you are not sure that the bytes are complete
 515          * valid Unicode characters, you should use g_utf8_get_char_validated()
 516          * instead.
 517          * p:
 518          *  a pointer to Unicode character encoded as UTF-8
 519          * Returns:
 520          *  the resulting character
 521          */
 522         public static gunichar utf8_GetChar(char[] p)
 523         {
                     // p is converted with Str.toStringz and handed to GLib;
                     // the gunichar GLib yields is returned as-is.
 524                 // gunichar g_utf8_get_char (const gchar *p);
 525                 return g_utf8_get_char(Str.toStringz(p));
 526         }
 527
 528         /**
 529          * Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
 530          * This function checks for incomplete characters, for invalid characters
 531          * such as characters that are out of the range of Unicode, and for
 532          * overlong encodings of valid characters.
 533          * p:
 534          *  a pointer to Unicode character encoded as UTF-8
 535          * max_len:
 536          *  the maximum number of bytes to read, or -1, for no maximum.
 537          * Returns:
 538          *  the resulting character. If p points to a partial
 539          *  sequence at the end of a string that could begin a valid
 540          *  character, returns (gunichar)-2; otherwise, if p does not point
 541          *  to a valid UTF-8 encoded Unicode character, returns (gunichar)-1.
 542          */
 543         public static gunichar utf8_GetCharValidated(char[] p, int maxLen)
 544         {
                     // p is converted with Str.toStringz; maxLen is forwarded unchanged
                     // (declared int here, gssize in the C prototype).
 545                 // gunichar g_utf8_get_char_validated (const gchar *p,  gssize max_len);
 546                 return g_utf8_get_char_validated(Str.toStringz(p), maxLen);
 547         }
 548
 549         /**
 550          * Converts from an integer character offset to a pointer to a position
 551          * within the string.
 552          * Since 2.10, this function allows passing a negative offset to
 553          * step backwards. It is usually worth stepping backwards from the end
 554          * instead of forwards if offset is in the last fourth of the string,
 555          * since moving forward is about 3 times faster than moving backward.
 556          * str:
 557          *  a UTF-8 encoded string
 558          * offset:
 559          *  a character offset within str
 560          * Returns:
 561          *  the resulting pointer
 562          */
 563         public static char[] utf8_OffsetToPointer(char[] str, int offset)
 564         {
                     // Converts str with Str.toStringz, lets GLib locate the offset, then wraps the
                     // returned pointer with Str.toString.
                     // NOTE(review): whether the resulting char[] aliases the caller's str depends on
                     // Str.toStringz / Str.toString, which are not visible here — confirm.
 565                 // gchar* g_utf8_offset_to_pointer (const gchar *str,  glong offset);
 566                 return Str.toString(g_utf8_offset_to_pointer(Str.toStringz(str), offset) );
 567         }
 568
 569         /**
 570          * Converts from a pointer to position within a string to an integer
 571          * character offset.
 572          * Since 2.10, this function allows pos to be before str, and returns
 573          * a negative offset in this case.
 574          * str:
 575          *  a UTF-8 encoded string
 576          * pos:
 577          *  a pointer to a position within str
 578          * Returns:
 579          *  the resulting character offset
 580          */
 581         public static int utf8_PointerToOffset(char[] str, char[] pos)
 582         {
                     // str and pos are each converted independently with Str.toStringz before the call.
                     // NOTE(review): the C API is documented as taking pos as a position within str;
                     // if Str.toStringz copies its argument, that relationship is lost — confirm.
 583                 // glong g_utf8_pointer_to_offset (const gchar *str,  const gchar *pos);
 584                 return g_utf8_pointer_to_offset(Str.toStringz(str), Str.toStringz(pos));
 585         }
 586
 587         /**
 588          * Finds the previous UTF-8 character in the string before p.
 589          * p does not have to be at the beginning of a UTF-8 character. No check
 590          * is made to see if the character found is actually valid other than
 591          * it starts with an appropriate byte. If p might be the first
 592          * character of the string, you must use g_utf8_find_prev_char() instead.
 593          * p:
 594          *  a pointer to a position within a UTF-8 encoded string
 595          * Returns:
 596          *  a pointer to the found character.
 597          */
 598         public static char[] utf8_PrevChar(char[] p)
 599         {
                     // p is converted with Str.toStringz; the pointer GLib returns is wrapped with Str.toString.
                     // NOTE(review): GLib looks for a character *before* the pointer it is given; if
                     // Str.toStringz(p) yields the first byte of a buffer, that search would start
                     // ahead of the buffer — confirm how callers are expected to use this.
 600                 // gchar* g_utf8_prev_char (const gchar *p);
 601                 return Str.toString(g_utf8_prev_char(Str.toStringz(p)) );
 602         }
 603
 604         /**
 605          * Finds the start of the next UTF-8 character in the string after p.
 606          * p does not have to be at the beginning of a UTF-8 character. No check
 607          * is made to see if the character found is actually valid other than
 608          * it starts with an appropriate byte.
 609          * p:
 610          *  a pointer to a position within a UTF-8 encoded string
 611          * end:
 612          *  a pointer to the end of the string, or NULL to indicate
 613          *  that the string is nul-terminated, in which case
 614          *  the returned value will be
 615          * Returns:
 616          *  a pointer to the found character or NULL
 617          */
 618         public static char[] utf8_FindNextChar(char[] p, char[] end)
 619         {
                     // p and end are each converted independently with Str.toStringz; the pointer GLib
                     // returns is wrapped with Str.toString.
                     // NOTE(review): the C result may be NULL (per the documented contract), and end is
                     // documented as a position in the same string as p; how Str.toString treats null and
                     // whether Str.toStringz keeps the two pointers related is not visible here — confirm.
 620                 // gchar* g_utf8_find_next_char (const gchar *p,  const gchar *end);
 621                 return Str.toString(g_utf8_find_next_char(Str.toStringz(p), Str.toStringz(end)) );
 622         }
 623
 624         /**
 625          * Given a position p within a UTF-8 encoded string str, find the start
 626          * of the previous UTF-8 character starting before p. Returns NULL if no
 627          * UTF-8 characters are present in str before p.
 628          * p does not have to be at the beginning of a UTF-8 character. No check
 629          * is made to see if the character found is actually valid other than
 630          * it starts with an appropriate byte.
 631          * str:
 632          *  pointer to the beginning of a UTF-8 encoded string
 633          * p:
 634          *  pointer to some position within str
 635          * Returns:
 636          *  a pointer to the found character or NULL.
 637          */
 638         public static char[] utf8_FindPrevChar(char[] str, char[] p)
 639         {
                     // str and p are each converted independently with Str.toStringz; the pointer GLib
                     // returns is wrapped with Str.toString.
                     // NOTE(review): the C result may be NULL (per the documented contract), and p is
                     // documented as a position within str; how Str.toString treats null and whether
                     // Str.toStringz keeps the two pointers related is not visible here — confirm.
 640                 // gchar* g_utf8_find_prev_char (const gchar *str,  const gchar *p);
 641                 return Str.toString(g_utf8_find_prev_char(Str.toStringz(str), Str.toStringz(p)) );
 642         }
 643
 644         /**
 645          * Returns the length of the string in characters.
 646          * p:
 647          *  pointer to the start of a UTF-8 encoded string.
 648          * max:
 649          *  the maximum number of bytes to examine. If max
 650          *  is less than 0, then the string is assumed to be
 651          *  nul-terminated. If max is 0, p will not be examined and
 652          *  may be NULL.
 653          * Returns:
 654          *  the length of the string in characters
 655          */
 656         public static int utf8_Strlen(char[] p, int max)
 657         {
                     // p is converted with Str.toStringz; max is forwarded unchanged.
                     // The C glong result is returned through a D int.
 658                 // glong g_utf8_strlen (const gchar *p,  gssize max);
 659                 return g_utf8_strlen(Str.toStringz(p), max);
 660         }
 661
 662         /**
 663          * Like the standard C strncpy() function, but
 664          * copies a given number of characters instead of a given number of
 665          * bytes. The src string must be valid UTF-8 encoded text.
 666          * (Use g_utf8_validate() on all text before trying to use UTF-8
 667          * utility functions with it.)
 668          * dest:
 669          *  buffer to fill with characters from src
 670          * src:
 671          *  UTF-8 encoded string
 672          * n:
 673          *  character count
 674          * Returns:
 675          *  dest
 676          */
 677         public static char[] utf8_Strncpy(char[] dest, char[] src, uint n)
 678         {
                     // dest and src are each converted with Str.toStringz; the pointer GLib returns is
                     // wrapped with Str.toString.
                     // NOTE(review): if Str.toStringz(dest) copies, GLib fills that copy rather than the
                     // caller's dest, so only the return value would carry the result — confirm.
 679                 // gchar* g_utf8_strncpy (gchar *dest,  const gchar *src,  gsize n);
 680                 return Str.toString(g_utf8_strncpy(Str.toStringz(dest), Str.toStringz(src), n) );
 681         }
 682
 683         /**
 684          * Finds the leftmost occurrence of the given Unicode character
 685          * in a UTF-8 encoded string, while limiting the search to len bytes.
 686          * If len is -1, allow unbounded search.
 687          * p:
 688          *  a nul-terminated UTF-8 encoded string
 689          * len:
 690          *  the maximum length of p
 691          * c:
 692          *  a Unicode character
 693          * Returns:
 694          *  NULL if the string does not contain the character,
 695          *  otherwise, a pointer to the start of the leftmost occurrence of
 696          *  the character in the string.
 697          */
 698         public static char[] utf8_Strchr(char[] p, int len, gunichar c)
 699         {
                     // p is converted with Str.toStringz; len and c are forwarded unchanged; the pointer
                     // GLib returns is wrapped with Str.toString.
                     // NOTE(review): GLib is documented to return NULL when c is absent; how Str.toString
                     // treats a null pointer is not visible here — confirm.
 700                 // gchar* g_utf8_strchr (const gchar *p,  gssize len,  gunichar c);
 701                 return Str.toString(g_utf8_strchr(Str.toStringz(p), len, c) );
 702         }
 703
 704         /**
 705          * Find the rightmost occurrence of the given Unicode character
 706          * in a UTF-8 encoded string, while limiting the search to len bytes.
 707          * If len is -1, allow unbounded search.
 708          * p:
 709          *  a nul-terminated UTF-8 encoded string
 710          * len:
 711          *  the maximum length of p
 712          * c:
 713          *  a Unicode character
 714          * Returns:
 715          *  NULL if the string does not contain the character,
 716          *  otherwise, a pointer to the start of the rightmost occurrence of the
 717          *  character in the string.
 718          */
 719         public static char[] utf8_Strrchr(char[] p, int len, gunichar c)
 720         {
                     // p is converted with Str.toStringz; len and c are forwarded unchanged; the pointer
                     // GLib returns is wrapped with Str.toString.
                     // NOTE(review): GLib is documented to return NULL when c is absent; how Str.toString
                     // treats a null pointer is not visible here — confirm.
 721                 // gchar* g_utf8_strrchr (const gchar *p,  gssize len,  gunichar c);
 722                 return Str.toString(g_utf8_strrchr(Str.toStringz(p), len, c) );
 723         }
 724
 725         /**
 726          * Reverses a UTF-8 string. str must be valid UTF-8 encoded text.
 727          * (Use g_utf8_validate() on all text before trying to use UTF-8
 728          * utility functions with it.)
 729          * Note that unlike g_strreverse(), this function returns
 730          * newly-allocated memory, which should be freed with g_free() when
 731          * no longer needed.
 732          * str:
 733          *  a UTF-8 encoded string
 734          * len:
 735          *  the maximum length of str to use. If len < 0, then
 736          *  the string is nul-terminated.
 737          * Returns:
 738          *  a newly-allocated string which is the reverse of str.
 739          * Since 2.2
 740          */
 741         public static char[] utf8_Strreverse(char[] str, int len)
 742         {
                     // str is converted with Str.toStringz; the pointer GLib returns is wrapped with Str.toString.
                     // NOTE(review): the C result is documented as newly allocated and to be released with
                     // g_free(); nothing in this method frees it, so it presumably leaks unless
                     // Str.toString takes ownership — confirm.
 743                 // gchar* g_utf8_strreverse (const gchar *str,  gssize len);
 744                 return Str.toString(g_utf8_strreverse(Str.toStringz(str), len) );
 745         }
 746
 747         /**
 748          * Checks whether str holds valid UTF-8 by delegating to
 749          * g_utf8_validate(). Text that comes from a file or the network
 750          * should pass this check before it is handed to the other UTF-8
 751          * routines, many of which require valid input.
 752          * When max_len is -1 the text is read up to its terminating NUL;
 753          * otherwise max_len bytes are examined, and meeting a NUL before
 754          * max_len bytes have been read makes the check fail.
 755          * str:
 756          *  the character data to validate
 757          * max_len:
 758          *  number of bytes to validate, or -1 to go until NUL
 759          * end:
 760          *  if non-NULL, receives the end of the valid range: the start
 761          *  of the first invalid character, or the end of the validated
 762          *  text when everything was valid
 763          * Returns:
 764          *  TRUE if the text was valid UTF-8
 765          * NOTE(review): the pointer stored in end refers to the buffer
 766          * produced by Str.toStringz(str), which may not be str itself --
 767          * confirm before doing pointer arithmetic against str.
 768          */
 769         public static int utf8_Validate(char[] str, int maxLen, char** end)
 770         {
 771                 // gboolean g_utf8_validate (const gchar *str,  gssize max_len,  const gchar **end);
 772                 int isValid = g_utf8_validate(Str.toStringz(str), maxLen, end);
 773                 return isValid;
 774         }
 775
 776         /**
 777          * Produces an uppercase copy of a UTF-8 string through
 778          * g_utf8_strup(). Every character that has a case is converted;
 779          * the conversion depends on the current locale and may increase
 780          * the number of characters (the German ess-zet becomes SS).
 781          * str:
 782          *  a UTF-8 encoded string
 783          * len:
 784          *  length of str, in bytes, or -1 if str is nul-terminated.
 785          * Returns:
 786          *  the uppercase text. NOTE(review): the C call returns newly
 787          *  allocated memory that is not freed here -- confirm ownership.
 788          */
 789         public static char[] utf8_Strup(char[] str, int len)
 790         {
 791                 // gchar* g_utf8_strup (const gchar *str,  gssize len);
 792                 char[] upper = Str.toString(g_utf8_strup(Str.toStringz(str), len));
 793                 return upper;
 794         }
 795
 796         /**
 797          * Produces a lowercase copy of a UTF-8 string through
 798          * g_utf8_strdown(). Every character that has a case is converted;
 799          * the conversion depends on the current locale and may change
 800          * the number of characters.
 801          * str:
 802          *  a UTF-8 encoded string
 803          * len:
 804          *  length of str, in bytes, or -1 if str is nul-terminated.
 805          * Returns:
 806          *  the lowercase text (allocated by the C call, not freed here).
 807          */
 808         public static char[] utf8_Strdown(char[] str, int len)
 809         {
 810                 // gchar* g_utf8_strdown (const gchar *str,  gssize len);
 811                 char[] lower = Str.toString(g_utf8_strdown(Str.toStringz(str), len));
 812                 return lower;
 813         }
 814
 815         /**
 816          * Converts a string into a case-independent form by calling
 817          * g_utf8_casefold(). The result does not correspond to any
 818          * particular case; it is meant to be compared for equality, or
 819          * ordered, against other strings folded the same way.
 820          * Folding followed by g_utf8_collate() only approximates the
 821          * linguistically correct case-insensitive ordering, although it
 822          * is a fairly good approximation. An exact result would need a
 823          * collation function that takes case sensitivity into account,
 824          * which GLib does not currently provide.
 825          * str:
 826          *  a UTF-8 encoded string
 827          * len:
 828          *  length of str, in bytes, or -1 if str is nul-terminated.
 829          * Returns:
 830          *  the case-independent form of str (allocated by the C call,
 831          *  not freed here).
 832          */
 833         public static char[] utf8_Casefold(char[] str, int len)
 834         {
 835                 // gchar* g_utf8_casefold (const gchar *str,  gssize len);
 836                 char[] folded = Str.toString(g_utf8_casefold(Str.toStringz(str), len));
 837                 return folded;
 838         }
 839
 840         /**
 841          * Converts a string into canonical form with g_utf8_normalize(),
 842          * standardizing such issues as whether an accented character is
 843          * stored as a base character plus combining accent or as a single
 844          * precomposed character. Normalizing is generally advisable
 845          * before comparing two Unicode strings.
 846          * G_NORMALIZE_DEFAULT only standardizes differences that do not
 847          * affect the text content, such as the accent representation
 848          * described above.
 849          * G_NORMALIZE_ALL additionally standardizes the "compatibility"
 850          * characters in Unicode, for example SUPERSCRIPT THREE to its
 851          * standard form DIGIT THREE. Formatting information may be lost,
 852          * but for most text operations such characters should be
 853          * considered the same; g_utf8_collate(), for instance,
 854          * normalizes with G_NORMALIZE_ALL as its first step.
 855          * G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE behave
 856          * like G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL, but return a
 857          * result with composed forms rather than a maximally decomposed
 858          * form. This is often useful when the string is going to be
 859          * converted to a legacy encoding or passed to a system with less
 860          * capable Unicode handling.
 861          * str:
 862          *  a UTF-8 encoded string.
 863          * len:
 864          *  length of str, in bytes, or -1 if str is nul-terminated.
 865          * mode:
 866          *  the type of normalization to perform.
 867          * Returns:
 868          *  the normalized form of str. The C call returns newly
 869          *  allocated memory.
 870          *  NOTE(review): that memory is not freed in this wrapper and
 871          *  it is not visible here whether Str.toString() copies it --
 872          *  confirm who owns it.
 873          */
 874         public static char[] utf8_Normalize(char[] str, int len, GNormalizeMode mode)
 875         {
 876                 // gchar* g_utf8_normalize (const gchar *str,  gssize len,  GNormalizeMode mode);
 877                 char[] normalized = Str.toString(g_utf8_normalize(Str.toStringz(str), len, mode));
 878                 return normalized;
 879         }
 880
 881
 882         /**
 883          * Orders two strings with g_utf8_collate(), which applies the
 884          * linguistically correct rules of the current locale. For large
 885          * sorts it is significantly faster to obtain collation keys with
 886          * g_utf8_collate_key() and compare those keys with strcmp()
 887          * than to sort the original strings.
 888          * str1:
 889          *  a UTF-8 encoded string
 890          * str2:
 891          *  a UTF-8 encoded string
 892          * Returns:
 893          *  < 0 if str1 compares before str2, 0 if they compare equal,
 894          *  > 0 if str1 compares after str2.
 895          */
 896         public static int utf8_Collate(char[] str1, char[] str2)
 897         {
 898                 // gint g_utf8_collate (const gchar *str1,  const gchar *str2);
 899                 int ordering = g_utf8_collate(Str.toStringz(str1), Str.toStringz(str2));
 900                 return ordering;
 901         }
 902
 903         /**
 904          * Builds a collation key for a string via g_utf8_collate_key().
 905          * Keys produced by this function can be compared with one
 906          * another using strcmp(), and such a comparison always gives the
 907          * same result as comparing the two original strings with
 908          * g_utf8_collate().
 909          * str:
 910          *  a UTF-8 encoded string.
 911          * len:
 912          *  length of str, in bytes, or -1 if str is nul-terminated.
 913          * Returns:
 914          *  the collation key. The C string it is built from should be
 915          *  freed with g_free(); this wrapper does not free it.
 916          */
 917         public static char[] utf8_CollateKey(char[] str, int len)
 918         {
 919                 // gchar* g_utf8_collate_key (const gchar *str,  gssize len);
 920                 char[] key = Str.toString(g_utf8_collate_key(Str.toStringz(str), len));
 921                 return key;
 922         }
 923
 924         /**
 925          * Builds a collation key suited to sorting filenames by calling
 926          * g_utf8_collate_key_for_filename(); such keys can be compared
 927          * with one another using strcmp().
 928          * The dot '.' is treated as a special case: most dictionary
 929          * orderings consider it insignificant and so produce "event.c"
 930          * "eventgenerator.c" "event.h" instead of "event.c" "event.h"
 931          * "eventgenerator.c". Numbers are also meant to be handled so
 932          * that "file1" "file10" "file5" sorts as "file1" "file5" "file10".
 933          * str:
 934          *  a UTF-8 encoded string.
 935          * len:
 936          *  length of str, in bytes, or -1 if str is nul-terminated.
 937          * Returns:
 938          *  the collation key (allocated by the C call, not freed here).
 939          * Since 2.8
 940          */
 941         public static char[] utf8_CollateKeyForFilename(char[] str, int len)
 942         {
 943                 // gchar* g_utf8_collate_key_for_filename (const gchar *str,  gssize len);
 944                 char[] key = Str.toString(g_utf8_collate_key_for_filename(Str.toStringz(str), len));
 945                 return key;
 946         }
 947
 948         /**
 949          * Converts a string from UTF-8 to UTF-16 using g_utf8_to_utf16().
 950          * A 0 character is added to the result after the converted text.
 951          * str:
 952          *  a UTF-8 encoded string
 953          * len:
 954          *  the maximum length (number of characters) of str to use.
 955          *  If len < 0, then the string is nul-terminated.
 956          * items_read:
 957          *  location to store number of bytes read, or NULL. If NULL,
 958          *  G_CONVERT_ERROR_PARTIAL_INPUT is returned when str contains a
 959          *  trailing partial character. If an error occurs, the index of
 960          *  the invalid input is stored here.
 961          * items_written:
 962          *  location to store number of gunichar2 written, or NULL.
 963          *  The stored value does not include the trailing 0.
 964          * error:
 965          *  location to store the error that occurred, or NULL to ignore
 966          *  errors. Any of the errors in GConvertError other than
 967          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 968          * Returns:
 969          *  a pointer to a newly allocated UTF-16 string, which must be
 970          *  freed with g_free(). On error NULL is returned and error set.
 971          * NOTE(review): the C prototype takes glong* counters while this
 972          * wrapper takes int* -- confirm the two have the same width on
 973          * every supported target.
 974          */
 975         public static gunichar2* utf8_ToUtf16(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
 976         {
 977                 // gunichar2* g_utf8_to_utf16 (const gchar *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
 978                 gunichar2* utf16 = g_utf8_to_utf16(Str.toStringz(str), len, itemsRead, itemsWritten, error);
 979                 return utf16;
 980         }
 981
 982         /**
 983          * Converts a string from UTF-8 to a 32-bit fixed width
 984          * representation as UCS-4 using g_utf8_to_ucs4(). A trailing 0
 985          * is added to the string after the converted text.
 986          * str:
 987          *  a UTF-8 encoded string
 988          * len:
 989          *  the maximum length of str to use. If len < 0, then
 990          *  the string is nul-terminated.
 991          * items_read:
 992          *  location to store number of bytes read, or NULL. If NULL,
 993          *  G_CONVERT_ERROR_PARTIAL_INPUT is returned when str contains a
 994          *  trailing partial character. If an error occurs, the index of
 995          *  the invalid input is stored here.
 996          * items_written:
 997          *  location to store number of characters written, or NULL.
 998          *  The stored value does not include the trailing 0 character.
 999          * error:
1000          *  location to store the error that occurred, or NULL to ignore
1001          *  errors. Any of the errors in GConvertError other than
1002          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1003          * Returns:
1004          *  a pointer to a newly allocated UCS-4 string, which must be
1005          *  freed with g_free(). On error NULL is returned and error set.
1006          * NOTE(review): the C prototype takes glong* counters while this
1007          * wrapper takes int* -- confirm the two have the same width on
1008          * every supported target.
1009          */
1010         public static gunichar* utf8_ToUcs4(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
1011         {
1012                 // gunichar* g_utf8_to_ucs4 (const gchar *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1013                 gunichar* ucs4 = g_utf8_to_ucs4(Str.toStringz(str), len, itemsRead, itemsWritten, error);
1014                 return ucs4;
1015         }
1016
1017         /**
1018          * Converts a string from UTF-8 to UCS-4 with g_utf8_to_ucs4_fast(),
1019          * assuming the input is valid UTF-8. The call is roughly twice as
1020          * fast as g_utf8_to_ucs4() but does no error checking on the input.
1021          * str:
1022          *  a UTF-8 encoded string
1023          * len:
1024          *  the maximum length of str to use. If len < 0, then
1025          *  the string is nul-terminated.
1026          * items_written:
1027          *  location to store the number of characters in the
1028          *  result, or NULL.
1029          * Returns:
1030          *  a pointer to a newly allocated UCS-4 string.
1031          *  This value must be freed with g_free().
1032          */
1033         public static gunichar* utf8_ToUcs4_Fast(char[] str, int len, int* itemsWritten)
1034         {
1035                 // gunichar* g_utf8_to_ucs4_fast (const gchar *str,  glong len,  glong *items_written);
1036                 gunichar* ucs4 = g_utf8_to_ucs4_fast(Str.toStringz(str), len, itemsWritten);
1037                 return ucs4;
1038         }
1039
1040         /**
1041          * Converts a string from UTF-16 to UCS-4 using g_utf16_to_ucs4().
1042          * The result is terminated with a 0 character.
1043          * str:
1044          *  a UTF-16 encoded string
1045          * len:
1046          *  the maximum length (number of gunichar2) of str to use.
1047          *  If len < 0, then the string is terminated with a 0 character.
1048          * items_read:
1049          *  location to store number of words read, or NULL. If NULL,
1050          *  G_CONVERT_ERROR_PARTIAL_INPUT is returned when str contains a
1051          *  trailing partial character. If an error occurs, the index of
1052          *  the invalid input is stored here.
1053          * items_written:
1054          *  location to store number of characters written, or NULL.
1055          *  The stored value does not include the trailing 0 character.
1056          * error:
1057          *  location to store the error that occurred, or NULL to ignore
1058          *  errors. Any of the errors in GConvertError other than
1059          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1060          * Returns:
1061          *  a pointer to a newly allocated UCS-4 string, which must be
1062          *  freed with g_free(). On error NULL is returned and error set.
1063          * NOTE(review): the C prototype takes glong* counters while this
1064          * wrapper takes int* -- confirm the two have the same width on
1065          * every supported target.
1066          */
1067         public static gunichar* utf16_ToUcs4(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
1068         {
1069                 // gunichar* g_utf16_to_ucs4 (const gunichar2 *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1070                 gunichar* ucs4 = g_utf16_to_ucs4(str, len, itemsRead, itemsWritten, error);
1071                 return ucs4;
1072         }
1073
1074         /**
1075          * Converts a string from UTF-16 to UTF-8 using g_utf16_to_utf8().
1076          * The result is terminated with a 0 byte.
1077          * The input is expected to be in native endianness already; an
1078          * initial byte-order-mark character is not handled specially.
1079          * g_convert() can be used to convert a byte buffer of UTF-16
1080          * data of ambiguous endianness.
1081          * str:
1082          *  a UTF-16 encoded string
1083          * len:
1084          *  the maximum length (number of gunichar2) of str to use.
1085          *  If len < 0, then the string is terminated with a 0 character.
1086          * items_read:
1087          *  location to store number of words read, or NULL. If NULL,
1088          *  G_CONVERT_ERROR_PARTIAL_INPUT is returned when str contains a
1089          *  trailing partial character. If an error occurs, the index of
1090          *  the invalid input is stored here.
1091          * items_written:
1092          *  location to store number of bytes written, or NULL.
1093          *  The stored value does not include the trailing 0 byte.
1094          * error:
1095          *  location to store the error that occurred, or NULL to ignore
1096          *  errors. Any of the errors in GConvertError other than
1097          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1098          * Returns:
1099          *  the converted UTF-8 text. The C call returns newly allocated
1100          *  memory (NULL, with error set, on failure).
1101          * NOTE(review): that memory is not freed here, and the C prototype
1102          * takes glong* counters while this wrapper takes int* -- confirm
1103          * both points.
1104          */
1105         public static char[] utf16_ToUtf8(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
1106         {
1107                 // gchar* g_utf16_to_utf8 (const gunichar2 *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1108                 char[] utf8 = Str.toString(g_utf16_to_utf8(str, len, itemsRead, itemsWritten, error));
1109                 return utf8;
1110         }
1111
1112         /**
1113          * Converts a string from UCS-4 to UTF-16 using g_ucs4_to_utf16().
1114          * A 0 character is added to the result after the converted text.
1115          * str:
1116          *  a UCS-4 encoded string
1117          * len:
1118          *  the maximum length (number of characters) of str to use.
1119          *  If len < 0, then the string is terminated with a 0 character.
1120          * items_read:
1121          *  location to store number of bytes read, or NULL.
1122          *  If an error occurs, the index of the invalid input is
1123          *  stored here.
1124          * items_written:
1125          *  location to store number of gunichar2 written, or NULL.
1126          *  The stored value does not include the trailing 0.
1127          * error:
1128          *  location to store the error that occurred, or NULL to ignore
1129          *  errors. Any of the errors in GConvertError other than
1130          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1131          * Returns:
1132          *  a pointer to a newly allocated UTF-16 string, which must be
1133          *  freed with g_free(). On error NULL is returned and error set.
1134          * NOTE(review): the C prototype takes glong* counters while this
1135          * wrapper takes int* -- confirm the widths match on every target.
1136          */
1137         public static gunichar2* ucs4_ToUtf16(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
1138         {
1139                 // gunichar2* g_ucs4_to_utf16 (const gunichar *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1140                 gunichar2* utf16 = g_ucs4_to_utf16(str, len, itemsRead, itemsWritten, error);
1141                 return utf16;
1142         }
1143
1144         /**
1145          * Converts a string from a 32-bit fixed width representation as
1146          * UCS-4 to UTF-8 using g_ucs4_to_utf8(). The result is terminated
1147          * with a 0 byte.
1148          * str:
1149          *  a UCS-4 encoded string
1150          * len:
1151          *  the maximum length (number of characters) of str to use.
1152          *  If len < 0, then the string is terminated with a 0 character.
1153          * items_read:
1154          *  location to store number of characters read, or NULL.
1155          *  On error it is set to the position of the first invalid
1156          *  input character.
1157          * items_written:
1158          *  location to store number of bytes written, or NULL.
1159          *  The stored value does not include the trailing 0 byte.
1160          * error:
1161          *  location to store the error that occurred, or NULL to ignore
1162          *  errors. Any of the errors in GConvertError other than
1163          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1164          * Returns:
1165          *  the converted UTF-8 text. The C call returns newly allocated
1166          *  memory (NULL, with error set, on failure); it is not freed
1167          *  here -- NOTE(review): confirm ownership.
1168          */
1169         public static char[] ucs4_ToUtf8(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
1170         {
1171                 // gchar* g_ucs4_to_utf8 (const gunichar *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1172                 char[] utf8 = Str.toString(g_ucs4_to_utf8(str, len, itemsRead, itemsWritten, error));
1173                 return utf8;
1174         }
1175
1176         /**
1177          * Converts a single character to UTF-8 with g_unichar_to_utf8().
1178          * The bytes are staged in a local buffer and copied into outbuf,
1179          * so the caller's array itself receives them.
1180          * c:
1181          *  a Unicode character code
1182          * outbuf:
1183          *  receives the bytes (at most 6); if null, only the length is computed.
1184          * Returns:
1185          *  number of bytes written (or needed, when outbuf is null)
1186          */
1187         public static int unicharToUtf8(gunichar c, char[] outbuf)
1188         {
1189                 // gint g_unichar_to_utf8 (gunichar c,  gchar *outbuf);
1190                 if ( outbuf.ptr is null ) return g_unichar_to_utf8(c, null);
1191                 char[6] scratch; // the C API asks for at least 6 bytes of output space
1192                 int count = g_unichar_to_utf8(c, scratch.ptr);
1193                 outbuf[0 .. count] = scratch[0 .. count]; // bounds-checked copy into the caller's array
1194                 return count;
1195         }
1196 }