gtkD/src/glib/Unicode.d

   1 /*
   2  * This file is part of gtkD.
   3  *
   4  * gtkD is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU Lesser General Public License as published by
   6  * the Free Software Foundation; either version 2.1 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * gtkD is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public License
  15  * along with gtkD; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  17  */
  18
  19 // generated automatically - do not change
  20 // find conversion definition on APILookup.txt
   21 // implement new conversion functionalities on the wrap.utils package
  22
  23 /*
  24  * Conversion parameters:
  25  * inFile  = glib-Unicode-Manipulation.html
  26  * outPack = glib
  27  * outFile = Unicode
  28  * strct   =
  29  * realStrct=
  30  * ctorStrct=
  31  * clss    = Unicode
  32  * interf  =
  33  * class Code: No
  34  * interface Code: No
  35  * template for:
  36  * extend  =
  37  * implements:
  38  * prefixes:
  39  *      - g_
  40  * omit structs:
  41  * omit prefixes:
  42  * omit code:
  43  * imports:
  44  *      - glib.ErrorG
  45  *      - glib.Str
  46  * structWrap:
  47  * module aliases:
  48  * local aliases:
  49  */
  50
  51 module glib.Unicode;
  52
  53 version(noAssert)
  54 {
  55         version(Tango)
  56         {
  57                 import tango.io.Stdout; // use the tango loging?
  58         }
  59 }
  60
  61 private import gtkc.glibtypes;
  62
  63 private import gtkc.glib;
  64
  65
  66 private import glib.ErrorG;
  67 private import glib.Str;
  68
  69
  70
  71
  72 /**
  73  * Description
  74  * This section describes a number of functions for dealing with
  75  * Unicode characters and strings. There are analogues of the
  76  * traditional ctype.h character classification
  77  * and case conversion functions, UTF-8 analogues of some string utility
  78  * functions, functions to perform normalization, case conversion and
  79  * collation on UTF-8 strings and finally functions to convert between
  80  * the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
  81  * The implementations of the Unicode functions in GLib are based
  82  * on the Unicode Character Data tables, which are available from
  83  * www.unicode.org.
  84  * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
  85  * GLib 2.12 supports Unicode 5.0.
  86  */
  87 public class Unicode
  88 {
  89
  90         /**
  91          */
  92
  93
  94
  95         /**
  96          * Checks whether ch is a valid Unicode character. Some possible
  97          * integer values of ch will not be valid. 0 is considered a valid
  98          * character, though it's normally a string terminator.
  99          * ch:
 100          *  a Unicode character
 101          * Returns:
 102          *  TRUE if ch is a valid Unicode character
 103          */
 104         public static int unicharValidate(gunichar ch)
 105         {
 106                 // gboolean g_unichar_validate (gunichar ch);
 107                 return g_unichar_validate(ch);
 108         }
 109
 110         /**
 111          * Determines whether a character is alphanumeric.
 112          * Given some UTF-8 text, obtain a character value
 113          * with g_utf8_get_char().
 114          * c:
 115          *  a Unicode character
 116          * Returns:
 117          *  TRUE if c is an alphanumeric character
 118          */
 119         public static int unicharIsalnum(gunichar c)
 120         {
 121                 // gboolean g_unichar_isalnum (gunichar c);
 122                 return g_unichar_isalnum(c);
 123         }
 124
 125         /**
 126          * Determines whether a character is alphabetic (i.e. a letter).
 127          * Given some UTF-8 text, obtain a character value with
 128          * g_utf8_get_char().
 129          * c:
 130          *  a Unicode character
 131          * Returns:
 132          *  TRUE if c is an alphabetic character
 133          */
 134         public static int unicharIsalpha(gunichar c)
 135         {
 136                 // gboolean g_unichar_isalpha (gunichar c);
 137                 return g_unichar_isalpha(c);
 138         }
 139
 140         /**
 141          * Determines whether a character is a control character.
 142          * Given some UTF-8 text, obtain a character value with
 143          * g_utf8_get_char().
 144          * c:
 145          *  a Unicode character
 146          * Returns:
 147          *  TRUE if c is a control character
 148          */
 149         public static int unicharIscntrl(gunichar c)
 150         {
 151                 // gboolean g_unichar_iscntrl (gunichar c);
 152                 return g_unichar_iscntrl(c);
 153         }
 154
 155         /**
 156          * Determines whether a character is numeric (i.e. a digit). This
 157          * covers ASCII 0-9 and also digits in other languages/scripts. Given
 158          * some UTF-8 text, obtain a character value with g_utf8_get_char().
 159          * c:
 160          *  a Unicode character
 161          * Returns:
 162          *  TRUE if c is a digit
 163          */
 164         public static int unicharIsdigit(gunichar c)
 165         {
 166                 // gboolean g_unichar_isdigit (gunichar c);
 167                 return g_unichar_isdigit(c);
 168         }
 169
 170         /**
 171          * Determines whether a character is printable and not a space
 172          * (returns FALSE for control characters, format characters, and
 173          * spaces). g_unichar_isprint() is similar, but returns TRUE for
 174          * spaces. Given some UTF-8 text, obtain a character value with
 175          * g_utf8_get_char().
 176          * c:
 177          *  a Unicode character
 178          * Returns:
 179          *  TRUE if c is printable unless it's a space
 180          */
 181         public static int unicharIsgraph(gunichar c)
 182         {
 183                 // gboolean g_unichar_isgraph (gunichar c);
 184                 return g_unichar_isgraph(c);
 185         }
 186
 187         /**
 188          * Determines whether a character is a lowercase letter.
 189          * Given some UTF-8 text, obtain a character value with
 190          * g_utf8_get_char().
 191          * c:
 192          *  a Unicode character
 193          * Returns:
 194          *  TRUE if c is a lowercase letter
 195          */
 196         public static int unicharIslower(gunichar c)
 197         {
 198                 // gboolean g_unichar_islower (gunichar c);
 199                 return g_unichar_islower(c);
 200         }
 201
 202         /**
 203          * Determines whether a character is printable.
 204          * Unlike g_unichar_isgraph(), returns TRUE for spaces.
 205          * Given some UTF-8 text, obtain a character value with
 206          * g_utf8_get_char().
 207          * c:
 208          *  a Unicode character
 209          * Returns:
 210          *  TRUE if c is printable
 211          */
 212         public static int unicharIsprint(gunichar c)
 213         {
 214                 // gboolean g_unichar_isprint (gunichar c);
 215                 return g_unichar_isprint(c);
 216         }
 217
 218         /**
 219          * Determines whether a character is punctuation or a symbol.
 220          * Given some UTF-8 text, obtain a character value with
 221          * g_utf8_get_char().
 222          * c:
 223          *  a Unicode character
 224          * Returns:
 225          *  TRUE if c is a punctuation or symbol character
 226          */
 227         public static int unicharIspunct(gunichar c)
 228         {
 229                 // gboolean g_unichar_ispunct (gunichar c);
 230                 return g_unichar_ispunct(c);
 231         }
 232
 233         /**
 234          * Determines whether a character is a space, tab, or line separator
 235          * (newline, carriage return, etc.). Given some UTF-8 text, obtain a
 236          * character value with g_utf8_get_char().
 237          * (Note: don't use this to do word breaking; you have to use
 238          * Pango or equivalent to get word breaking right, the algorithm
 239          * is fairly complex.)
 240          * c:
 241          *  a Unicode character
 242          * Returns:
 243          *  TRUE if c is a space character
 244          */
 245         public static int unicharIsspace(gunichar c)
 246         {
 247                 // gboolean g_unichar_isspace (gunichar c);
 248                 return g_unichar_isspace(c);
 249         }
 250
 251         /**
 252          * Determines if a character is uppercase.
 253          * c:
 254          *  a Unicode character
 255          * Returns:
 256          *  TRUE if c is an uppercase character
 257          */
 258         public static int unicharIsupper(gunichar c)
 259         {
 260                 // gboolean g_unichar_isupper (gunichar c);
 261                 return g_unichar_isupper(c);
 262         }
 263
 264         /**
  265          * Determines if a character is a hexadecimal digit.
 266          * c:
 267          *  a Unicode character.
 268          * Returns:
 269          *  TRUE if the character is a hexadecimal digit
 270          */
 271         public static int unicharIsxdigit(gunichar c)
 272         {
 273                 // gboolean g_unichar_isxdigit (gunichar c);
 274                 return g_unichar_isxdigit(c);
 275         }
 276
 277         /**
 278          * Determines if a character is titlecase. Some characters in
 279          * Unicode which are composites, such as the DZ digraph
 280          * have three case variants instead of just two. The titlecase
 281          * form is used at the beginning of a word where only the
 282          * first letter is capitalized. The titlecase form of the DZ
  283          * digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
 284          * c:
 285          *  a Unicode character
 286          * Returns:
 287          *  TRUE if the character is titlecase
 288          */
 289         public static int unicharIstitle(gunichar c)
 290         {
 291                 // gboolean g_unichar_istitle (gunichar c);
 292                 return g_unichar_istitle(c);
 293         }
 294
 295         /**
 296          * Determines if a given character is assigned in the Unicode
 297          * standard.
 298          * c:
 299          *  a Unicode character
 300          * Returns:
 301          *  TRUE if the character has an assigned value
 302          */
 303         public static int unicharIsdefined(gunichar c)
 304         {
 305                 // gboolean g_unichar_isdefined (gunichar c);
 306                 return g_unichar_isdefined(c);
 307         }
 308
 309         /**
 310          * Determines if a character is typically rendered in a double-width
 311          * cell.
 312          * c:
 313          *  a Unicode character
 314          * Returns:
 315          *  TRUE if the character is wide
 316          */
 317         public static int unicharIswide(gunichar c)
 318         {
 319                 // gboolean g_unichar_iswide (gunichar c);
 320                 return g_unichar_iswide(c);
 321         }
 322
 323         /**
 324          * Determines if a character is typically rendered in a double-width
 325          * cell under legacy East Asian locales. If a character is wide according to
 326          * g_unichar_iswide(), then it is also reported wide with this function, but
 327          * the converse is not necessarily true. See the
 328          * Unicode Standard
 329          * Annex 11 for details.
 330          * c:
 331          *  a Unicode character
 332          * Returns:
 333          *  TRUE if the character is wide in legacy East Asian locales
 334          * Since 2.12
 335          */
 336         public static int unicharIswideCjk(gunichar c)
 337         {
 338                 // gboolean g_unichar_iswide_cjk (gunichar c);
 339                 return g_unichar_iswide_cjk(c);
 340         }
 341
 342         /**
 343          * Determines if a given character typically takes zero width when rendered.
 344          * The return value is TRUE for all non-spacing and enclosing marks
 345          * (e.g., combining accents), format characters, zero-width
 346          * space, but not U+00AD SOFT HYPHEN.
 347          * A typical use of this function is with one of g_unichar_iswide() or
 348          * g_unichar_iswide_cjk() to determine the number of cells a string occupies
 349          * when displayed on a grid display (terminals). However, note that not all
 350          * terminals support zero-width rendering of zero-width marks.
 351          * c:
 352          *  a Unicode character
 353          * Returns:
 354          *  TRUE if the character has zero width
 355          * Since 2.14
 356          */
 357         public static int unicharIszerowidth(gunichar c)
 358         {
 359                 // gboolean g_unichar_iszerowidth (gunichar c);
 360                 return g_unichar_iszerowidth(c);
 361         }
 362
 363         /**
 364          * Converts a character to uppercase.
 365          * c:
 366          *  a Unicode character
 367          * Returns:
 368          *  the result of converting c to uppercase.
  369          *  If c is not a lowercase or titlecase character,
 370          *  or has no upper case equivalent c is returned unchanged.
 371          */
 372         public static gunichar unicharToupper(gunichar c)
 373         {
 374                 // gunichar g_unichar_toupper (gunichar c);
 375                 return g_unichar_toupper(c);
 376         }
 377
 378         /**
 379          * Converts a character to lower case.
 380          * c:
 381          *  a Unicode character.
 382          * Returns:
 383          *  the result of converting c to lower case.
  384          *  If c is not an uppercase or titlecase character,
 385          *  or has no lowercase equivalent c is returned unchanged.
 386          */
 387         public static gunichar unicharTolower(gunichar c)
 388         {
 389                 // gunichar g_unichar_tolower (gunichar c);
 390                 return g_unichar_tolower(c);
 391         }
 392
 393         /**
 394          * Converts a character to the titlecase.
 395          * c:
 396          *  a Unicode character
 397          * Returns:
 398          *  the result of converting c to titlecase.
 399          *  If c is not an uppercase or lowercase character,
 400          *  c is returned unchanged.
 401          */
 402         public static gunichar unicharTotitle(gunichar c)
 403         {
 404                 // gunichar g_unichar_totitle (gunichar c);
 405                 return g_unichar_totitle(c);
 406         }
 407
 408         /**
 409          * Determines the numeric value of a character as a decimal
 410          * digit.
 411          * c:
 412          *  a Unicode character
 413          * Returns:
 414          *  If c is a decimal digit (according to
 415          * g_unichar_isdigit()), its numeric value. Otherwise, -1.
 416          */
 417         public static int unicharDigitValue(gunichar c)
 418         {
 419                 // gint g_unichar_digit_value (gunichar c);
 420                 return g_unichar_digit_value(c);
 421         }
 422
 423         /**
  424          * Determines the numeric value of a character as a hexadecimal
 425          * digit.
 426          * c:
 427          *  a Unicode character
 428          * Returns:
 429          *  If c is a hex digit (according to
 430          * g_unichar_isxdigit()), its numeric value. Otherwise, -1.
 431          */
 432         public static int unicharXdigitValue(gunichar c)
 433         {
 434                 // gint g_unichar_xdigit_value (gunichar c);
 435                 return g_unichar_xdigit_value(c);
 436         }
 437
 438
 439         /**
 440          * Classifies a Unicode character by type.
 441          * c:
 442          *  a Unicode character
 443          * Returns:
 444          *  the type of the character.
 445          */
 446         public static GUnicodeType unicharType(gunichar c)
 447         {
 448                 // GUnicodeType g_unichar_type (gunichar c);
 449                 return g_unichar_type(c);
 450         }
 451
 452
 453         /**
 454          * Determines the break type of c. c should be a Unicode character
 455          * (to derive a character from UTF-8 encoded text, use
 456          * g_utf8_get_char()). The break type is used to find word and line
 457          * breaks ("text boundaries"), Pango implements the Unicode boundary
 458          * resolution algorithms and normally you would use a function such
 459          * as pango_break() instead of caring about break types yourself.
 460          * c:
 461          *  a Unicode character
 462          * Returns:
 463          *  the break type of c
 464          */
 465         public static GUnicodeBreakType unicharBreakType(gunichar c)
 466         {
 467                 // GUnicodeBreakType g_unichar_break_type (gunichar c);
 468                 return g_unichar_break_type(c);
 469         }
 470
 471         /**
 472          * Computes the canonical ordering of a string in-place.
 473          * This rearranges decomposed characters in the string
 474          * according to their combining classes. See the Unicode
 475          * manual for more information.
 476          * string:
 477          *  a UCS-4 encoded string.
 478          * len:
 479          *  the maximum length of string to use.
 480          */
  481         public static void unicodeCanonicalOrdering(gunichar* string, uint len)
  482         {
  483                 // C prototype: void g_unicode_canonical_ordering (gunichar *string,  gsize len);
  484                 g_unicode_canonical_ordering(string, len); // both arguments are forwarded unchanged; nothing is returned
  485         }
 486
 487         /**
 488          * Computes the canonical decomposition of a Unicode character.
 489          * ch:
 490          *  a Unicode character.
 491          * result_len:
 492          *  location to store the length of the return value.
 493          * Returns:
 494          *  a newly allocated string of Unicode characters.
 495          *  result_len is set to the resulting length of the string.
 496          */
 497         public static gunichar* unicodeCanonicalDecomposition(gunichar ch, uint* resultLen)
 498         {
 499                 // gunichar* g_unicode_canonical_decomposition (gunichar ch,  gsize *result_len);
 500                 return g_unicode_canonical_decomposition(ch, resultLen);
 501         }
 502
 503         /**
 504          * In Unicode, some characters are mirrored. This
 505          * means that their images are mirrored horizontally in text that is laid
 506          * out from right to left. For instance, "(" would become its mirror image,
 507          * ")", in right-to-left text.
 508          * If ch has the Unicode mirrored property and there is another unicode
 509          * character that typically has a glyph that is the mirror image of ch's
 510          * glyph and mirrored_ch is set, it puts that character in the address
 511          * pointed to by mirrored_ch. Otherwise the original character is put.
 512          * ch:
 513          *  a Unicode character
 514          * mirrored_ch:
 515          *  location to store the mirrored character
 516          * Returns:
 517          *  TRUE if ch has a mirrored character, FALSE otherwise
 518          * Since 2.4
 519          */
 520         public static int unicharGetMirrorChar(gunichar ch, gunichar* mirroredCh)
 521         {
 522                 // gboolean g_unichar_get_mirror_char (gunichar ch,  gunichar *mirrored_ch);
 523                 return g_unichar_get_mirror_char(ch, mirroredCh);
 524         }
 525
 526
 527         /**
 528          * Looks up the GUnicodeScript for a particular character (as defined
 529          * by Unicode Standard Annex 24). No check is made for ch being a
 530          * valid Unicode character; if you pass in invalid character, the
 531          * result is undefined.
 532          * ch:
 533          *  a Unicode character
 534          * Returns:
 535          *  the GUnicodeScript for the character.
 536          * Since 2.14
 537          */
 538         public static GUnicodeScript unicharGetScript(gunichar ch)
 539         {
 540                 // GUnicodeScript g_unichar_get_script (gunichar ch);
 541                 return g_unichar_get_script(ch);
 542         }
 543
 544
 545         /**
 546          * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 547          * If p does not point to a valid UTF-8 encoded character, results are
 548          * undefined. If you are not sure that the bytes are complete
 549          * valid Unicode characters, you should use g_utf8_get_char_validated()
 550          * instead.
 551          * p:
 552          *  a pointer to Unicode character encoded as UTF-8
 553          * Returns:
 554          *  the resulting character
 555          */
 556         public static gunichar utf8_GetChar(char[] p)
 557         {
 558                 // gunichar g_utf8_get_char (const gchar *p);
 559                 return g_utf8_get_char(Str.toStringz(p));
 560         }
 561
 562         /**
 563          * Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
 564          * This function checks for incomplete characters, for invalid characters
 565          * such as characters that are out of the range of Unicode, and for
 566          * overlong encodings of valid characters.
 567          * p:
 568          *  a pointer to Unicode character encoded as UTF-8
 569          * max_len:
 570          *  the maximum number of bytes to read, or -1, for no maximum.
 571          * Returns:
 572          *  the resulting character. If p points to a partial
 573          *  sequence at the end of a string that could begin a valid
 574          *  character (or if max_len is zero), returns (gunichar)-2;
 575          *  otherwise, if p does not point to a valid UTF-8 encoded
 576          *  Unicode character, returns (gunichar)-1.
 577          */
 578         public static gunichar utf8_GetCharValidated(char[] p, int maxLen)
 579         {
 580                 // gunichar g_utf8_get_char_validated (const gchar *p,  gssize max_len);
 581                 return g_utf8_get_char_validated(Str.toStringz(p), maxLen);
 582         }
 583
 584         /**
 585          * Converts from an integer character offset to a pointer to a position
 586          * within the string.
 587          * Since 2.10, this function allows to pass a negative offset to
 588          * step backwards. It is usually worth stepping backwards from the end
 589          * instead of forwards if offset is in the last fourth of the string,
 590          * since moving forward is about 3 times faster than moving backward.
 591          * str:
 592          *  a UTF-8 encoded string
 593          * offset:
 594          *  a character offset within str
 595          * Returns:
 596          *  the resulting pointer
 597          */
 598         public static char[] utf8_OffsetToPointer(char[] str, int offset)
 599         {
 600                 // gchar* g_utf8_offset_to_pointer (const gchar *str,  glong offset);
 601                 return Str.toString(g_utf8_offset_to_pointer(Str.toStringz(str), offset) );
 602         }
 603
 604         /**
 605          * Converts from a pointer to position within a string to a integer
 606          * character offset.
 607          * Since 2.10, this function allows pos to be before str, and returns
 608          * a negative offset in this case.
 609          * str:
 610          *  a UTF-8 encoded string
 611          * pos:
 612          *  a pointer to a position within str
 613          * Returns:
 614          *  the resulting character offset
 615          */
 616         public static int utf8_PointerToOffset(char[] str, char[] pos)
 617         {
 618                 // glong g_utf8_pointer_to_offset (const gchar *str,  const gchar *pos);
 619                 return g_utf8_pointer_to_offset(Str.toStringz(str), Str.toStringz(pos));
 620         }
 621
 622         /**
 623          * Finds the previous UTF-8 character in the string before p.
 624          * p does not have to be at the beginning of a UTF-8 character. No check
 625          * is made to see if the character found is actually valid other than
 626          * it starts with an appropriate byte. If p might be the first
 627          * character of the string, you must use g_utf8_find_prev_char() instead.
 628          * p:
 629          *  a pointer to a position within a UTF-8 encoded string
 630          * Returns:
 631          *  a pointer to the found character.
 632          */
 633         public static char[] utf8_PrevChar(char[] p)
 634         {
 635                 // gchar* g_utf8_prev_char (const gchar *p);
 636                 return Str.toString(g_utf8_prev_char(Str.toStringz(p)) );
 637         }
 638
 639         /**
 640          * Finds the start of the next UTF-8 character in the string after p.
 641          * p does not have to be at the beginning of a UTF-8 character. No check
 642          * is made to see if the character found is actually valid other than
 643          * it starts with an appropriate byte.
 644          * p:
 645          *  a pointer to a position within a UTF-8 encoded string
 646          * end:
 647          *  a pointer to the end of the string, or NULL to indicate
 648          *  that the string is nul-terminated, in which case
 649          *  the returned value will be
 650          * Returns:
 651          *  a pointer to the found character or NULL
 652          */
 653         public static char[] utf8_FindNextChar(char[] p, char[] end)
 654         {
 655                 // gchar* g_utf8_find_next_char (const gchar *p,  const gchar *end);
 656                 return Str.toString(g_utf8_find_next_char(Str.toStringz(p), Str.toStringz(end)) );
 657         }
 658
 659         /**
 660          * Given a position p with a UTF-8 encoded string str, find the start
 661          * of the previous UTF-8 character starting before p. Returns NULL if no
 662          * UTF-8 characters are present in str before p.
 663          * p does not have to be at the beginning of a UTF-8 character. No check
 664          * is made to see if the character found is actually valid other than
 665          * it starts with an appropriate byte.
 666          * str:
 667          *  pointer to the beginning of a UTF-8 encoded string
 668          * p:
 669          *  pointer to some position within str
 670          * Returns:
 671          *  a pointer to the found character or NULL.
 672          */
 673         public static char[] utf8_FindPrevChar(char[] str, char[] p)
 674         {
 675                 // gchar* g_utf8_find_prev_char (const gchar *str,  const gchar *p);
 676                 return Str.toString(g_utf8_find_prev_char(Str.toStringz(str), Str.toStringz(p)) );
 677         }
 678
 679         /**
 680          * Returns the length of the string in characters.
 681          * p:
 682          *  pointer to the start of a UTF-8 encoded string.
 683          * max:
 684          *  the maximum number of bytes to examine. If max
 685          *  is less than 0, then the string is assumed to be
 686          *  nul-terminated. If max is 0, p will not be examined and
 687          *  may be NULL.
 688          * Returns:
 689          *  the length of the string in characters
 690          */
 691         public static int utf8_Strlen(char[] p, int max)
 692         {
 693                 // glong g_utf8_strlen (const gchar *p,  gssize max);
 694                 return g_utf8_strlen(Str.toStringz(p), max);
 695         }
 696
 697         /**
 698          * Like the standard C strncpy() function, but
 699          * copies a given number of characters instead of a given number of
 700          * bytes. The src string must be valid UTF-8 encoded text.
 701          * (Use g_utf8_validate() on all text before trying to use UTF-8
 702          * utility functions with it.)
 703          * dest:
 704          *  buffer to fill with characters from src
 705          * src:
 706          *  UTF-8 encoded string
 707          * n:
 708          *  character count
 709          * Returns:
 710          *  dest
 711          */
 712         public static char[] utf8_Strncpy(char[] dest, char[] src, uint n)
 713         {
 714                 // gchar* g_utf8_strncpy (gchar *dest,  const gchar *src,  gsize n);
 715                 return Str.toString(g_utf8_strncpy(Str.toStringz(dest), Str.toStringz(src), n) );
 716         }
 717
 718         /**
 719          * Finds the leftmost occurrence of the given Unicode character
 720          * in a UTF-8 encoded string, while limiting the search to len bytes.
 721          * If len is -1, allow unbounded search.
 722          * p:
 723          *  a nul-terminated UTF-8 encoded string
 724          * len:
 725          *  the maximum length of p
 726          * c:
 727          *  a Unicode character
 728          * Returns:
 729          *  NULL if the string does not contain the character,
 730          *  otherwise, a pointer to the start of the leftmost occurrence of
 731          *  the character in the string.
 732          */
 733         public static char[] utf8_Strchr(char[] p, int len, gunichar c)
 734         {
 735                 // gchar* g_utf8_strchr (const gchar *p,  gssize len,  gunichar c);
 736                 return Str.toString(g_utf8_strchr(Str.toStringz(p), len, c) );
 737         }
 738
 739         /**
 740          * Find the rightmost occurrence of the given Unicode character
 741          * in a UTF-8 encoded string, while limiting the search to len bytes.
 742          * If len is -1, allow unbounded search.
 743          * p:
 744          *  a nul-terminated UTF-8 encoded string
 745          * len:
 746          *  the maximum length of p
 747          * c:
 748          *  a Unicode character
 749          * Returns:
 750          *  NULL if the string does not contain the character,
 751          *  otherwise, a pointer to the start of the rightmost occurrence of the
 752          *  character in the string.
 753          */
 754         public static char[] utf8_Strrchr(char[] p, int len, gunichar c)
 755         {
 756                 // gchar* g_utf8_strrchr (const gchar *p,  gssize len,  gunichar c);
 757                 return Str.toString(g_utf8_strrchr(Str.toStringz(p), len, c) );
 758         }
 759
 760         /**
 761          * Reverses a UTF-8 string. str must be valid UTF-8 encoded text.
 762          * (Use g_utf8_validate() on all text before trying to use UTF-8
 763          * utility functions with it.)
 764          * Note that unlike g_strreverse(), this function returns
 765          * newly-allocated memory, which should be freed with g_free() when
 766          * no longer needed.
 767          * str:
 768          *  a UTF-8 encoded string
 769          * len:
 770          *  the maximum length of str to use. If len < 0, then
 771          *  the string is nul-terminated.
 772          * Returns:
 773          *  a newly-allocated string which is the reverse of str.
 774          * Since 2.2
 775          */
 776         public static char[] utf8_Strreverse(char[] str, int len)
 777         {
 778                 // gchar* g_utf8_strreverse (const gchar *str,  gssize len);
 779                 return Str.toString(g_utf8_strreverse(Str.toStringz(str), len) );
 780         }
 781
 782         /**
 783          * Validates UTF-8 encoded text. str is the text to validate;
 784          * if str is nul-terminated, then max_len can be -1, otherwise
 785          * max_len should be the number of bytes to validate.
 786          * If end is non-NULL, then the end of the valid range
 787          * will be stored there (i.e. the start of the first invalid
 788          * character if some bytes were invalid, or the end of the text
 789          * being validated otherwise).
 790          * Note that g_utf8_validate() returns FALSE if max_len is
 791          * positive and NUL is met before max_len bytes have been read.
 792          * Returns TRUE if all of str was valid. Many GLib and GTK+
 793          * routines require valid UTF-8 as input;
 794          * so data read from a file or the network should be checked
 795          * with g_utf8_validate() before doing anything else with it.
 796          * str:
 797          *  a pointer to character data
 798          * max_len:
 799          *  max bytes to validate, or -1 to go until NUL
 800          * end:
 801          *  return location for end of valid data
 802          * Returns:
 803          *  TRUE if the text was valid UTF-8
 804          */
 805         public static int utf8_Validate(char[] str, int maxLen, char** end)
 806         {
 807                 // gboolean g_utf8_validate (const gchar *str,  gssize max_len,  const gchar **end);
 808                 return g_utf8_validate(Str.toStringz(str), maxLen, end);
 809         }
 810
 811         /**
 812          * Converts all Unicode characters in the string that have a case
 813          * to uppercase. The exact manner that this is done depends
 814          * on the current locale, and may result in the number of
 815          * characters in the string increasing. (For instance, the
 816          * German ess-zet will be changed to SS.)
 817          * str:
 818          *  a UTF-8 encoded string
 819          * len:
 820          *  length of str, in bytes, or -1 if str is nul-terminated.
 821          * Returns:
 822          *  a newly allocated string, with all characters
 823          *  converted to uppercase.
 824          */
 825         public static char[] utf8_Strup(char[] str, int len)
 826         {
 827                 // gchar* g_utf8_strup (const gchar *str,  gssize len);
 828                 return Str.toString(g_utf8_strup(Str.toStringz(str), len) );
 829         }
 830
 831         /**
 832          * Converts all Unicode characters in the string that have a case
 833          * to lowercase. The exact manner that this is done depends
 834          * on the current locale, and may result in the number of
 835          * characters in the string changing.
 836          * str:
 837          *  a UTF-8 encoded string
 838          * len:
 839          *  length of str, in bytes, or -1 if str is nul-terminated.
 840          * Returns:
 841          *  a newly allocated string, with all characters
 842          *  converted to lowercase.
 843          */
 844         public static char[] utf8_Strdown(char[] str, int len)
 845         {
 846                 // gchar* g_utf8_strdown (const gchar *str,  gssize len);
 847                 return Str.toString(g_utf8_strdown(Str.toStringz(str), len) );
 848         }
 849
 850         /**
 851          * Converts a string into a form that is independent of case. The
 852          * result will not correspond to any particular case, but can be
 853          * compared for equality or ordered with the results of calling
 854          * g_utf8_casefold() on other strings.
 855          * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
 856          * only an approximation to the correct linguistic case insensitive
 857          * ordering, though it is a fairly good one. Getting this exactly
 858          * right would require a more sophisticated collation function that
 859          * takes case sensitivity into account. GLib does not currently
 860          * provide such a function.
 861          * str:
 862          *  a UTF-8 encoded string
 863          * len:
 864          *  length of str, in bytes, or -1 if str is nul-terminated.
 865          * Returns:
 866          *  a newly allocated string, that is a
 867          *  case independent form of str.
 868          */
 869         public static char[] utf8_Casefold(char[] str, int len)
 870         {
 871                 // gchar* g_utf8_casefold (const gchar *str,  gssize len);
 872                 return Str.toString(g_utf8_casefold(Str.toStringz(str), len) );
 873         }
 874
 875         /**
 876          * Converts a string into canonical form, standardizing
 877          * such issues as whether a character with an accent
 878          * is represented as a base character and combining
 879          * accent or as a single precomposed character. You
 880          * should generally call g_utf8_normalize() before
 881          * comparing two Unicode strings.
 882          * The normalization mode G_NORMALIZE_DEFAULT only
 883          * standardizes differences that do not affect the
 884          * text content, such as the above-mentioned accent
 885          * representation. G_NORMALIZE_ALL also standardizes
 886          * the "compatibility" characters in Unicode, such
 887          * as SUPERSCRIPT THREE to the standard forms
 888          * (in this case DIGIT THREE). Formatting information
 889          * may be lost but for most text operations such
 890          * characters should be considered the same.
 891          * For example, g_utf8_collate() normalizes
 892          * with G_NORMALIZE_ALL as its first step.
 893          * G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE
 894          * are like G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL,
 895          * but returned a result with composed forms rather
 896          * than a maximally decomposed form. This is often
 897          * useful if you intend to convert the string to
 898          * a legacy encoding or pass it to a system with
 899          * less capable Unicode handling.
 900          * str:
 901          *  a UTF-8 encoded string.
 902          * len:
 903          *  length of str, in bytes, or -1 if str is nul-terminated.
 904          * mode:
 905          *  the type of normalization to perform.
 906          * Returns:
 907          *  a newly allocated string, that is the
 908          *  normalized form of str.
 909          */
 910         public static char[] utf8_Normalize(char[] str, int len, GNormalizeMode mode)
 911         {
 912                 // gchar* g_utf8_normalize (const gchar *str,  gssize len,  GNormalizeMode mode);
 913                 return Str.toString(g_utf8_normalize(Str.toStringz(str), len, mode) );
 914         }
 915
 916
 917         /**
 918          * Compares two strings for ordering using the linguistically
 919          * correct rules for the current locale.
 920          * When sorting a large number of strings, it will be significantly
 921          * faster to obtain collation keys with g_utf8_collate_key() and
 922          * compare the keys with strcmp() when sorting instead of sorting
 923          * the original strings.
 924          * str1:
 925          *  a UTF-8 encoded string
 926          * str2:
 927          *  a UTF-8 encoded string
 928          * Returns:
 929          *  < 0 if str1 compares before str2,
 930          *  0 if they compare equal, > 0 if str1 compares after str2.
 931          */
 932         public static int utf8_Collate(char[] str1, char[] str2)
 933         {
 934                 // gint g_utf8_collate (const gchar *str1,  const gchar *str2);
 935                 return g_utf8_collate(Str.toStringz(str1), Str.toStringz(str2));
 936         }
 937
 938         /**
 939          * Converts a string into a collation key that can be compared
 940          * with other collation keys produced by the same function using
 941          * strcmp().
 942          * The results of comparing the collation keys of two strings
 943          * with strcmp() will always be the same as comparing the two
 944          * original keys with g_utf8_collate().
 945          * Note that this function depends on the
 946          * current locale.
 947          * str:
 948          *  a UTF-8 encoded string.
 949          * len:
 950          *  length of str, in bytes, or -1 if str is nul-terminated.
 951          * Returns:
 952          *  a newly allocated string. This string should
 953          *  be freed with g_free() when you are done with it.
 954          */
 955         public static char[] utf8_CollateKey(char[] str, int len)
 956         {
 957                 // gchar* g_utf8_collate_key (const gchar *str,  gssize len);
 958                 return Str.toString(g_utf8_collate_key(Str.toStringz(str), len) );
 959         }
 960
 961         /**
 962          * Converts a string into a collation key that can be compared
 963          * with other collation keys produced by the same function using strcmp().
 964          * In order to sort filenames correctly, this function treats the dot '.'
 965          * as a special case. Most dictionary orderings seem to consider it
 966          * insignificant, thus producing the ordering "event.c" "eventgenerator.c"
 967          * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
 968          * would like to treat numbers intelligently so that "file1" "file10" "file5"
 969          * is sorted as "file1" "file5" "file10".
 970          * Note that this function depends on the
 971          * current locale.
 972          * str:
 973          *  a UTF-8 encoded string.
 974          * len:
 975          *  length of str, in bytes, or -1 if str is nul-terminated.
 976          * Returns:
 977          *  a newly allocated string. This string should
 978          *  be freed with g_free() when you are done with it.
 979          * Since 2.8
 980          */
 981         public static char[] utf8_CollateKeyForFilename(char[] str, int len)
 982         {
 983                 // gchar* g_utf8_collate_key_for_filename (const gchar *str,  gssize len);
 984                 return Str.toString(g_utf8_collate_key_for_filename(Str.toStringz(str), len) );
 985         }
 986
 987         /**
 988          * Convert a string from UTF-8 to UTF-16. A 0 character will be
 989          * added to the result after the converted text.
 990          * str:
 991          *  a UTF-8 encoded string
 992          * len:
 993          *  the maximum length (number of characters) of str to use.
 994          *  If len < 0, then the string is nul-terminated.
 995          * items_read:
 996          *  location to store number of bytes read, or NULL.
 997          *  If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
 998          *  returned in case str contains a trailing partial
 999          *  character. If an error occurs then the index of the
1000          *  invalid input is stored here.
1001          * items_written:
1002          *  location to store number of gunichar2 written,
1003          *  or NULL.
1004          *  The value stored here does not include the trailing 0.
1005          * error:
         *  location to store the error occurring, or NULL to ignore
1007          *  errors. Any of the errors in GConvertError other than
1008          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1009          * Returns:
1010          *  a pointer to a newly allocated UTF-16 string.
1011          *  This value must be freed with g_free(). If an
1012          *  error occurs, NULL will be returned and
1013          *  error set.
1014          */
1015         public static gunichar2* utf8_ToUtf16(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
1016         {
1017                 // gunichar2* g_utf8_to_utf16 (const gchar *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1018                 return g_utf8_to_utf16(Str.toStringz(str), len, itemsRead, itemsWritten, error);
1019         }
1020
1021         /**
1022          * Convert a string from UTF-8 to a 32-bit fixed width
1023          * representation as UCS-4. A trailing 0 will be added to the
1024          * string after the converted text.
1025          * str:
1026          *  a UTF-8 encoded string
1027          * len:
1028          *  the maximum length of str to use. If len < 0, then
1029          *  the string is nul-terminated.
1030          * items_read:
1031          *  location to store number of bytes read, or NULL.
1032          *  If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
1033          *  returned in case str contains a trailing partial
1034          *  character. If an error occurs then the index of the
1035          *  invalid input is stored here.
1036          * items_written:
1037          *  location to store number of characters written or NULL.
1038          *  The value here stored does not include the trailing 0
1039          *  character.
1040          * error:
         *  location to store the error occurring, or NULL to ignore
1042          *  errors. Any of the errors in GConvertError other than
1043          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1044          * Returns:
1045          *  a pointer to a newly allocated UCS-4 string.
1046          *  This value must be freed with g_free(). If an
1047          *  error occurs, NULL will be returned and
1048          *  error set.
1049          */
1050         public static gunichar* utf8_ToUcs4(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
1051         {
1052                 // gunichar* g_utf8_to_ucs4 (const gchar *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1053                 return g_utf8_to_ucs4(Str.toStringz(str), len, itemsRead, itemsWritten, error);
1054         }
1055
1056         /**
1057          * Convert a string from UTF-8 to a 32-bit fixed width
1058          * representation as UCS-4, assuming valid UTF-8 input.
1059          * This function is roughly twice as fast as g_utf8_to_ucs4()
1060          * but does no error checking on the input.
1061          * str:
1062          *  a UTF-8 encoded string
1063          * len:
1064          *  the maximum length of str to use. If len < 0, then
1065          *  the string is nul-terminated.
1066          * items_written:
1067          *  location to store the number of characters in the
1068          *  result, or NULL.
1069          * Returns:
1070          *  a pointer to a newly allocated UCS-4 string.
1071          *  This value must be freed with g_free().
1072          */
1073         public static gunichar* utf8_ToUcs4_Fast(char[] str, int len, int* itemsWritten)
1074         {
1075                 // gunichar* g_utf8_to_ucs4_fast (const gchar *str,  glong len,  glong *items_written);
1076                 return g_utf8_to_ucs4_fast(Str.toStringz(str), len, itemsWritten);
1077         }
1078
1079         /**
1080          * Convert a string from UTF-16 to UCS-4. The result will be
1081          * terminated with a 0 character.
1082          * str:
1083          *  a UTF-16 encoded string
1084          * len:
1085          *  the maximum length (number of gunichar2) of str to use.
1086          *  If len < 0, then the string is terminated with a 0 character.
1087          * items_read:
1088          *  location to store number of words read, or NULL.
1089          *  If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
1090          *  returned in case str contains a trailing partial
1091          *  character. If an error occurs then the index of the
1092          *  invalid input is stored here.
1093          * items_written:
1094          *  location to store number of characters written, or NULL.
1095          *  The value stored here does not include the trailing
1096          *  0 character.
1097          * error:
         *  location to store the error occurring, or NULL to ignore
1099          *  errors. Any of the errors in GConvertError other than
1100          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1101          * Returns:
1102          *  a pointer to a newly allocated UCS-4 string.
1103          *  This value must be freed with g_free(). If an
1104          *  error occurs, NULL will be returned and
1105          *  error set.
1106          */
1107         public static gunichar* utf16_ToUcs4(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
1108         {
1109                 // gunichar* g_utf16_to_ucs4 (const gunichar2 *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1110                 return g_utf16_to_ucs4(str, len, itemsRead, itemsWritten, error);
1111         }
1112
1113         /**
1114          * Convert a string from UTF-16 to UTF-8. The result will be
1115          * terminated with a 0 byte.
1116          * Note that the input is expected to be already in native endianness,
1117          * an initial byte-order-mark character is not handled specially.
1118          * g_convert() can be used to convert a byte buffer of UTF-16 data of
         * ambiguous endianness.
1120          * str:
1121          *  a UTF-16 encoded string
1122          * len:
1123          *  the maximum length (number of gunichar2) of str to use.
1124          *  If len < 0, then the string is terminated with a 0 character.
1125          * items_read:
1126          *  location to store number of words read, or NULL.
1127          *  If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
1128          *  returned in case str contains a trailing partial
1129          *  character. If an error occurs then the index of the
1130          *  invalid input is stored here.
1131          * items_written:
1132          *  location to store number of bytes written, or NULL.
1133          *  The value stored here does not include the trailing
1134          *  0 byte.
1135          * error:
         *  location to store the error occurring, or NULL to ignore
1137          *  errors. Any of the errors in GConvertError other than
1138          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1139          * Returns:
1140          *  a pointer to a newly allocated UTF-8 string.
1141          *  This value must be freed with g_free(). If an
1142          *  error occurs, NULL will be returned and
1143          *  error set.
1144          */
1145         public static char[] utf16_ToUtf8(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
1146         {
1147                 // gchar* g_utf16_to_utf8 (const gunichar2 *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1148                 return Str.toString(g_utf16_to_utf8(str, len, itemsRead, itemsWritten, error) );
1149         }
1150
1151         /**
1152          * Convert a string from UCS-4 to UTF-16. A 0 character will be
1153          * added to the result after the converted text.
1154          * str:
1155          *  a UCS-4 encoded string
1156          * len:
1157          *  the maximum length (number of characters) of str to use.
1158          *  If len < 0, then the string is terminated with a 0 character.
1159          * items_read:
1160          *  location to store number of bytes read, or NULL.
1161          *  If an error occurs then the index of the invalid input
1162          *  is stored here.
1163          * items_written:
1164          *  location to store number of gunichar2
1165          *  written, or NULL. The value stored here does not
1166          *  include the trailing 0.
1167          * error:
         *  location to store the error occurring, or NULL to ignore
1169          *  errors. Any of the errors in GConvertError other than
1170          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1171          * Returns:
1172          *  a pointer to a newly allocated UTF-16 string.
1173          *  This value must be freed with g_free(). If an
1174          *  error occurs, NULL will be returned and
1175          *  error set.
1176          */
1177         public static gunichar2* ucs4_ToUtf16(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
1178         {
1179                 // gunichar2* g_ucs4_to_utf16 (const gunichar *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1180                 return g_ucs4_to_utf16(str, len, itemsRead, itemsWritten, error);
1181         }
1182
1183         /**
         * Convert a string from a 32-bit fixed width representation as UCS-4
1185          * to UTF-8. The result will be terminated with a 0 byte.
1186          * str:
1187          *  a UCS-4 encoded string
1188          * len:
1189          *  the maximum length (number of characters) of str to use.
1190          *  If len < 0, then the string is terminated with a 0 character.
1191          * items_read:
1192          *  location to store number of characters read, or NULL.
1193          * items_written:
1194          *  location to store number of bytes written or NULL.
1195          *  The value here stored does not include the trailing 0
1196          *  byte.
1197          * error:
         *  location to store the error occurring, or NULL to ignore
1199          *  errors. Any of the errors in GConvertError other than
1200          *  G_CONVERT_ERROR_NO_CONVERSION may occur.
1201          * Returns:
1202          *  a pointer to a newly allocated UTF-8 string.
1203          *  This value must be freed with g_free(). If an
1204          *  error occurs, NULL will be returned and
1205          *  error set. In that case, items_read will be
1206          *  set to the position of the first invalid input
1207          *  character.
1208          */
1209         public static char[] ucs4_ToUtf8(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
1210         {
1211                 // gchar* g_ucs4_to_utf8 (const gunichar *str,  glong len,  glong *items_read,  glong *items_written,  GError **error);
1212                 return Str.toString(g_ucs4_to_utf8(str, len, itemsRead, itemsWritten, error) );
1213         }
1214
1215         /**
1216          * Converts a single character to UTF-8.
1217          * c:
1218          *  a Unicode character code
1219          * outbuf:
1220          *  output buffer, must have at least 6 bytes of space.
1221          *  If NULL, the length will be computed and returned
1222          *  and nothing will be written to outbuf.
1223          * Returns:
1224          *  number of bytes written
1225          * See Also
1226          * g_locale_to_utf8(), g_locale_from_utf8()
1227          * Convenience functions for converting between UTF-8 and the locale encoding.
1228          * [3] surrogate pairs
1229          */
1230         public static int unicharToUtf8(gunichar c, char[] outbuf)
1231         {
1232                 // gint g_unichar_to_utf8 (gunichar c,  gchar *outbuf);
1233                 return g_unichar_to_utf8(c, Str.toStringz(outbuf));
1234         }
1235 }