alternative to assert
[gtkD.git] / gtkD / src / glib / Unicode.d
blobc9547c1b1aa7428f2f67d23418bdb58a052381e4
1 /*
2 * This file is part of gtkD.
4 * gtkD is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
9 * gtkD is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with gtkD; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 // generated automatically - do not change
20 // find conversion definition on APILookup.txt
21 // implement new conversion functionalities on the wrap.utils package
24 * Conversion parameters:
25 * inFile = glib-Unicode-Manipulation.html
26 * outPack = glib
27 * outFile = Unicode
28 * strct =
29 * realStrct=
30 * ctorStrct=
31 * clss = Unicode
32 * interf =
33 * class Code: No
34 * interface Code: No
35 * template for:
36 * extend =
37 * implements:
38 * prefixes:
39 * - g_
40 * omit structs:
41 * omit prefixes:
42 * omit code:
43 * imports:
44 * - glib.ErrorG
45 * - glib.Str
46 * structWrap:
47 * module aliases:
48 * local aliases:
51 module glib.Unicode;
53 version(noAssert)
55 version(Tango)
57 import tango.io.Stdout; // use the tango loging?
61 private import gtkc.glibtypes;
63 private import gtkc.glib;
66 private import glib.ErrorG;
67 private import glib.Str;
72 /**
73 * Description
74 * This section describes a number of functions for dealing with
75 * Unicode characters and strings. There are analogues of the
76 * traditional ctype.h character classification
77 * and case conversion functions, UTF-8 analogues of some string utility
78 * functions, functions to perform normalization, case conversion and
79 * collation on UTF-8 strings and finally functions to convert between
80 * the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
81 * The implementations of the Unicode functions in GLib are based
82 * on the Unicode Character Data tables, which are available from
83 * www.unicode.org.
84 * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
85 * GLib 2.12 supports Unicode 5.0.
87 public class Unicode
90 /**
95 /**
96 * Checks whether ch is a valid Unicode character. Some possible
97 * integer values of ch will not be valid. 0 is considered a valid
98 * character, though it's normally a string terminator.
99 * ch:
100 * a Unicode character
101 * Returns:
102 * TRUE if ch is a valid Unicode character
104 public static int unicharValidate(gunichar ch)
106 // gboolean g_unichar_validate (gunichar ch);
107 return g_unichar_validate(ch);
111 * Determines whether a character is alphanumeric.
112 * Given some UTF-8 text, obtain a character value
113 * with g_utf8_get_char().
114 * c:
115 * a Unicode character
116 * Returns:
117 * TRUE if c is an alphanumeric character
119 public static int unicharIsalnum(gunichar c)
121 // gboolean g_unichar_isalnum (gunichar c);
122 return g_unichar_isalnum(c);
126 * Determines whether a character is alphabetic (i.e. a letter).
127 * Given some UTF-8 text, obtain a character value with
128 * g_utf8_get_char().
129 * c:
130 * a Unicode character
131 * Returns:
132 * TRUE if c is an alphabetic character
134 public static int unicharIsalpha(gunichar c)
136 // gboolean g_unichar_isalpha (gunichar c);
137 return g_unichar_isalpha(c);
141 * Determines whether a character is a control character.
142 * Given some UTF-8 text, obtain a character value with
143 * g_utf8_get_char().
144 * c:
145 * a Unicode character
146 * Returns:
147 * TRUE if c is a control character
149 public static int unicharIscntrl(gunichar c)
151 // gboolean g_unichar_iscntrl (gunichar c);
152 return g_unichar_iscntrl(c);
156 * Determines whether a character is numeric (i.e. a digit). This
157 * covers ASCII 0-9 and also digits in other languages/scripts. Given
158 * some UTF-8 text, obtain a character value with g_utf8_get_char().
159 * c:
160 * a Unicode character
161 * Returns:
162 * TRUE if c is a digit
164 public static int unicharIsdigit(gunichar c)
166 // gboolean g_unichar_isdigit (gunichar c);
167 return g_unichar_isdigit(c);
171 * Determines whether a character is printable and not a space
172 * (returns FALSE for control characters, format characters, and
173 * spaces). g_unichar_isprint() is similar, but returns TRUE for
174 * spaces. Given some UTF-8 text, obtain a character value with
175 * g_utf8_get_char().
176 * c:
177 * a Unicode character
178 * Returns:
179 * TRUE if c is printable unless it's a space
181 public static int unicharIsgraph(gunichar c)
183 // gboolean g_unichar_isgraph (gunichar c);
184 return g_unichar_isgraph(c);
188 * Determines whether a character is a lowercase letter.
189 * Given some UTF-8 text, obtain a character value with
190 * g_utf8_get_char().
191 * c:
192 * a Unicode character
193 * Returns:
194 * TRUE if c is a lowercase letter
196 public static int unicharIslower(gunichar c)
198 // gboolean g_unichar_islower (gunichar c);
199 return g_unichar_islower(c);
203 * Determines whether a character is printable.
204 * Unlike g_unichar_isgraph(), returns TRUE for spaces.
205 * Given some UTF-8 text, obtain a character value with
206 * g_utf8_get_char().
207 * c:
208 * a Unicode character
209 * Returns:
210 * TRUE if c is printable
212 public static int unicharIsprint(gunichar c)
214 // gboolean g_unichar_isprint (gunichar c);
215 return g_unichar_isprint(c);
219 * Determines whether a character is punctuation or a symbol.
220 * Given some UTF-8 text, obtain a character value with
221 * g_utf8_get_char().
222 * c:
223 * a Unicode character
224 * Returns:
225 * TRUE if c is a punctuation or symbol character
227 public static int unicharIspunct(gunichar c)
229 // gboolean g_unichar_ispunct (gunichar c);
230 return g_unichar_ispunct(c);
234 * Determines whether a character is a space, tab, or line separator
235 * (newline, carriage return, etc.). Given some UTF-8 text, obtain a
236 * character value with g_utf8_get_char().
237 * (Note: don't use this to do word breaking; you have to use
238 * Pango or equivalent to get word breaking right, the algorithm
239 * is fairly complex.)
240 * c:
241 * a Unicode character
242 * Returns:
243 * TRUE if c is a space character
245 public static int unicharIsspace(gunichar c)
247 // gboolean g_unichar_isspace (gunichar c);
248 return g_unichar_isspace(c);
252 * Determines if a character is uppercase.
253 * c:
254 * a Unicode character
255 * Returns:
256 * TRUE if c is an uppercase character
258 public static int unicharIsupper(gunichar c)
260 // gboolean g_unichar_isupper (gunichar c);
261 return g_unichar_isupper(c);
265 * Determines if a character is a hexadecimal digit.
266 * c:
267 * a Unicode character.
268 * Returns:
269 * TRUE if the character is a hexadecimal digit
271 public static int unicharIsxdigit(gunichar c)
273 // gboolean g_unichar_isxdigit (gunichar c);
274 return g_unichar_isxdigit(c);
278 * Determines if a character is titlecase. Some characters in
279 * Unicode which are composites, such as the DZ digraph
280 * have three case variants instead of just two. The titlecase
281 * form is used at the beginning of a word where only the
282 * first letter is capitalized. The titlecase form of the DZ
283 * digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
284 * c:
285 * a Unicode character
286 * Returns:
287 * TRUE if the character is titlecase
289 public static int unicharIstitle(gunichar c)
291 // gboolean g_unichar_istitle (gunichar c);
292 return g_unichar_istitle(c);
296 * Determines if a given character is assigned in the Unicode
297 * standard.
298 * c:
299 * a Unicode character
300 * Returns:
301 * TRUE if the character has an assigned value
303 public static int unicharIsdefined(gunichar c)
305 // gboolean g_unichar_isdefined (gunichar c);
306 return g_unichar_isdefined(c);
310 * Determines if a character is typically rendered in a double-width
311 * cell.
312 * c:
313 * a Unicode character
314 * Returns:
315 * TRUE if the character is wide
317 public static int unicharIswide(gunichar c)
319 // gboolean g_unichar_iswide (gunichar c);
320 return g_unichar_iswide(c);
324 * Determines if a character is typically rendered in a double-width
325 * cell under legacy East Asian locales. If a character is wide according to
326 * g_unichar_iswide(), then it is also reported wide with this function, but
327 * the converse is not necessarily true. See the
328 * Unicode Standard
329 * Annex 11 for details.
330 * c:
331 * a Unicode character
332 * Returns:
333 * TRUE if the character is wide in legacy East Asian locales
334 * Since 2.12
336 public static int unicharIswideCjk(gunichar c)
338 // gboolean g_unichar_iswide_cjk (gunichar c);
339 return g_unichar_iswide_cjk(c);
343 * Determines if a given character typically takes zero width when rendered.
344 * The return value is TRUE for all non-spacing and enclosing marks
345 * (e.g., combining accents), format characters, zero-width
346 * space, but not U+00AD SOFT HYPHEN.
347 * A typical use of this function is with one of g_unichar_iswide() or
348 * g_unichar_iswide_cjk() to determine the number of cells a string occupies
349 * when displayed on a grid display (terminals). However, note that not all
350 * terminals support zero-width rendering of zero-width marks.
351 * c:
352 * a Unicode character
353 * Returns:
354 * TRUE if the character has zero width
355 * Since 2.14
357 public static int unicharIszerowidth(gunichar c)
359 // gboolean g_unichar_iszerowidth (gunichar c);
360 return g_unichar_iszerowidth(c);
364 * Converts a character to uppercase.
365 * c:
366 * a Unicode character
367 * Returns:
368 * the result of converting c to uppercase.
369 * If c is not a lowercase or titlecase character,
370 * or has no upper case equivalent, c is returned unchanged.
372 public static gunichar unicharToupper(gunichar c)
374 // gunichar g_unichar_toupper (gunichar c);
375 return g_unichar_toupper(c);
379 * Converts a character to lower case.
380 * c:
381 * a Unicode character.
382 * Returns:
383 * the result of converting c to lower case.
384 * If c is not an uppercase or titlecase character,
385 * or has no lowercase equivalent, c is returned unchanged.
387 public static gunichar unicharTolower(gunichar c)
389 // gunichar g_unichar_tolower (gunichar c);
390 return g_unichar_tolower(c);
394 * Converts a character to the titlecase.
395 * c:
396 * a Unicode character
397 * Returns:
398 * the result of converting c to titlecase.
399 * If c is not an uppercase or lowercase character,
400 * c is returned unchanged.
402 public static gunichar unicharTotitle(gunichar c)
404 // gunichar g_unichar_totitle (gunichar c);
405 return g_unichar_totitle(c);
409 * Determines the numeric value of a character as a decimal
410 * digit.
411 * c:
412 * a Unicode character
413 * Returns:
414 * If c is a decimal digit (according to
415 * g_unichar_isdigit()), its numeric value. Otherwise, -1.
417 public static int unicharDigitValue(gunichar c)
419 // gint g_unichar_digit_value (gunichar c);
420 return g_unichar_digit_value(c);
424 * Determines the numeric value of a character as a hexadecimal
425 * digit.
426 * c:
427 * a Unicode character
428 * Returns:
429 * If c is a hex digit (according to
430 * g_unichar_isxdigit()), its numeric value. Otherwise, -1.
432 public static int unicharXdigitValue(gunichar c)
434 // gint g_unichar_xdigit_value (gunichar c);
435 return g_unichar_xdigit_value(c);
440 * Classifies a Unicode character by type.
441 * c:
442 * a Unicode character
443 * Returns:
444 * the type of the character.
446 public static GUnicodeType unicharType(gunichar c)
448 // GUnicodeType g_unichar_type (gunichar c);
449 return g_unichar_type(c);
454 * Determines the break type of c. c should be a Unicode character
455 * (to derive a character from UTF-8 encoded text, use
456 * g_utf8_get_char()). The break type is used to find word and line
457 * breaks ("text boundaries"). Pango implements the Unicode boundary
458 * resolution algorithms and normally you would use a function such
459 * as pango_break() instead of caring about break types yourself.
460 * c:
461 * a Unicode character
462 * Returns:
463 * the break type of c
465 public static GUnicodeBreakType unicharBreakType(gunichar c)
467 // GUnicodeBreakType g_unichar_break_type (gunichar c);
468 return g_unichar_break_type(c);
472 * Computes the canonical ordering of a string in-place.
473 * This rearranges decomposed characters in the string
474 * according to their combining classes. See the Unicode
475 * manual for more information.
476 * string:
477 * a UCS-4 encoded string.
478 * len:
479 * the maximum length of string to use.
481 public static void unicodeCanonicalOrdering(gunichar* string, uint len)
483 // void g_unicode_canonical_ordering (gunichar *string, gsize len);
484 g_unicode_canonical_ordering(string, len);
488 * Computes the canonical decomposition of a Unicode character.
489 * ch:
490 * a Unicode character.
491 * result_len:
492 * location to store the length of the return value.
493 * Returns:
494 * a newly allocated string of Unicode characters.
495 * result_len is set to the resulting length of the string.
497 public static gunichar* unicodeCanonicalDecomposition(gunichar ch, uint* resultLen)
499 // gunichar* g_unicode_canonical_decomposition (gunichar ch, gsize *result_len);
500 return g_unicode_canonical_decomposition(ch, resultLen);
504 * In Unicode, some characters are mirrored. This
505 * means that their images are mirrored horizontally in text that is laid
506 * out from right to left. For instance, "(" would become its mirror image,
507 * ")", in right-to-left text.
508 * If ch has the Unicode mirrored property and there is another unicode
509 * character that typically has a glyph that is the mirror image of ch's
510 * glyph and mirrored_ch is set, it puts that character in the address
511 * pointed to by mirrored_ch. Otherwise the original character is put.
512 * ch:
513 * a Unicode character
514 * mirrored_ch:
515 * location to store the mirrored character
516 * Returns:
517 * TRUE if ch has a mirrored character, FALSE otherwise
518 * Since 2.4
520 public static int unicharGetMirrorChar(gunichar ch, gunichar* mirroredCh)
522 // gboolean g_unichar_get_mirror_char (gunichar ch, gunichar *mirrored_ch);
523 return g_unichar_get_mirror_char(ch, mirroredCh);
528 * Looks up the GUnicodeScript for a particular character (as defined
529 * by Unicode Standard Annex 24). No check is made for ch being a
530 * valid Unicode character; if you pass in an invalid character, the
531 * result is undefined.
532 * ch:
533 * a Unicode character
534 * Returns:
535 * the GUnicodeScript for the character.
536 * Since 2.14
538 public static GUnicodeScript unicharGetScript(gunichar ch)
540 // GUnicodeScript g_unichar_get_script (gunichar ch);
541 return g_unichar_get_script(ch);
546 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
547 * If p does not point to a valid UTF-8 encoded character, results are
548 * undefined. If you are not sure that the bytes are complete
549 * valid Unicode characters, you should use g_utf8_get_char_validated()
550 * instead.
551 * p:
552 * a pointer to Unicode character encoded as UTF-8
553 * Returns:
554 * the resulting character
556 public static gunichar utf8_GetChar(char[] p)
558 // gunichar g_utf8_get_char (const gchar *p);
559 return g_utf8_get_char(Str.toStringz(p));
563 * Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
564 * This function checks for incomplete characters, for invalid characters
565 * such as characters that are out of the range of Unicode, and for
566 * overlong encodings of valid characters.
567 * p:
568 * a pointer to Unicode character encoded as UTF-8
569 * max_len:
570 * the maximum number of bytes to read, or -1, for no maximum.
571 * Returns:
572 * the resulting character. If p points to a partial
573 * sequence at the end of a string that could begin a valid
574 * character (or if max_len is zero), returns (gunichar)-2;
575 * otherwise, if p does not point to a valid UTF-8 encoded
576 * Unicode character, returns (gunichar)-1.
578 public static gunichar utf8_GetCharValidated(char[] p, int maxLen)
580 // gunichar g_utf8_get_char_validated (const gchar *p, gssize max_len);
581 return g_utf8_get_char_validated(Str.toStringz(p), maxLen);
585 * Converts from an integer character offset to a pointer to a position
586 * within the string.
587 * Since 2.10, this function allows passing a negative offset to
588 * step backwards. It is usually worth stepping backwards from the end
589 * instead of forwards if offset is in the last fourth of the string,
590 * since moving forward is about 3 times faster than moving backward.
591 * str:
592 * a UTF-8 encoded string
593 * offset:
594 * a character offset within str
595 * Returns:
596 * the resulting pointer
598 public static char[] utf8_OffsetToPointer(char[] str, int offset)
600 // gchar* g_utf8_offset_to_pointer (const gchar *str, glong offset);
601 return Str.toString(g_utf8_offset_to_pointer(Str.toStringz(str), offset) );
605 * Converts from a pointer to position within a string to an integer
606 * character offset.
607 * Since 2.10, this function allows pos to be before str, and returns
608 * a negative offset in this case.
609 * str:
610 * a UTF-8 encoded string
611 * pos:
612 * a pointer to a position within str
613 * Returns:
614 * the resulting character offset
616 public static int utf8_PointerToOffset(char[] str, char[] pos)
618 // glong g_utf8_pointer_to_offset (const gchar *str, const gchar *pos);
619 return g_utf8_pointer_to_offset(Str.toStringz(str), Str.toStringz(pos));
623 * Finds the previous UTF-8 character in the string before p.
624 * p does not have to be at the beginning of a UTF-8 character. No check
625 * is made to see if the character found is actually valid other than
626 * it starts with an appropriate byte. If p might be the first
627 * character of the string, you must use g_utf8_find_prev_char() instead.
628 * p:
629 * a pointer to a position within a UTF-8 encoded string
630 * Returns:
631 * a pointer to the found character.
633 public static char[] utf8_PrevChar(char[] p)
635 // gchar* g_utf8_prev_char (const gchar *p);
636 return Str.toString(g_utf8_prev_char(Str.toStringz(p)) );
640 * Finds the start of the next UTF-8 character in the string after p.
641 * p does not have to be at the beginning of a UTF-8 character. No check
642 * is made to see if the character found is actually valid other than
643 * it starts with an appropriate byte.
644 * p:
645 * a pointer to a position within a UTF-8 encoded string
646 * end:
647 * a pointer to the end of the string, or NULL to indicate
648 * that the string is nul-terminated, in which case
649 * the returned value will never be NULL
650 * Returns:
651 * a pointer to the found character or NULL
653 public static char[] utf8_FindNextChar(char[] p, char[] end)
655 // gchar* g_utf8_find_next_char (const gchar *p, const gchar *end);
656 return Str.toString(g_utf8_find_next_char(Str.toStringz(p), Str.toStringz(end)) );
660 * Given a position p within a UTF-8 encoded string str, find the start
661 * of the previous UTF-8 character starting before p. Returns NULL if no
662 * UTF-8 characters are present in str before p.
663 * p does not have to be at the beginning of a UTF-8 character. No check
664 * is made to see if the character found is actually valid other than
665 * it starts with an appropriate byte.
666 * str:
667 * pointer to the beginning of a UTF-8 encoded string
668 * p:
669 * pointer to some position within str
670 * Returns:
671 * a pointer to the found character or NULL.
673 public static char[] utf8_FindPrevChar(char[] str, char[] p)
675 // gchar* g_utf8_find_prev_char (const gchar *str, const gchar *p);
676 return Str.toString(g_utf8_find_prev_char(Str.toStringz(str), Str.toStringz(p)) );
680 * Returns the length of the string in characters.
681 * p:
682 * pointer to the start of a UTF-8 encoded string.
683 * max:
684 * the maximum number of bytes to examine. If max
685 * is less than 0, then the string is assumed to be
686 * nul-terminated. If max is 0, p will not be examined and
687 * may be NULL.
688 * Returns:
689 * the length of the string in characters
691 public static int utf8_Strlen(char[] p, int max)
693 // glong g_utf8_strlen (const gchar *p, gssize max);
694 return g_utf8_strlen(Str.toStringz(p), max);
698 * Like the standard C strncpy() function, but
699 * copies a given number of characters instead of a given number of
700 * bytes. The src string must be valid UTF-8 encoded text.
701 * (Use g_utf8_validate() on all text before trying to use UTF-8
702 * utility functions with it.)
703 * dest:
704 * buffer to fill with characters from src
705 * src:
706 * UTF-8 encoded string
707 * n:
708 * character count
709 * Returns:
710 * dest
712 public static char[] utf8_Strncpy(char[] dest, char[] src, uint n)
714 // gchar* g_utf8_strncpy (gchar *dest, const gchar *src, gsize n);
715 return Str.toString(g_utf8_strncpy(Str.toStringz(dest), Str.toStringz(src), n) );
719 * Finds the leftmost occurrence of the given Unicode character
720 * in a UTF-8 encoded string, while limiting the search to len bytes.
721 * If len is -1, allow unbounded search.
722 * p:
723 * a nul-terminated UTF-8 encoded string
724 * len:
725 * the maximum length of p
726 * c:
727 * a Unicode character
728 * Returns:
729 * NULL if the string does not contain the character,
730 * otherwise, a pointer to the start of the leftmost occurrence of
731 * the character in the string.
733 public static char[] utf8_Strchr(char[] p, int len, gunichar c)
735 // gchar* g_utf8_strchr (const gchar *p, gssize len, gunichar c);
736 return Str.toString(g_utf8_strchr(Str.toStringz(p), len, c) );
740 * Find the rightmost occurrence of the given Unicode character
741 * in a UTF-8 encoded string, while limiting the search to len bytes.
742 * If len is -1, allow unbounded search.
743 * p:
744 * a nul-terminated UTF-8 encoded string
745 * len:
746 * the maximum length of p
747 * c:
748 * a Unicode character
749 * Returns:
750 * NULL if the string does not contain the character,
751 * otherwise, a pointer to the start of the rightmost occurrence of the
752 * character in the string.
754 public static char[] utf8_Strrchr(char[] p, int len, gunichar c)
756 // gchar* g_utf8_strrchr (const gchar *p, gssize len, gunichar c);
757 return Str.toString(g_utf8_strrchr(Str.toStringz(p), len, c) );
761 * Reverses a UTF-8 string. str must be valid UTF-8 encoded text.
762 * (Use g_utf8_validate() on all text before trying to use UTF-8
763 * utility functions with it.)
764 * Note that unlike g_strreverse(), this function returns
765 * newly-allocated memory, which should be freed with g_free() when
766 * no longer needed.
767 * str:
768 * a UTF-8 encoded string
769 * len:
770 * the maximum length of str to use. If len < 0, then
771 * the string is nul-terminated.
772 * Returns:
773 * a newly-allocated string which is the reverse of str.
774 * Since 2.2
776 public static char[] utf8_Strreverse(char[] str, int len)
778 // gchar* g_utf8_strreverse (const gchar *str, gssize len);
779 return Str.toString(g_utf8_strreverse(Str.toStringz(str), len) );
783 * Validates UTF-8 encoded text. str is the text to validate;
784 * if str is nul-terminated, then max_len can be -1, otherwise
785 * max_len should be the number of bytes to validate.
786 * If end is non-NULL, then the end of the valid range
787 * will be stored there (i.e. the start of the first invalid
788 * character if some bytes were invalid, or the end of the text
789 * being validated otherwise).
790 * Note that g_utf8_validate() returns FALSE if max_len is
791 * positive and NUL is met before max_len bytes have been read.
792 * Returns TRUE if all of str was valid. Many GLib and GTK+
793 * routines require valid UTF-8 as input;
794 * so data read from a file or the network should be checked
795 * with g_utf8_validate() before doing anything else with it.
796 * str:
797 * a pointer to character data
798 * max_len:
799 * max bytes to validate, or -1 to go until NUL
800 * end:
801 * return location for end of valid data
802 * Returns:
803 * TRUE if the text was valid UTF-8
805 public static int utf8_Validate(char[] str, int maxLen, char** end)
807 // gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end);
808 return g_utf8_validate(Str.toStringz(str), maxLen, end);
812 * Converts all Unicode characters in the string that have a case
813 * to uppercase. The exact manner that this is done depends
814 * on the current locale, and may result in the number of
815 * characters in the string increasing. (For instance, the
816 * German ess-zet will be changed to SS.)
817 * str:
818 * a UTF-8 encoded string
819 * len:
820 * length of str, in bytes, or -1 if str is nul-terminated.
821 * Returns:
822 * a newly allocated string, with all characters
823 * converted to uppercase.
825 public static char[] utf8_Strup(char[] str, int len)
827 // gchar* g_utf8_strup (const gchar *str, gssize len);
828 return Str.toString(g_utf8_strup(Str.toStringz(str), len) );
832 * Converts all Unicode characters in the string that have a case
833 * to lowercase. The exact manner that this is done depends
834 * on the current locale, and may result in the number of
835 * characters in the string changing.
836 * str:
837 * a UTF-8 encoded string
838 * len:
839 * length of str, in bytes, or -1 if str is nul-terminated.
840 * Returns:
841 * a newly allocated string, with all characters
842 * converted to lowercase.
844 public static char[] utf8_Strdown(char[] str, int len)
846 // gchar* g_utf8_strdown (const gchar *str, gssize len);
847 return Str.toString(g_utf8_strdown(Str.toStringz(str), len) );
851 * Converts a string into a form that is independent of case. The
852 * result will not correspond to any particular case, but can be
853 * compared for equality or ordered with the results of calling
854 * g_utf8_casefold() on other strings.
855 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
856 * only an approximation to the correct linguistic case insensitive
857 * ordering, though it is a fairly good one. Getting this exactly
858 * right would require a more sophisticated collation function that
859 * takes case sensitivity into account. GLib does not currently
860 * provide such a function.
861 * str:
862 * a UTF-8 encoded string
863 * len:
864 * length of str, in bytes, or -1 if str is nul-terminated.
865 * Returns:
866 * a newly allocated string, that is a
867 * case independent form of str.
869 public static char[] utf8_Casefold(char[] str, int len)
871 // gchar* g_utf8_casefold (const gchar *str, gssize len);
872 return Str.toString(g_utf8_casefold(Str.toStringz(str), len) );
876 * Converts a string into canonical form, standardizing
877 * such issues as whether a character with an accent
878 * is represented as a base character and combining
879 * accent or as a single precomposed character. You
880 * should generally call g_utf8_normalize() before
881 * comparing two Unicode strings.
882 * The normalization mode G_NORMALIZE_DEFAULT only
883 * standardizes differences that do not affect the
884 * text content, such as the above-mentioned accent
885 * representation. G_NORMALIZE_ALL also standardizes
886 * the "compatibility" characters in Unicode, such
887 * as SUPERSCRIPT THREE to the standard forms
888 * (in this case DIGIT THREE). Formatting information
889 * may be lost but for most text operations such
890 * characters should be considered the same.
891 * For example, g_utf8_collate() normalizes
892 * with G_NORMALIZE_ALL as its first step.
893 * G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE
894 * are like G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL,
895 * but return a result with composed forms rather
896 * than a maximally decomposed form. This is often
897 * useful if you intend to convert the string to
898 * a legacy encoding or pass it to a system with
899 * less capable Unicode handling.
900 * str:
901 * a UTF-8 encoded string.
902 * len:
903 * length of str, in bytes, or -1 if str is nul-terminated.
904 * mode:
905 * the type of normalization to perform.
906 * Returns:
907 * a newly allocated string, that is the
908 * normalized form of str.
910 public static char[] utf8_Normalize(char[] str, int len, GNormalizeMode mode)
912 // gchar* g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode);
913 return Str.toString(g_utf8_normalize(Str.toStringz(str), len, mode) );
918 * Compares two strings for ordering using the linguistically
919 * correct rules for the current locale.
920 * When sorting a large number of strings, it will be significantly
921 * faster to obtain collation keys with g_utf8_collate_key() and
922 * compare the keys with strcmp() when sorting instead of sorting
923 * the original strings.
924 * str1:
925 * a UTF-8 encoded string
926 * str2:
927 * a UTF-8 encoded string
928 * Returns:
929 * < 0 if str1 compares before str2,
930 * 0 if they compare equal, > 0 if str1 compares after str2.
932 public static int utf8_Collate(char[] str1, char[] str2)
934 // gint g_utf8_collate (const gchar *str1, const gchar *str2);
935 return g_utf8_collate(Str.toStringz(str1), Str.toStringz(str2));
939 * Converts a string into a collation key that can be compared
940 * with other collation keys produced by the same function using
941 * strcmp().
942 * The results of comparing the collation keys of two strings
943 * with strcmp() will always be the same as comparing the two
944 * original strings with g_utf8_collate().
945 * Note that this function depends on the
946 * current locale.
947 * str:
948 * a UTF-8 encoded string.
949 * len:
950 * length of str, in bytes, or -1 if str is nul-terminated.
951 * Returns:
952 * a newly allocated string. This string should
953 * be freed with g_free() when you are done with it.
955 public static char[] utf8_CollateKey(char[] str, int len)
957 // gchar* g_utf8_collate_key (const gchar *str, gssize len);
958 return Str.toString(g_utf8_collate_key(Str.toStringz(str), len) );
962 * Converts a string into a collation key that can be compared
963 * with other collation keys produced by the same function using strcmp().
964 * In order to sort filenames correctly, this function treats the dot '.'
965 * as a special case. Most dictionary orderings seem to consider it
966 * insignificant, thus producing the ordering "event.c" "eventgenerator.c"
967 * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
968 * would like to treat numbers intelligently so that "file1" "file10" "file5"
969 * is sorted as "file1" "file5" "file10".
970 * Note that this function depends on the
971 * current locale.
972 * str:
973 * a UTF-8 encoded string.
974 * len:
975 * length of str, in bytes, or -1 if str is nul-terminated.
976 * Returns:
977 * a newly allocated string. This string should
978 * be freed with g_free() when you are done with it.
979 * Since 2.8
981 public static char[] utf8_CollateKeyForFilename(char[] str, int len)
983 // gchar* g_utf8_collate_key_for_filename (const gchar *str, gssize len);
984 return Str.toString(g_utf8_collate_key_for_filename(Str.toStringz(str), len) );
988 * Convert a string from UTF-8 to UTF-16. A 0 character will be
989 * added to the result after the converted text.
990 * str:
991 * a UTF-8 encoded string
992 * len:
993 * the maximum length (number of bytes) of str to use.
994 * If len < 0, then the string is nul-terminated.
995 * items_read:
996 * location to store number of bytes read, or NULL.
997 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
998 * returned in case str contains a trailing partial
999 * character. If an error occurs then the index of the
1000 * invalid input is stored here.
1001 * items_written:
1002 * location to store number of gunichar2 written,
1003 * or NULL.
1004 * The value stored here does not include the trailing 0.
1005 * error:
1006 * location to store the error occurring, or NULL to ignore
1007 * errors. Any of the errors in GConvertError other than
1008 * G_CONVERT_ERROR_NO_CONVERSION may occur.
1009 * Returns:
1010 * a pointer to a newly allocated UTF-16 string.
1011 * This value must be freed with g_free(). If an
1012 * error occurs, NULL will be returned and
1013 * error set.
public static gunichar2* utf8_ToUtf16(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar2* g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
	//
	// Converts UTF-8 text to UTF-16 by delegating to g_utf8_to_utf16().
	// itemsRead, itemsWritten and error are forwarded untouched; each may
	// be null. The returned buffer is newly allocated by GLib and must be
	// released by the caller with g_free(); it is null when an error
	// occurred.
	//
	// NOTE(review): the C prototype uses glong for len and both counters
	// while this wrapper uses int -- confirm the widths agree on every
	// target platform.
	auto utf8Input = Str.toStringz(str);
	auto utf16Result = g_utf8_to_utf16(utf8Input, len, itemsRead, itemsWritten, error);
	return utf16Result;
}
1022 * Convert a string from UTF-8 to a 32-bit fixed width
1023 * representation as UCS-4. A trailing 0 will be added to the
1024 * string after the converted text.
1025 * str:
1026 * a UTF-8 encoded string
1027 * len:
1028 * the maximum length of str to use. If len < 0, then
1029 * the string is nul-terminated.
1030 * items_read:
1031 * location to store number of bytes read, or NULL.
1032 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
1033 * returned in case str contains a trailing partial
1034 * character. If an error occurs then the index of the
1035 * invalid input is stored here.
1036 * items_written:
1037 * location to store number of characters written or NULL.
1038 * The value stored here does not include the trailing 0
1039 * character.
1040 * error:
1041 * location to store the error occurring, or NULL to ignore
1042 * errors. Any of the errors in GConvertError other than
1043 * G_CONVERT_ERROR_NO_CONVERSION may occur.
1044 * Returns:
1045 * a pointer to a newly allocated UCS-4 string.
1046 * This value must be freed with g_free(). If an
1047 * error occurs, NULL will be returned and
1048 * error set.
public static gunichar* utf8_ToUcs4(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar* g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
	//
	// Converts UTF-8 text to UCS-4 by delegating to g_utf8_to_ucs4().
	// itemsRead, itemsWritten and error are forwarded untouched; each may
	// be null. The returned buffer is newly allocated by GLib and must be
	// released by the caller with g_free(); it is null when an error
	// occurred.
	//
	// NOTE(review): the C prototype uses glong for len and both counters
	// while this wrapper uses int -- confirm the widths agree on every
	// target platform.
	auto utf8Input = Str.toStringz(str);
	auto ucs4Result = g_utf8_to_ucs4(utf8Input, len, itemsRead, itemsWritten, error);
	return ucs4Result;
}
1057 * Convert a string from UTF-8 to a 32-bit fixed width
1058 * representation as UCS-4, assuming valid UTF-8 input.
1059 * This function is roughly twice as fast as g_utf8_to_ucs4()
1060 * but does no error checking on the input.
1061 * str:
1062 * a UTF-8 encoded string
1063 * len:
1064 * the maximum length of str to use. If len < 0, then
1065 * the string is nul-terminated.
1066 * items_written:
1067 * location to store the number of characters in the
1068 * result, or NULL.
1069 * Returns:
1070 * a pointer to a newly allocated UCS-4 string.
1071 * This value must be freed with g_free().
public static gunichar* utf8_ToUcs4_Fast(char[] str, int len, int* itemsWritten)
{
	// gunichar* g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written);
	//
	// Converts UTF-8 text to UCS-4 by delegating to g_utf8_to_ucs4_fast(),
	// which performs no error checking on the input, so str must already
	// be valid UTF-8. itemsWritten is forwarded untouched and may be null.
	// The returned buffer is newly allocated by GLib and must be released
	// by the caller with g_free().
	//
	// NOTE(review): the C prototype uses glong for len and the counter
	// while this wrapper uses int -- confirm the widths agree on every
	// target platform.
	auto utf8Input = Str.toStringz(str);
	auto ucs4Result = g_utf8_to_ucs4_fast(utf8Input, len, itemsWritten);
	return ucs4Result;
}
1080 * Convert a string from UTF-16 to UCS-4. The result will be
1081 * terminated with a 0 character.
1082 * str:
1083 * a UTF-16 encoded string
1084 * len:
1085 * the maximum length (number of gunichar2) of str to use.
1086 * If len < 0, then the string is terminated with a 0 character.
1087 * items_read:
1088 * location to store number of words read, or NULL.
1089 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
1090 * returned in case str contains a trailing partial
1091 * character. If an error occurs then the index of the
1092 * invalid input is stored here.
1093 * items_written:
1094 * location to store number of characters written, or NULL.
1095 * The value stored here does not include the trailing
1096 * 0 character.
1097 * error:
1098 * location to store the error occurring, or NULL to ignore
1099 * errors. Any of the errors in GConvertError other than
1100 * G_CONVERT_ERROR_NO_CONVERSION may occur.
1101 * Returns:
1102 * a pointer to a newly allocated UCS-4 string.
1103 * This value must be freed with g_free(). If an
1104 * error occurs, NULL will be returned and
1105 * error set.
public static gunichar* utf16_ToUcs4(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar* g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
	//
	// Converts UTF-16 text to UCS-4 by delegating to g_utf16_to_ucs4().
	// Every argument is forwarded untouched; itemsRead, itemsWritten and
	// error may be null. The returned buffer is newly allocated by GLib
	// and must be released by the caller with g_free(); it is null when an
	// error occurred.
	//
	// NOTE(review): the C prototype uses glong for len and both counters
	// while this wrapper uses int -- confirm the widths agree on every
	// target platform.
	auto ucs4Result = g_utf16_to_ucs4(str, len, itemsRead, itemsWritten, error);
	return ucs4Result;
}
1114 * Convert a string from UTF-16 to UTF-8. The result will be
1115 * terminated with a 0 byte.
1116 * Note that the input is expected to be already in native endianness,
1117 * an initial byte-order-mark character is not handled specially.
1118 * g_convert() can be used to convert a byte buffer of UTF-16 data of
1119 * ambiguous endianness.
1120 * str:
1121 * a UTF-16 encoded string
1122 * len:
1123 * the maximum length (number of gunichar2) of str to use.
1124 * If len < 0, then the string is terminated with a 0 character.
1125 * items_read:
1126 * location to store number of words read, or NULL.
1127 * If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
1128 * returned in case str contains a trailing partial
1129 * character. If an error occurs then the index of the
1130 * invalid input is stored here.
1131 * items_written:
1132 * location to store number of bytes written, or NULL.
1133 * The value stored here does not include the trailing
1134 * 0 byte.
1135 * error:
1136 * location to store the error occurring, or NULL to ignore
1137 * errors. Any of the errors in GConvertError other than
1138 * G_CONVERT_ERROR_NO_CONVERSION may occur.
1139 * Returns:
1140 * a pointer to a newly allocated UTF-8 string.
1141 * This value must be freed with g_free(). If an
1142 * error occurs, NULL will be returned and
1143 * error set.
public static char[] utf16_ToUtf8(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gchar* g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
	//
	// Converts UTF-16 text (native endianness) to UTF-8 and returns it as
	// a D array.
	//   str: a UTF-16 encoded string.
	//   len: maximum number of gunichar2 to use; negative means the input
	//        is terminated with a 0 character.
	//   itemsRead, itemsWritten, error: forwarded untouched; may be null.
	//   Returns: the converted text, copied into a D array. On failure
	//        GLib returns null and sets error; that null is passed to
	//        Str.toString exactly as before.
	//
	// GLib hands back a newly allocated buffer that must be released with
	// g_free(). The caller only ever sees the D array, so the buffer is
	// released here instead of being leaked.
	auto rawUtf8 = g_utf16_to_utf8(str, len, itemsRead, itemsWritten, error);

	// The explicit .dup keeps the result valid even if Str.toString merely
	// slices the C buffer instead of copying it.
	char[] converted = Str.toString(rawUtf8).dup;

	// g_free() ignores a null pointer, so the error path needs no guard.
	g_free(rawUtf8);
	return converted;
}
1152 * Convert a string from UCS-4 to UTF-16. A 0 character will be
1153 * added to the result after the converted text.
1154 * str:
1155 * a UCS-4 encoded string
1156 * len:
1157 * the maximum length (number of characters) of str to use.
1158 * If len < 0, then the string is terminated with a 0 character.
1159 * items_read:
1160 * location to store number of bytes read, or NULL.
1161 * If an error occurs then the index of the invalid input
1162 * is stored here.
1163 * items_written:
1164 * location to store number of gunichar2
1165 * written, or NULL. The value stored here does not
1166 * include the trailing 0.
1167 * error:
1168 * location to store the error occurring, or NULL to ignore
1169 * errors. Any of the errors in GConvertError other than
1170 * G_CONVERT_ERROR_NO_CONVERSION may occur.
1171 * Returns:
1172 * a pointer to a newly allocated UTF-16 string.
1173 * This value must be freed with g_free(). If an
1174 * error occurs, NULL will be returned and
1175 * error set.
public static gunichar2* ucs4_ToUtf16(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar2* g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
	//
	// Converts UCS-4 text to UTF-16 by delegating to g_ucs4_to_utf16().
	// Every argument is forwarded untouched; itemsRead, itemsWritten and
	// error may be null. The returned buffer is newly allocated by GLib
	// and must be released by the caller with g_free(); it is null when an
	// error occurred.
	//
	// NOTE(review): the C prototype uses glong for len and both counters
	// while this wrapper uses int -- confirm the widths agree on every
	// target platform.
	auto utf16Result = g_ucs4_to_utf16(str, len, itemsRead, itemsWritten, error);
	return utf16Result;
}
1184 * Convert a string from a 32-bit fixed width representation as UCS-4
1185 * to UTF-8. The result will be terminated with a 0 byte.
1186 * str:
1187 * a UCS-4 encoded string
1188 * len:
1189 * the maximum length (number of characters) of str to use.
1190 * If len < 0, then the string is terminated with a 0 character.
1191 * items_read:
1192 * location to store number of characters read, or NULL.
1193 * items_written:
1194 * location to store number of bytes written or NULL.
1195 * The value stored here does not include the trailing 0
1196 * byte.
1197 * error:
1198 * location to store the error occurring, or NULL to ignore
1199 * errors. Any of the errors in GConvertError other than
1200 * G_CONVERT_ERROR_NO_CONVERSION may occur.
1201 * Returns:
1202 * a pointer to a newly allocated UTF-8 string.
1203 * This value must be freed with g_free(). If an
1204 * error occurs, NULL will be returned and
1205 * error set. In that case, items_read will be
1206 * set to the position of the first invalid input
1207 * character.
public static char[] ucs4_ToUtf8(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gchar* g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
	//
	// Converts UCS-4 text to UTF-8 and returns it as a D array.
	//   str: a UCS-4 encoded string.
	//   len: maximum number of characters to use; negative means the
	//        input is terminated with a 0 character.
	//   itemsRead, itemsWritten, error: forwarded untouched; may be null.
	//   Returns: the converted text, copied into a D array. On failure
	//        GLib returns null and sets error; that null is passed to
	//        Str.toString exactly as before.
	//
	// GLib hands back a newly allocated buffer that must be released with
	// g_free(). The caller only ever sees the D array, so the buffer is
	// released here instead of being leaked.
	auto rawUtf8 = g_ucs4_to_utf8(str, len, itemsRead, itemsWritten, error);

	// The explicit .dup keeps the result valid even if Str.toString merely
	// slices the C buffer instead of copying it.
	char[] converted = Str.toString(rawUtf8).dup;

	// g_free() ignores a null pointer, so the error path needs no guard.
	g_free(rawUtf8);
	return converted;
}
1216 * Converts a single character to UTF-8.
1217 * c:
1218 * a Unicode character code
1219 * outbuf:
1220 * output buffer, must have at least 6 bytes of space.
1221 * If NULL, the length will be computed and returned
1222 * and nothing will be written to outbuf.
1223 * Returns:
1224 * number of bytes written
1225 * See Also
1226 * g_locale_to_utf8(), g_locale_from_utf8()
1227 * Convenience functions for converting between UTF-8 and the locale encoding.
1228 * [3] surrogate pairs
public static int unicharToUtf8(gunichar c, char[] outbuf)
{
	// gint g_unichar_to_utf8 (gunichar c, gchar *outbuf);
	//
	// Encodes the single character c as UTF-8.
	//   c:      a Unicode character code.
	//   outbuf: destination for the encoded bytes. A null or empty array
	//           means "only report the length".
	//   Returns: the number of bytes the UTF-8 encoding of c occupies.
	//
	// outbuf is an OUTPUT buffer, so it must not be routed through
	// Str.toStringz: that helper converts a D string to a C string and may
	// hand GLib a separate nul-terminated copy, in which case the encoded
	// bytes never reach the caller's array. The character is therefore
	// encoded into a local scratch buffer (GLib requires 6 bytes of space)
	// and then copied into outbuf.
	char[6] scratch;
	int encodedLength = g_unichar_to_utf8(c, scratch.ptr);

	// Copy only when the caller's buffer can hold the whole sequence, so
	// memory past the end of outbuf is never written. If outbuf is too
	// short nothing is copied; the caller can detect this by comparing the
	// return value with outbuf.length.
	if ( encodedLength > 0 && outbuf.length >= cast(size_t) encodedLength )
	{
		outbuf[0 .. encodedLength] = scratch[0 .. encodedLength];
	}
	return encodedLength;
}