2 * This file is part of duit.
4 * duit is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
9 * duit is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with duit; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 // generated automatically - do not change
20 // find conversion definition on APILookup.txt
21 // implement new conversion functionalities on the wrap.utils package
24 * Conversion parameters:
25 * inFile = glib-Unicode-Manipulation.html
52 private import glib
.glibtypes
;
54 private import lib
.glib
;
56 private import glib
.ErrorG
;
57 private import glib
.Str
;
61 * This section describes a number of functions for dealing with
62 * Unicode characters and strings. There are analogues of the
63 * traditional ctype.h character classification
64 * and case conversion functions, UTF-8 analogues of some string utility
65 * functions, functions to perform normalization, case conversion and
66 * collation on UTF-8 strings and finally functions to convert between
67 * the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
68 * The implementations of the Unicode functions in GLib are based
69 * on the Unicode Character Data tables, which are available from
71 * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
72 * GLib 2.12 supports Unicode 5.0.
/**
 * Checks whether ch is a valid Unicode character. Not every possible
 * integer value of ch is valid. 0 counts as a valid character, even
 * though it normally acts as a string terminator.
 * Params:
 *  ch = a Unicode character
 * Returns:
 *  TRUE if ch is a valid Unicode character
 */
public static int unicharValidate(gunichar ch)
{
	// gboolean g_unichar_validate (gunichar ch);
	auto isValid = g_unichar_validate(ch);
	return isValid;
}
/**
 * Tells whether a character is alphanumeric.
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is an alphanumeric character
 */
public static int unicharIsalnum(gunichar c)
{
	// gboolean g_unichar_isalnum (gunichar c);
	auto isAlnum = g_unichar_isalnum(c);
	return isAlnum;
}
/**
 * Tells whether a character is alphabetic (i.e. a letter).
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is an alphabetic character
 */
public static int unicharIsalpha(gunichar c)
{
	// gboolean g_unichar_isalpha (gunichar c);
	auto isAlpha = g_unichar_isalpha(c);
	return isAlpha;
}
/**
 * Tells whether a character is a control character.
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is a control character
 */
public static int unicharIscntrl(gunichar c)
{
	// gboolean g_unichar_iscntrl (gunichar c);
	auto isControl = g_unichar_iscntrl(c);
	return isControl;
}
/**
 * Tells whether a character is numeric (i.e. a digit). This covers
 * ASCII 0-9 as well as digits in other languages/scripts.
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is a digit
 */
public static int unicharIsdigit(gunichar c)
{
	// gboolean g_unichar_isdigit (gunichar c);
	auto isDigit = g_unichar_isdigit(c);
	return isDigit;
}
/**
 * Tells whether a character is printable and not a space (FALSE is
 * returned for control characters, format characters, and spaces).
 * g_unichar_isprint() is similar, but returns TRUE for spaces.
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is printable unless it's a space
 */
public static int unicharIsgraph(gunichar c)
{
	// gboolean g_unichar_isgraph (gunichar c);
	auto isGraph = g_unichar_isgraph(c);
	return isGraph;
}
/**
 * Tells whether a character is a lowercase letter.
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is a lowercase letter
 */
public static int unicharIslower(gunichar c)
{
	// gboolean g_unichar_islower (gunichar c);
	auto isLower = g_unichar_islower(c);
	return isLower;
}
/**
 * Tells whether a character is printable.
 * Unlike g_unichar_isgraph(), this returns TRUE for spaces.
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is printable
 */
public static int unicharIsprint(gunichar c)
{
	// gboolean g_unichar_isprint (gunichar c);
	auto isPrintable = g_unichar_isprint(c);
	return isPrintable;
}
/**
 * Tells whether a character is punctuation or a symbol.
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is a punctuation or symbol character
 */
public static int unicharIspunct(gunichar c)
{
	// gboolean g_unichar_ispunct (gunichar c);
	auto isPunct = g_unichar_ispunct(c);
	return isPunct;
}
/**
 * Tells whether a character is a space, tab, or line separator
 * (newline, carriage return, etc.).
 * To get a character value out of UTF-8 text, use g_utf8_get_char().
 * (Note: do not use this for word breaking; use Pango or an equivalent
 * to get word breaking right, as the algorithm is fairly complex.)
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is a space character
 */
public static int unicharIsspace(gunichar c)
{
	// gboolean g_unichar_isspace (gunichar c);
	auto isSpace = g_unichar_isspace(c);
	return isSpace;
}
/**
 * Tells whether a character is uppercase.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if c is an uppercase character
 */
public static int unicharIsupper(gunichar c)
{
	// gboolean g_unichar_isupper (gunichar c);
	auto isUpper = g_unichar_isupper(c);
	return isUpper;
}
/**
 * Tells whether a character is a hexadecimal digit.
 * Params:
 *  c = a Unicode character.
 * Returns:
 *  TRUE if the character is a hexadecimal digit
 */
public static int unicharIsxdigit(gunichar c)
{
	// gboolean g_unichar_isxdigit (gunichar c);
	auto isHexDigit = g_unichar_isxdigit(c);
	return isHexDigit;
}
/**
 * Tells whether a character is titlecase. Some composite characters in
 * Unicode, such as the DZ digraph, have three case variants instead of
 * just two. The titlecase form is used at the beginning of a word where
 * only the first letter is capitalized. The titlecase form of the DZ
 * digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if the character is titlecase
 */
public static int unicharIstitle(gunichar c)
{
	// gboolean g_unichar_istitle (gunichar c);
	auto isTitle = g_unichar_istitle(c);
	return isTitle;
}
/**
 * Tells whether a given character is assigned in the Unicode standard.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if the character has an assigned value
 */
public static int unicharIsdefined(gunichar c)
{
	// gboolean g_unichar_isdefined (gunichar c);
	auto isAssigned = g_unichar_isdefined(c);
	return isAssigned;
}
/**
 * Tells whether a character is typically rendered in a double-width
 * cell.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if the character is wide
 */
public static int unicharIswide(gunichar c)
{
	// gboolean g_unichar_iswide (gunichar c);
	auto isWide = g_unichar_iswide(c);
	return isWide;
}
/**
 * Tells whether a character is typically rendered in a double-width
 * cell under legacy East Asian locales. Any character that is wide
 * according to g_unichar_iswide() is also reported wide here, but the
 * converse is not necessarily true. See Unicode Standard Annex 11 for
 * details.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  TRUE if the character is wide in legacy East Asian locales
 */
public static int unicharIswideCjk(gunichar c)
{
	// gboolean g_unichar_iswide_cjk (gunichar c);
	auto isWideCjk = g_unichar_iswide_cjk(c);
	return isWideCjk;
}
/**
 * Converts a character to uppercase.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  the result of converting c to uppercase.
 *  If c is not a lowercase or titlecase character,
 *  or has no uppercase equivalent, c is returned unchanged.
 */
public static gunichar unicharToupper(gunichar c)
{
	// gunichar g_unichar_toupper (gunichar c);
	gunichar upper = g_unichar_toupper(c);
	return upper;
}
/**
 * Converts a character to lowercase.
 * Params:
 *  c = a Unicode character.
 * Returns:
 *  the result of converting c to lowercase.
 *  If c is not an uppercase or titlecase character,
 *  or has no lowercase equivalent, c is returned unchanged.
 */
public static gunichar unicharTolower(gunichar c)
{
	// gunichar g_unichar_tolower (gunichar c);
	gunichar lower = g_unichar_tolower(c);
	return lower;
}
/**
 * Converts a character to titlecase.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  the result of converting c to titlecase.
 *  If c is not an uppercase or lowercase character,
 *  c is returned unchanged.
 */
public static gunichar unicharTotitle(gunichar c)
{
	// gunichar g_unichar_totitle (gunichar c);
	gunichar title = g_unichar_totitle(c);
	return title;
}
/**
 * Finds the numeric value of a character interpreted as a decimal
 * digit.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  the numeric value of c if it is a decimal digit (according to
 *  g_unichar_isdigit()). Otherwise, -1.
 */
public static int unicharDigitValue(gunichar c)
{
	// gint g_unichar_digit_value (gunichar c);
	auto digit = g_unichar_digit_value(c);
	return digit;
}
/**
 * Finds the numeric value of a character interpreted as a hexadecimal
 * digit.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  the numeric value of c if it is a hex digit (according to
 *  g_unichar_isxdigit()). Otherwise, -1.
 */
public static int unicharXdigitValue(gunichar c)
{
	// gint g_unichar_xdigit_value (gunichar c);
	auto hexDigit = g_unichar_xdigit_value(c);
	return hexDigit;
}
/**
 * Classifies a Unicode character by type.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  the type of the character.
 */
public static GUnicodeType unicharType(gunichar c)
{
	// GUnicodeType g_unichar_type (gunichar c);
	GUnicodeType category = g_unichar_type(c);
	return category;
}
/**
 * Finds the break type of c. c should be a Unicode character (to
 * derive a character from UTF-8 encoded text, use g_utf8_get_char()).
 * The break type is used to find word and line breaks ("text
 * boundaries"). Pango implements the Unicode boundary resolution
 * algorithms, and normally you would use a function such as
 * pango_break() instead of dealing with break types yourself.
 * Params:
 *  c = a Unicode character
 * Returns:
 *  the break type of c
 */
public static GUnicodeBreakType unicharBreakType(gunichar c)
{
	// GUnicodeBreakType g_unichar_break_type (gunichar c);
	GUnicodeBreakType breakType = g_unichar_break_type(c);
	return breakType;
}
/**
 * Puts a string into canonical ordering, in place.
 * Decomposed characters in the string are rearranged according to
 * their combining classes. See the Unicode manual for more
 * information.
 * Params:
 *  string = a UCS-4 encoded string.
 *  len = the maximum length of string to use.
 */
public static void unicodeCanonicalOrdering(gunichar* string, uint len)
{
	// void g_unicode_canonical_ordering (gunichar *string, gsize len);
	g_unicode_canonical_ordering(
		string,
		len);
}
/**
 * Computes the canonical decomposition of a Unicode character.
 * Params:
 *  ch = a Unicode character.
 *  resultLen = location to store the length of the return value.
 * Returns:
 *  a newly allocated string of Unicode characters.
 *  resultLen is set to the resulting length of the string.
 */
public static gunichar* unicodeCanonicalDecomposition(gunichar ch, uint* resultLen)
{
	// gunichar* g_unicode_canonical_decomposition (gunichar ch, gsize *result_len);
	gunichar* decomposed = g_unicode_canonical_decomposition(ch, resultLen);
	return decomposed;
}
/**
 * In Unicode, some characters are mirrored: their images are mirrored
 * horizontally in text that is laid out from right to left. For
 * instance, "(" would become its mirror image, ")", in right-to-left
 * text.
 * If ch has the Unicode mirrored property, there is another Unicode
 * character that typically has a glyph that is the mirror image of
 * ch's glyph, and mirroredCh is set, that character is stored at the
 * address pointed to by mirroredCh. Otherwise the original character
 * is stored.
 * Params:
 *  ch = a Unicode character
 *  mirroredCh = location to store the mirrored character
 * Returns:
 *  TRUE if ch has a mirrored character, FALSE otherwise
 */
public static int unicharGetMirrorChar(gunichar ch, gunichar* mirroredCh)
{
	// gboolean g_unichar_get_mirror_char (gunichar ch, gunichar *mirrored_ch);
	auto hasMirror = g_unichar_get_mirror_char(ch, mirroredCh);
	return hasMirror;
}
/**
 * Looks up the GUnicodeScript for a particular character (as defined
 * by Unicode Standard Annex 24). ch is not checked for being a valid
 * Unicode character; if an invalid character is passed in, the result
 * is undefined.
 * Params:
 *  ch = a Unicode character
 * Returns:
 *  the GUnicodeScript for the character.
 */
public static GUnicodeScript unicharGetScript(gunichar ch)
{
	// GUnicodeScript g_unichar_get_script (gunichar ch);
	GUnicodeScript script = g_unichar_get_script(ch);
	return script;
}
/**
 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 * If p does not point to a valid UTF-8 encoded character, results are
 * undefined. If you are not sure that the bytes are complete, valid
 * Unicode characters, use g_utf8_get_char_validated() instead.
 * Params:
 *  p = a pointer to Unicode character encoded as UTF-8
 * Returns:
 *  the resulting character
 */
public static gunichar utf8_GetChar(char[] p)
{
	// gunichar g_utf8_get_char (const gchar *p);
	auto utf8 = Str.toStringz(p);
	gunichar decoded = g_utf8_get_char(utf8);
	return decoded;
}
/**
 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 * This function checks for incomplete characters, for invalid
 * characters such as characters that are out of the range of Unicode,
 * and for overlong encodings of valid characters.
 * Params:
 *  p = a pointer to Unicode character encoded as UTF-8
 *  maxLen = the maximum number of bytes to read, or -1, for no maximum.
 * Returns:
 *  the resulting character. If p points to a partial sequence at the
 *  end of a string that could begin a valid character, returns
 *  (gunichar)-2; otherwise, if p does not point to a valid UTF-8
 *  encoded Unicode character, returns (gunichar)-1.
 */
public static gunichar utf8_GetCharValidated(char[] p, int maxLen)
{
	// gunichar g_utf8_get_char_validated (const gchar *p, gssize max_len);
	auto utf8 = Str.toStringz(p);
	gunichar decoded = g_utf8_get_char_validated(utf8, maxLen);
	return decoded;
}
/**
 * Converts from an integer character offset to a pointer to a position
 * within the string.
 * Since 2.10, a negative offset may be passed to step backwards. It is
 * usually worth stepping backwards from the end instead of forwards if
 * offset is in the last fourth of the string, since moving forward is
 * about 3 times faster than moving backward.
 * Params:
 *  str = a UTF-8 encoded string
 *  offset = a character offset within str
 * Returns:
 *  the resulting pointer, converted to a D string with Str.toString
 */
public static char[] utf8_OffsetToPointer(char[] str, int offset)
{
	// gchar* g_utf8_offset_to_pointer (const gchar *str, glong offset);
	auto utf8 = Str.toStringz(str);
	auto position = g_utf8_offset_to_pointer(utf8, offset);
	return Str.toString(position);
}
/**
 * Converts from a pointer to a position within a string to an integer
 * character offset.
 * Since 2.10, pos is allowed to be before str, in which case a
 * negative offset is returned.
 * NOTE(review): str and pos are each passed through Str.toStringz; if
 * that helper copies its argument, the two C pointers refer to
 * unrelated buffers -- confirm against Str's implementation.
 * Params:
 *  str = a UTF-8 encoded string
 *  pos = a pointer to a position within str
 * Returns:
 *  the resulting character offset
 */
public static int utf8_PointerToOffset(char[] str, char[] pos)
{
	// glong g_utf8_pointer_to_offset (const gchar *str, const gchar *pos);
	auto start = Str.toStringz(str);
	auto position = Str.toStringz(pos);
	auto offset = g_utf8_pointer_to_offset(start, position);
	return offset;
}
/**
 * Finds the previous UTF-8 character in the string before p.
 * p does not have to be at the beginning of a UTF-8 character. The
 * character found is not checked for validity beyond starting with an
 * appropriate byte. If p might be the first character of the string,
 * you must use g_utf8_find_prev_char() instead.
 * NOTE(review): p goes through Str.toStringz before the call; if that
 * helper copies, the bytes before the returned pointer are not the
 * caller's data -- confirm against Str's implementation.
 * Params:
 *  p = a pointer to a position within a UTF-8 encoded string
 * Returns:
 *  a pointer to the found character, converted with Str.toString.
 */
public static char[] utf8_PrevChar(char[] p)
{
	// gchar* g_utf8_prev_char (const gchar *p);
	auto position = Str.toStringz(p);
	auto previous = g_utf8_prev_char(position);
	return Str.toString(previous);
}
/**
 * Finds the start of the next UTF-8 character in the string after p.
 * p does not have to be at the beginning of a UTF-8 character. The
 * character found is not checked for validity beyond starting with an
 * appropriate byte.
 * NOTE(review): p and end are each passed through Str.toStringz; if
 * that helper copies, the two C pointers refer to unrelated buffers --
 * confirm against Str's implementation.
 * Params:
 *  p = a pointer to a position within a UTF-8 encoded string
 *  end = a pointer to the end of the string, or NULL to indicate
 *  that the string is nul-terminated
 * Returns:
 *  a pointer to the found character or NULL, converted with
 *  Str.toString
 */
public static char[] utf8_FindNextChar(char[] p, char[] end)
{
	// gchar* g_utf8_find_next_char (const gchar *p, const gchar *end);
	auto position = Str.toStringz(p);
	auto limit = Str.toStringz(end);
	auto next = g_utf8_find_next_char(position, limit);
	return Str.toString(next);
}
/**
 * Given a position p within a UTF-8 encoded string str, finds the
 * start of the previous UTF-8 character starting before p. Returns
 * NULL if no UTF-8 characters are present in str before p.
 * p does not have to be at the beginning of a UTF-8 character. The
 * character found is not checked for validity beyond starting with an
 * appropriate byte.
 * NOTE(review): str and p are each passed through Str.toStringz; if
 * that helper copies, the two C pointers refer to unrelated buffers --
 * confirm against Str's implementation.
 * Params:
 *  str = pointer to the beginning of a UTF-8 encoded string
 *  p = pointer to some position within str
 * Returns:
 *  a pointer to the found character or NULL, converted with
 *  Str.toString.
 */
public static char[] utf8_FindPrevChar(char[] str, char[] p)
{
	// gchar* g_utf8_find_prev_char (const gchar *str, const gchar *p);
	auto start = Str.toStringz(str);
	auto position = Str.toStringz(p);
	auto previous = g_utf8_find_prev_char(start, position);
	return Str.toString(previous);
}
/**
 * Returns the length of the string in characters.
 * Params:
 *  p = pointer to the start of a UTF-8 encoded string.
 *  max = the maximum number of bytes to examine. If max is less than
 *  0, then the string is assumed to be nul-terminated. If max is 0,
 *  p will not be examined.
 * Returns:
 *  the length of the string in characters
 */
public static int utf8_Strlen(char[] p, int max)
{
	// glong g_utf8_strlen (const gchar *p, gssize max);
	auto utf8 = Str.toStringz(p);
	auto charCount = g_utf8_strlen(utf8, max);
	return charCount;
}
/**
 * Like the standard C strncpy() function, but copies a given number of
 * characters instead of a given number of bytes. The src string must
 * be valid UTF-8 encoded text. (Use g_utf8_validate() on all text
 * before trying to use UTF-8 utility functions with it.)
 * NOTE(review): dest is passed through Str.toStringz; if that helper
 * copies, the caller's dest array is not the buffer being filled --
 * confirm against Str's implementation.
 * Params:
 *  dest = buffer to fill with characters from src
 *  src = UTF-8 encoded string
 *  n = number of characters to copy
 * Returns:
 *  the pointer returned by g_utf8_strncpy, converted with Str.toString
 */
public static char[] utf8_Strncpy(char[] dest, char[] src, uint n)
{
	// gchar* g_utf8_strncpy (gchar *dest, const gchar *src, gsize n);
	auto target = Str.toStringz(dest);
	auto source = Str.toStringz(src);
	auto copied = g_utf8_strncpy(target, source, n);
	return Str.toString(copied);
}
/**
 * Finds the leftmost occurrence of the given Unicode character in a
 * UTF-8 encoded string, while limiting the search to len bytes.
 * If len is -1, the search is unbounded.
 * Params:
 *  p = a nul-terminated UTF-8 encoded string
 *  len = the maximum length of p
 *  c = a Unicode character
 * Returns:
 *  NULL if the string does not contain the character, otherwise a
 *  pointer to the start of the leftmost occurrence of the character
 *  in the string (converted with Str.toString).
 */
public static char[] utf8_Strchr(char[] p, int len, gunichar c)
{
	// gchar* g_utf8_strchr (const gchar *p, gssize len, gunichar c);
	auto haystack = Str.toStringz(p);
	auto found = g_utf8_strchr(haystack, len, c);
	return Str.toString(found);
}
/**
 * Finds the rightmost occurrence of the given Unicode character in a
 * UTF-8 encoded string, while limiting the search to len bytes.
 * If len is -1, the search is unbounded.
 * Params:
 *  p = a nul-terminated UTF-8 encoded string
 *  len = the maximum length of p
 *  c = a Unicode character
 * Returns:
 *  NULL if the string does not contain the character, otherwise a
 *  pointer to the start of the rightmost occurrence of the character
 *  in the string (converted with Str.toString).
 */
public static char[] utf8_Strrchr(char[] p, int len, gunichar c)
{
	// gchar* g_utf8_strrchr (const gchar *p, gssize len, gunichar c);
	auto haystack = Str.toStringz(p);
	auto found = g_utf8_strrchr(haystack, len, c);
	return Str.toString(found);
}
/**
 * Reverses a UTF-8 string. str must be valid UTF-8 encoded text.
 * (Use g_utf8_validate() on all text before trying to use UTF-8
 * utility functions with it.)
 * Note that unlike g_strreverse(), the C function returns
 * newly-allocated memory, which should be freed with g_free().
 * NOTE(review): this wrapper does not call g_free on the C result;
 * whether Str.toString copies it is not visible here -- confirm
 * ownership.
 * Params:
 *  str = a UTF-8 encoded string
 *  len = the maximum length of str to use. If len < 0, then
 *  the string is nul-terminated.
 * Returns:
 *  a newly-allocated string which is the reverse of str.
 */
public static char[] utf8_Strreverse(char[] str, int len)
{
	// gchar* g_utf8_strreverse (const gchar *str, gssize len);
	auto utf8 = Str.toStringz(str);
	auto reversed = g_utf8_strreverse(utf8, len);
	return Str.toString(reversed);
}
/**
 * Validates UTF-8 encoded text. str is the text to validate; if str is
 * nul-terminated, maxLen can be -1, otherwise maxLen should be the
 * number of bytes to validate.
 * If end is non-NULL, the end of the valid range is stored there (i.e.
 * the start of the first invalid character if some bytes were invalid,
 * or the end of the text being validated otherwise).
 * Note that g_utf8_validate() returns FALSE if maxLen is positive and
 * NUL is met before maxLen bytes have been read.
 * Returns TRUE if all of str was valid. Many GLib and GTK+ routines
 * require valid UTF-8 as input, so data read from a file or the
 * network should be checked with g_utf8_validate() before doing
 * anything else with it.
 * Params:
 *  str = a pointer to character data
 *  maxLen = max bytes to validate, or -1 to go until NUL
 *  end = return location for end of valid data
 * Returns:
 *  TRUE if the text was valid UTF-8
 */
public static int utf8_Validate(char[] str, int maxLen, char** end)
{
	// gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end);
	auto utf8 = Str.toStringz(str);
	auto isValid = g_utf8_validate(utf8, maxLen, end);
	return isValid;
}
/**
 * Converts all Unicode characters in the string that have a case to
 * uppercase. Exactly how this is done depends on the current locale,
 * and may increase the number of characters in the string. (For
 * instance, the German ess-zet will be changed to SS.)
 * Params:
 *  str = a UTF-8 encoded string
 *  len = length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  a newly allocated string, with all characters
 *  converted to uppercase.
 */
public static char[] utf8_Strup(char[] str, int len)
{
	// gchar* g_utf8_strup (const gchar *str, gssize len);
	auto utf8 = Str.toStringz(str);
	auto upper = g_utf8_strup(utf8, len);
	return Str.toString(upper);
}
/**
 * Converts all Unicode characters in the string that have a case to
 * lowercase. Exactly how this is done depends on the current locale,
 * and may change the number of characters in the string.
 * Params:
 *  str = a UTF-8 encoded string
 *  len = length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  a newly allocated string, with all characters
 *  converted to lowercase.
 */
public static char[] utf8_Strdown(char[] str, int len)
{
	// gchar* g_utf8_strdown (const gchar *str, gssize len);
	auto utf8 = Str.toStringz(str);
	auto lower = g_utf8_strdown(utf8, len);
	return Str.toString(lower);
}
/**
 * Converts a string into a form that is independent of case. The
 * result does not correspond to any particular case, but can be
 * compared for equality or ordered with the results of calling
 * g_utf8_casefold() on other strings.
 * Note that calling g_utf8_casefold() followed by g_utf8_collate() is
 * only an approximation to the correct linguistic case-insensitive
 * ordering, though a fairly good one. Getting this exactly right would
 * require a more sophisticated collation function that takes case
 * sensitivity into account. GLib does not currently provide such a
 * function.
 * Params:
 *  str = a UTF-8 encoded string
 *  len = length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  a newly allocated string, that is a
 *  case independent form of str.
 */
public static char[] utf8_Casefold(char[] str, int len)
{
	// gchar* g_utf8_casefold (const gchar *str, gssize len);
	auto utf8 = Str.toStringz(str);
	auto folded = g_utf8_casefold(utf8, len);
	return Str.toString(folded);
}
/**
 * Converts a string into canonical form, standardizing such issues as
 * whether a character with an accent is represented as a base
 * character and combining accent or as a single precomposed character.
 * You should generally call g_utf8_normalize() before comparing two
 * Unicode strings.
 * The normalization mode G_NORMALIZE_DEFAULT only standardizes
 * differences that do not affect the text content, such as the
 * above-mentioned accent representation. G_NORMALIZE_ALL also
 * standardizes the "compatibility" characters in Unicode, such as
 * SUPERSCRIPT THREE to the standard forms (in this case DIGIT THREE).
 * Formatting information may be lost, but for most text operations
 * such characters should be considered the same.
 * For example, g_utf8_collate() normalizes with G_NORMALIZE_ALL as its
 * first step.
 * G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE are like
 * G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL, but return a result with
 * composed forms rather than a maximally decomposed form. This is
 * often useful if you intend to convert the string to a legacy
 * encoding or pass it to a system with less capable Unicode handling.
 * Params:
 *  str = a UTF-8 encoded string.
 *  len = length of str, in bytes, or -1 if str is nul-terminated.
 *  mode = the type of normalization to perform.
 * Returns:
 *  a newly allocated string, that is the
 *  normalized form of str.
 */
public static char[] utf8_Normalize(char[] str, int len, GNormalizeMode mode)
{
	// gchar* g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode);
	auto utf8 = Str.toStringz(str);
	auto normalized = g_utf8_normalize(utf8, len, mode);
	return Str.toString(normalized);
}
/**
 * Compares two strings for ordering using the linguistically correct
 * rules for the current locale. When sorting a large number of
 * strings, it is significantly faster to obtain collation keys with
 * g_utf8_collate_key() and compare the keys with strcmp() than to sort
 * the original strings.
 * Params:
 *  str1 = a UTF-8 encoded string
 *  str2 = a UTF-8 encoded string
 * Returns:
 *  < 0 if str1 compares before str2,
 *  0 if they compare equal, > 0 if str1 compares after str2.
 */
public static int utf8_Collate(char[] str1, char[] str2)
{
	// gint g_utf8_collate (const gchar *str1, const gchar *str2);
	auto left = Str.toStringz(str1);
	auto right = Str.toStringz(str2);
	auto ordering = g_utf8_collate(left, right);
	return ordering;
}
/**
 * Converts a string into a collation key that can be compared with
 * other collation keys produced by the same function using strcmp().
 * Comparing the collation keys of two strings with strcmp() always
 * gives the same result as comparing the two original strings with
 * g_utf8_collate().
 * Params:
 *  str = a UTF-8 encoded string.
 *  len = length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  a newly allocated string. The C documentation states it should be
 *  freed with g_free() when you are done with it.
 */
public static char[] utf8_CollateKey(char[] str, int len)
{
	// gchar* g_utf8_collate_key (const gchar *str, gssize len);
	auto utf8 = Str.toStringz(str);
	auto key = g_utf8_collate_key(utf8, len);
	return Str.toString(key);
}
/**
 * Converts a string into a collation key that can be compared with
 * other collation keys produced by the same function using strcmp().
 * In order to sort filenames correctly, this function treats the dot
 * '.' as a special case. Most dictionary orderings seem to consider it
 * insignificant, thus producing the ordering "event.c"
 * "eventgenerator.c" "event.h" instead of "event.c" "event.h"
 * "eventgenerator.c". Numbers are also treated intelligently, so that
 * "file1" "file10" "file5" is sorted as "file1" "file5" "file10".
 * Params:
 *  str = a UTF-8 encoded string.
 *  len = length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  a newly allocated string. The C documentation states it should be
 *  freed with g_free() when you are done with it.
 */
public static char[] utf8_CollateKeyForFilename(char[] str, int len)
{
	// gchar* g_utf8_collate_key_for_filename (const gchar *str, gssize len);
	auto utf8 = Str.toStringz(str);
	auto key = g_utf8_collate_key_for_filename(utf8, len);
	return Str.toString(key);
}
/**
 * Converts a string from UTF-8 to UTF-16. A 0 character is added to
 * the result after the converted text.
 * Params:
 *  str = a UTF-8 encoded string
 *  len = the maximum length (number of characters) of str to use.
 *  If len < 0, then the string is nul-terminated.
 *  itemsRead = location to store number of bytes read, or NULL.
 *  If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be returned in
 *  case str contains a trailing partial character. If an error occurs
 *  then the index of the invalid input is stored here.
 *  itemsWritten = location to store number of gunichar2 written.
 *  The value stored here does not include the trailing 0.
 *  error = location to store the error occurring, or NULL to ignore
 *  errors. Any of the errors in GConvertError other than
 *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  a pointer to a newly allocated UTF-16 string.
 *  This value must be freed with g_free(). If an
 *  error occurs, NULL will be returned.
 */
public static gunichar2* utf8_ToUtf16(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar2* g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
	auto utf8 = Str.toStringz(str);
	gunichar2* utf16 = g_utf8_to_utf16(utf8, len, itemsRead, itemsWritten, error);
	return utf16;
}
/**
 * Converts a string from UTF-8 to a 32-bit fixed width representation
 * as UCS-4. A trailing 0 is added to the string after the converted
 * text.
 * Params:
 *  str = a UTF-8 encoded string
 *  len = the maximum length of str to use. If len < 0, then
 *  the string is nul-terminated.
 *  itemsRead = location to store number of bytes read, or NULL.
 *  If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be returned in
 *  case str contains a trailing partial character. If an error occurs
 *  then the index of the invalid input is stored here.
 *  itemsWritten = location to store number of characters written or
 *  NULL. The value stored here does not include the trailing 0.
 *  error = location to store the error occurring, or NULL to ignore
 *  errors. Any of the errors in GConvertError other than
 *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  a pointer to a newly allocated UCS-4 string.
 *  This value must be freed with g_free(). If an
 *  error occurs, NULL will be returned.
 */
public static gunichar* utf8_ToUcs4(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar* g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
	auto utf8 = Str.toStringz(str);
	gunichar* ucs4 = g_utf8_to_ucs4(utf8, len, itemsRead, itemsWritten, error);
	return ucs4;
}
/**
 * Converts a string from UTF-8 to a 32-bit fixed width representation
 * as UCS-4, assuming valid UTF-8 input.
 * This function is roughly twice as fast as g_utf8_to_ucs4() but does
 * no error checking on the input.
 * Params:
 *  str = a UTF-8 encoded string
 *  len = the maximum length of str to use. If len < 0, then
 *  the string is nul-terminated.
 *  itemsWritten = location to store the number of characters in the
 *  result
 * Returns:
 *  a pointer to a newly allocated UCS-4 string.
 *  This value must be freed with g_free().
 */
public static gunichar* utf8_ToUcs4_Fast(char[] str, int len, int* itemsWritten)
{
	// gunichar* g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written);
	auto utf8 = Str.toStringz(str);
	gunichar* ucs4 = g_utf8_to_ucs4_fast(utf8, len, itemsWritten);
	return ucs4;
}
/**
 * Converts a string from UTF-16 to UCS-4. The result is terminated
 * with a 0 character.
 * Params:
 *  str = a UTF-16 encoded string
 *  len = the maximum length (number of gunichar2) of str to use.
 *  If len < 0, then the string is terminated with a 0 character.
 *  itemsRead = location to store number of words read, or NULL.
 *  If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be returned in
 *  case str contains a trailing partial character. If an error occurs
 *  then the index of the invalid input is stored here.
 *  itemsWritten = location to store number of characters written, or
 *  NULL. The value stored here does not include the trailing 0
 *  character.
 *  error = location to store the error occurring, or NULL to ignore
 *  errors. Any of the errors in GConvertError other than
 *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  a pointer to a newly allocated UCS-4 string.
 *  This value must be freed with g_free(). If an
 *  error occurs, NULL will be returned.
 */
public static gunichar* utf16_ToUcs4(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar* g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
	gunichar* ucs4 = g_utf16_to_ucs4(str, len, itemsRead, itemsWritten, error);
	return ucs4;
}
/**
 * Converts a string from UTF-16 to UTF-8. The result produced by GLib is
 * terminated with a 0 byte.
 * Note that the input is expected to be already in native endianness;
 * an initial byte-order-mark character is not handled specially.
 * g_convert() can be used to convert a byte buffer of UTF-16 data of
 * ambiguous endianness.
 * str:
 *  a UTF-16 encoded string
 * len:
 *  the maximum length (number of gunichar2) of str to use.
 *  If len < 0, then the string is terminated with a 0 character.
 * itemsRead:
 *  location to store number of words read, or NULL.
 *  If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be
 *  returned in case str contains a trailing partial
 *  character. If an error occurs then the index of the
 *  invalid input is stored here.
 * itemsWritten:
 *  location to store number of bytes written, or NULL.
 *  The value stored here does not include the trailing
 *  0 byte.
 * error:
 *  location to store the error occurring, or NULL to ignore
 *  errors. Any of the errors in GConvertError other than
 *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  the result of Str.toString applied to the UTF-8 string that
 *  g_utf16_to_utf8() returns. The C function hands back a newly
 *  allocated buffer that must be freed with g_free(), or NULL with
 *  error set when the conversion fails.
 *  NOTE(review): whether Str.toString copies that buffer or adopts it
 *  is not visible here; confirm the C allocation is not leaked.
 */
public static char[] utf16_ToUtf8(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gchar* g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
	// NOTE(review): the C prototype uses glong / glong* where this wrapper passes
	// int / int*; confirm the widths agree on every target platform.
	auto utf8 = g_utf16_to_utf8(str, len, itemsRead, itemsWritten, error);
	return Str.toString(utf8);
}
/**
 * Converts a string from UCS-4 to UTF-16. A 0 character is
 * added to the result after the converted text.
 * str:
 *  a UCS-4 encoded string
 * len:
 *  the maximum length (number of characters) of str to use.
 *  If len < 0, then the string is terminated with a 0 character.
 * itemsRead:
 *  location to store number of bytes read, or NULL.
 *  If an error occurs then the index of the invalid input
 *  is stored here.
 * itemsWritten:
 *  location to store number of gunichar2
 *  written, or NULL. The value stored here does not
 *  include the trailing 0.
 * error:
 *  location to store the error occurring, or NULL to ignore
 *  errors. Any of the errors in GConvertError other than
 *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  a pointer to a newly allocated UTF-16 string.
 *  This value must be freed with g_free(). If an
 *  error occurs, NULL will be returned and error set.
 */
public static gunichar2* ucs4_ToUtf16(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar2* g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
	// NOTE(review): the C prototype uses glong / glong* where this wrapper passes
	// int / int*; confirm the widths agree on every target platform.
	gunichar2* utf16 = g_ucs4_to_utf16(str, len, itemsRead, itemsWritten, error);
	return utf16;
}
/**
 * Converts a string from a 32-bit fixed width representation as UCS-4
 * to UTF-8. The result produced by GLib is terminated with a 0 byte.
 * str:
 *  a UCS-4 encoded string
 * len:
 *  the maximum length (number of characters) of str to use.
 *  If len < 0, then the string is terminated with a 0 character.
 * itemsRead:
 *  location to store number of characters read, or NULL.
 * itemsWritten:
 *  location to store number of bytes written, or NULL.
 *  The value stored here does not include the trailing 0
 *  byte.
 * error:
 *  location to store the error occurring, or NULL to ignore
 *  errors. Any of the errors in GConvertError other than
 *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  the result of Str.toString applied to the UTF-8 string that
 *  g_ucs4_to_utf8() returns. The C function hands back a newly
 *  allocated buffer that must be freed with g_free(). If an
 *  error occurs, NULL will be returned and
 *  error set. In that case, items_read will be
 *  set to the position of the first invalid input
 *  character.
 *  NOTE(review): whether Str.toString copies that buffer or adopts it
 *  is not visible here; confirm the C allocation is not leaked.
 */
public static char[] ucs4_ToUtf8(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gchar* g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
	// NOTE(review): the C prototype uses glong / glong* where this wrapper passes
	// int / int*; confirm the widths agree on every target platform.
	auto utf8 = g_ucs4_to_utf8(str, len, itemsRead, itemsWritten, error);
	return Str.toString(utf8);
}
/**
 * Converts a single character to UTF-8.
 * c:
 *  a Unicode character code
 * outbuf:
 *  output buffer, must have at least 6 bytes of space.
 *  If null (or shorter than 6 bytes), the length will be computed
 *  and returned and nothing will be written to outbuf.
 * Returns:
 *  number of bytes written
 * See Also:
 *  g_locale_to_utf8(), g_locale_from_utf8()
 *  Convenience functions for converting between UTF-8 and the locale encoding.
 */
public static int unicharToUtf8(gunichar c, char[] outbuf)
{
	// gint g_unichar_to_utf8 (gunichar c, gchar *outbuf);

	// outbuf is an OUTPUT buffer, so GLib must receive a pointer to the
	// caller's own storage. Routing it through Str.toStringz (an input-string
	// conversion helper) risks handing GLib a temporary, in which case the
	// encoded bytes never reach the caller.
	//
	// g_unichar_to_utf8 may write up to 6 bytes. A buffer that cannot hold
	// that much is treated like NULL so the call only computes the length
	// and never writes past the end of the slice.
	char* dest = (outbuf.length >= 6) ? outbuf.ptr : null;
	return g_unichar_to_utf8(c, dest);
}