alternative to assert
[gtkD.git] / src / glib / Unicode.d
blob7cd86777fa53388cd2dbc5b655fc9ff63fa562c6
1 /*
2 * This file is part of duit.
4 * duit is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 2.1 of the License, or
7 * (at your option) any later version.
9 * duit is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with duit; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 // generated automatically - do not change
20 // find conversion definition on APILookup.txt
21 // implement new conversion functionalities on the wrap.utils package
24 * Conversion parameters:
25 * inFile = glib-Unicode-Manipulation.html
26 * outPack = glib
27 * outFile = Unicode
28 * strct =
29 * realStrct=
30 * ctorStrct=
31 * clss = Unicode
32 * interf =
33 * class Code: No
34 * interface Code: No
35 * template for:
36 * extend =
37 * implements:
38 * prefixes:
39 * - g_
40 * omit structs:
41 * omit prefixes:
42 * omit code:
43 * imports:
44 * - glib.ErrorG
45 * - glib.Str
46 * structWrap:
47 * local aliases:
50 module glib.Unicode;
52 private import glib.glibtypes;
54 private import lib.glib;
56 private import glib.ErrorG;
57 private import glib.Str;
59 /**
60 * Description
61 * This section describes a number of functions for dealing with
62 * Unicode characters and strings. There are analogues of the
63 * traditional ctype.h character classification
64 * and case conversion functions, UTF-8 analogues of some string utility
65 * functions, functions to perform normalization, case conversion and
66 * collation on UTF-8 strings and finally functions to convert between
67 * the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
68 * The implementations of the Unicode functions in GLib are based
69 * on the Unicode Character Data tables, which are available from
70 * www.unicode.org.
71 * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
72 * GLib 2.12 supports Unicode 5.0.
74 public class Unicode
77 /**
/**
 * Tests whether ch is a valid Unicode character. Not every integer
 * value is a valid character; 0 counts as valid even though it
 * normally serves as a string terminator.
 * ch:
 *  a Unicode character
 * Returns:
 *  TRUE if ch is a valid Unicode character
 */
public static int unicharValidate(gunichar ch)
{
	// Wraps: gboolean g_unichar_validate (gunichar ch);
	auto valid = g_unichar_validate(ch);
	return valid;
}
/**
 * Tests whether a character is alphanumeric.
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is an alphanumeric character
 */
public static int unicharIsalnum(gunichar c)
{
	// Wraps: gboolean g_unichar_isalnum (gunichar c);
	auto alnum = g_unichar_isalnum(c);
	return alnum;
}
/**
 * Tests whether a character is alphabetic (i.e. a letter).
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is an alphabetic character
 */
public static int unicharIsalpha(gunichar c)
{
	// Wraps: gboolean g_unichar_isalpha (gunichar c);
	auto alpha = g_unichar_isalpha(c);
	return alpha;
}
/**
 * Tests whether a character is a control character.
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is a control character
 */
public static int unicharIscntrl(gunichar c)
{
	// Wraps: gboolean g_unichar_iscntrl (gunichar c);
	auto control = g_unichar_iscntrl(c);
	return control;
}
/**
 * Tests whether a character is numeric (i.e. a digit). Besides
 * ASCII 0-9 this includes digits of other languages/scripts.
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is a digit
 */
public static int unicharIsdigit(gunichar c)
{
	// Wraps: gboolean g_unichar_isdigit (gunichar c);
	auto digit = g_unichar_isdigit(c);
	return digit;
}
/**
 * Tests whether a character is printable and not a space; the
 * result is FALSE for control characters, format characters and
 * spaces. g_unichar_isprint() is the similar check that returns
 * TRUE for spaces.
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is printable unless it's a space
 */
public static int unicharIsgraph(gunichar c)
{
	// Wraps: gboolean g_unichar_isgraph (gunichar c);
	auto graphic = g_unichar_isgraph(c);
	return graphic;
}
/**
 * Tests whether a character is a lowercase letter.
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is a lowercase letter
 */
public static int unicharIslower(gunichar c)
{
	// Wraps: gboolean g_unichar_islower (gunichar c);
	auto lower = g_unichar_islower(c);
	return lower;
}
/**
 * Tests whether a character is printable. In contrast to
 * g_unichar_isgraph(), spaces yield TRUE.
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is printable
 */
public static int unicharIsprint(gunichar c)
{
	// Wraps: gboolean g_unichar_isprint (gunichar c);
	auto printable = g_unichar_isprint(c);
	return printable;
}
/**
 * Tests whether a character is punctuation or a symbol.
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is a punctuation or symbol character
 */
public static int unicharIspunct(gunichar c)
{
	// Wraps: gboolean g_unichar_ispunct (gunichar c);
	auto punct = g_unichar_ispunct(c);
	return punct;
}
/**
 * Tests whether a character is a space, tab, or line separator
 * (newline, carriage return, etc.).
 * To get a character value from UTF-8 text, use g_utf8_get_char().
 * (Note: this is not suitable for word breaking; the algorithm is
 * fairly complex, so use Pango or an equivalent to get it right.)
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is a space character
 */
public static int unicharIsspace(gunichar c)
{
	// Wraps: gboolean g_unichar_isspace (gunichar c);
	auto space = g_unichar_isspace(c);
	return space;
}
/**
 * Tests whether a character is uppercase.
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if c is an uppercase character
 */
public static int unicharIsupper(gunichar c)
{
	// Wraps: gboolean g_unichar_isupper (gunichar c);
	auto upper = g_unichar_isupper(c);
	return upper;
}
/**
 * Tests whether a character is a hexadecimal digit.
 * c:
 *  a Unicode character.
 * Returns:
 *  TRUE if the character is a hexadecimal digit
 */
public static int unicharIsxdigit(gunichar c)
{
	// Wraps: gboolean g_unichar_isxdigit (gunichar c);
	auto hexDigit = g_unichar_isxdigit(c);
	return hexDigit;
}
/**
 * Tests whether a character is titlecase. Certain composite
 * characters in Unicode, such as the DZ digraph, come in three case
 * variants rather than two. The titlecase variant is the one used
 * at the start of a word when only the first letter is capitalized.
 * For the DZ digraph the titlecase form is
 * U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if the character is titlecase
 */
public static int unicharIstitle(gunichar c)
{
	// Wraps: gboolean g_unichar_istitle (gunichar c);
	auto title = g_unichar_istitle(c);
	return title;
}
/**
 * Tests whether a given character is assigned in the Unicode
 * standard.
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if the character has an assigned value
 */
public static int unicharIsdefined(gunichar c)
{
	// Wraps: gboolean g_unichar_isdefined (gunichar c);
	auto assigned = g_unichar_isdefined(c);
	return assigned;
}
/**
 * Tests whether a character is typically rendered in a
 * double-width cell.
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if the character is wide
 */
public static int unicharIswide(gunichar c)
{
	// Wraps: gboolean g_unichar_iswide (gunichar c);
	auto wide = g_unichar_iswide(c);
	return wide;
}
/**
 * Tests whether a character is typically rendered in a
 * double-width cell under legacy East Asian locales. Every
 * character that g_unichar_iswide() reports as wide is also
 * reported wide here, but not necessarily the other way round.
 * See the Unicode Standard Annex 11 for details.
 * c:
 *  a Unicode character
 * Returns:
 *  TRUE if the character is wide in legacy East Asian locales
 * Since 2.12
 */
public static int unicharIswideCjk(gunichar c)
{
	// Wraps: gboolean g_unichar_iswide_cjk (gunichar c);
	auto wideCjk = g_unichar_iswide_cjk(c);
	return wideCjk;
}
/**
 * Converts a character to uppercase.
 * c:
 *  a Unicode character
 * Returns:
 *  c converted to uppercase. When c is not a lowercase or
 *  titlecase character, or has no uppercase equivalent, c is
 *  returned unchanged.
 */
public static gunichar unicharToupper(gunichar c)
{
	// Wraps: gunichar g_unichar_toupper (gunichar c);
	auto upperCh = g_unichar_toupper(c);
	return upperCh;
}
/**
 * Converts a character to lowercase.
 * c:
 *  a Unicode character.
 * Returns:
 *  c converted to lowercase. When c is not an uppercase or
 *  titlecase character, or has no lowercase equivalent, c is
 *  returned unchanged.
 */
public static gunichar unicharTolower(gunichar c)
{
	// Wraps: gunichar g_unichar_tolower (gunichar c);
	auto lowerCh = g_unichar_tolower(c);
	return lowerCh;
}
/**
 * Converts a character to titlecase.
 * c:
 *  a Unicode character
 * Returns:
 *  c converted to titlecase. When c is not an uppercase or
 *  lowercase character, c is returned unchanged.
 */
public static gunichar unicharTotitle(gunichar c)
{
	// Wraps: gunichar g_unichar_totitle (gunichar c);
	auto titleCh = g_unichar_totitle(c);
	return titleCh;
}
/**
 * Gets the numeric value of a character as a decimal digit.
 * c:
 *  a Unicode character
 * Returns:
 *  the numeric value of c if it is a decimal digit (according to
 *  g_unichar_isdigit()); otherwise -1.
 */
public static int unicharDigitValue(gunichar c)
{
	// Wraps: gint g_unichar_digit_value (gunichar c);
	auto value = g_unichar_digit_value(c);
	return value;
}
/**
 * Gets the numeric value of a character as a hexadecimal digit.
 * c:
 *  a Unicode character
 * Returns:
 *  the numeric value of c if it is a hex digit (according to
 *  g_unichar_isxdigit()); otherwise -1.
 */
public static int unicharXdigitValue(gunichar c)
{
	// Wraps: gint g_unichar_xdigit_value (gunichar c);
	auto value = g_unichar_xdigit_value(c);
	return value;
}
/**
 * Classifies a Unicode character by type.
 * c:
 *  a Unicode character
 * Returns:
 *  the type of the character.
 */
public static GUnicodeType unicharType(gunichar c)
{
	// Wraps: GUnicodeType g_unichar_type (gunichar c);
	auto kind = g_unichar_type(c);
	return kind;
}
/**
 * Gets the break type of c, which should be a Unicode character
 * (use g_utf8_get_char() to derive one from UTF-8 encoded text).
 * Break types are used to locate word and line breaks ("text
 * boundaries"). Pango implements the Unicode boundary resolution
 * algorithms, so normally you would call a function such as
 * pango_break() rather than dealing with break types yourself.
 * c:
 *  a Unicode character
 * Returns:
 *  the break type of c
 */
public static GUnicodeBreakType unicharBreakType(gunichar c)
{
	// Wraps: GUnicodeBreakType g_unichar_break_type (gunichar c);
	auto breakKind = g_unichar_break_type(c);
	return breakKind;
}
/**
 * Puts a string into canonical ordering, in place: decomposed
 * characters in the string are rearranged according to their
 * combining classes. The Unicode manual has more information.
 * string:
 *  a UCS-4 encoded string.
 * len:
 *  the maximum length of string to use.
 */
public static void unicodeCanonicalOrdering(gunichar* string, uint len)
{
	// Wraps: void g_unicode_canonical_ordering (gunichar *string, gsize len);
	// The buffer behind `string` is handed to GLib as-is.
	g_unicode_canonical_ordering(string, len);
}
/**
 * Computes the canonical decomposition of a Unicode character.
 * ch:
 *  a Unicode character.
 * result_len:
 *  location that receives the length of the return value.
 * Returns:
 *  a newly allocated string of Unicode characters; its length is
 *  written to result_len.
 */
public static gunichar* unicodeCanonicalDecomposition(gunichar ch, uint* resultLen)
{
	// Wraps: gunichar* g_unicode_canonical_decomposition (gunichar ch, gsize *result_len);
	auto decomposed = g_unicode_canonical_decomposition(ch, resultLen);
	return decomposed;
}
/**
 * Some Unicode characters are mirrored: their images are flipped
 * horizontally in text laid out from right to left. For example,
 * "(" becomes its mirror image ")" in right-to-left text.
 * If ch has the Unicode mirrored property, another Unicode
 * character typically has a glyph that mirrors ch's glyph, and
 * mirrored_ch is set, that character is stored at the address
 * mirrored_ch points to. Otherwise the original character is
 * stored.
 * ch:
 *  a Unicode character
 * mirrored_ch:
 *  location to store the mirrored character
 * Returns:
 *  TRUE if ch has a mirrored character, FALSE otherwise
 * Since 2.4
 */
public static int unicharGetMirrorChar(gunichar ch, gunichar* mirroredCh)
{
	// Wraps: gboolean g_unichar_get_mirror_char (gunichar ch, gunichar *mirrored_ch);
	auto hasMirror = g_unichar_get_mirror_char(ch, mirroredCh);
	return hasMirror;
}
/**
 * Looks up the GUnicodeScript of a character (as defined by
 * Unicode Standard Annex 24). ch is not checked for being a valid
 * Unicode character; the result for an invalid character is
 * undefined.
 * ch:
 *  a Unicode character
 * Returns:
 *  the GUnicodeScript for the character.
 * Since 2.14
 */
public static GUnicodeScript unicharGetScript(gunichar ch)
{
	// Wraps: GUnicodeScript g_unichar_get_script (gunichar ch);
	auto script = g_unichar_get_script(ch);
	return script;
}
/**
 * Decodes a UTF-8 byte sequence into a Unicode character.
 * Results are undefined when p does not point to a valid UTF-8
 * encoded character. If the bytes might not be complete, valid
 * Unicode characters, use g_utf8_get_char_validated() instead.
 * p:
 *  a pointer to Unicode character encoded as UTF-8
 * Returns:
 *  the resulting character
 */
public static gunichar utf8_GetChar(char[] p)
{
	// Wraps: gunichar g_utf8_get_char (const gchar *p);
	auto bytes = Str.toStringz(p);
	auto decoded = g_utf8_get_char(bytes);
	return decoded;
}
/**
 * Decodes a UTF-8 byte sequence into a Unicode character, checking
 * for incomplete characters, for invalid characters such as those
 * outside the range of Unicode, and for overlong encodings of
 * valid characters.
 * p:
 *  a pointer to Unicode character encoded as UTF-8
 * max_len:
 *  the maximum number of bytes to read, or -1, for no maximum.
 * Returns:
 *  the resulting character. (gunichar)-2 is returned when p points
 *  to a partial sequence at the end of a string that could begin a
 *  valid character; otherwise (gunichar)-1 is returned when p does
 *  not point to a valid UTF-8 encoded Unicode character.
 */
public static gunichar utf8_GetCharValidated(char[] p, int maxLen)
{
	// Wraps: gunichar g_utf8_get_char_validated (const gchar *p, gssize max_len);
	auto bytes = Str.toStringz(p);
	auto decoded = g_utf8_get_char_validated(bytes, maxLen);
	return decoded;
}
/**
 * Converts an integer character offset into a pointer to a
 * position within the string.
 * Since 2.10 a negative offset may be passed to step backwards.
 * When offset lies in the last fourth of the string it is usually
 * worth stepping backwards from the end rather than forwards,
 * since moving forward is about 3 times faster than moving
 * backward.
 * str:
 *  a UTF-8 encoded string
 * offset:
 *  a character offset within str
 * Returns:
 *  the resulting pointer, passed through Str.toString before it
 *  is handed back
 */
public static char[] utf8_OffsetToPointer(char[] str, int offset)
{
	// Wraps: gchar* g_utf8_offset_to_pointer (const gchar *str, glong offset);
	auto start = Str.toStringz(str);
	auto position = g_utf8_offset_to_pointer(start, offset);
	return Str.toString(position);
}
/**
 * Converts a pointer to a position within a string into an
 * integer character offset.
 * Since 2.10 pos may lie before str, in which case a negative
 * offset is returned.
 * str:
 *  a UTF-8 encoded string
 * pos:
 *  a pointer to a position within str
 * Returns:
 *  the resulting character offset
 */
public static int utf8_PointerToOffset(char[] str, char[] pos)
{
	// Wraps: glong g_utf8_pointer_to_offset (const gchar *str, const gchar *pos);
	// NOTE(review): str and pos are each run through Str.toStringz on
	// their own; if that helper copies its argument, pos no longer
	// points into str as the C API expects -- confirm against glib.Str.
	auto offset = g_utf8_pointer_to_offset(Str.toStringz(str), Str.toStringz(pos));
	return offset;
}
/**
 * Finds the previous UTF-8 character in the string before p.
 * p need not be at the start of a UTF-8 character. The character
 * found is not checked for validity beyond starting with an
 * appropriate byte. If p might be the first character of the
 * string, you must use g_utf8_find_prev_char() instead.
 * p:
 *  a pointer to a position within a UTF-8 encoded string
 * Returns:
 *  a pointer to the found character, passed through Str.toString
 *  before it is handed back
 */
public static char[] utf8_PrevChar(char[] p)
{
	// Wraps: gchar* g_utf8_prev_char (const gchar *p);
	// NOTE(review): the pointer given to GLib comes from
	// Str.toStringz(p); whether any bytes of the caller's string
	// precede it depends on that helper -- confirm against glib.Str.
	auto here = Str.toStringz(p);
	auto previous = g_utf8_prev_char(here);
	return Str.toString(previous);
}
/**
 * Finds the start of the next UTF-8 character in the string
 * after p.
 * p need not be at the start of a UTF-8 character. The character
 * found is not checked for validity beyond starting with an
 * appropriate byte.
 * p:
 *  a pointer to a position within a UTF-8 encoded string
 * end:
 *  a pointer to the end of the string, or NULL to indicate that
 *  the string is nul-terminated
 * Returns:
 *  a pointer to the found character or NULL, passed through
 *  Str.toString before it is handed back
 */
public static char[] utf8_FindNextChar(char[] p, char[] end)
{
	// Wraps: gchar* g_utf8_find_next_char (const gchar *p, const gchar *end);
	// NOTE(review): p and end are converted independently with
	// Str.toStringz; if that helper copies, end is unrelated to p's
	// buffer -- confirm against glib.Str.
	auto next = g_utf8_find_next_char(Str.toStringz(p), Str.toStringz(end));
	return Str.toString(next);
}
/**
 * Given a position p within a UTF-8 encoded string str, finds the
 * start of the previous UTF-8 character starting before p. NULL is
 * returned when str holds no UTF-8 characters before p.
 * p need not be at the start of a UTF-8 character. The character
 * found is not checked for validity beyond starting with an
 * appropriate byte.
 * str:
 *  pointer to the beginning of a UTF-8 encoded string
 * p:
 *  pointer to some position within str
 * Returns:
 *  a pointer to the found character or NULL, passed through
 *  Str.toString before it is handed back
 */
public static char[] utf8_FindPrevChar(char[] str, char[] p)
{
	// Wraps: gchar* g_utf8_find_prev_char (const gchar *str, const gchar *p);
	// NOTE(review): str and p are converted independently with
	// Str.toStringz; if that helper copies, p no longer points into
	// str -- confirm against glib.Str.
	auto previous = g_utf8_find_prev_char(Str.toStringz(str), Str.toStringz(p));
	return Str.toString(previous);
}
/**
 * Returns the length of the string in characters.
 * p:
 *  pointer to the start of a UTF-8 encoded string.
 * max:
 *  the maximum number of bytes to examine. A value below 0 means
 *  the string is assumed to be nul-terminated. With 0, p is not
 *  examined and may be NULL.
 * Returns:
 *  the length of the string in characters
 */
public static int utf8_Strlen(char[] p, int max)
{
	// Wraps: glong g_utf8_strlen (const gchar *p, gssize max);
	auto text = Str.toStringz(p);
	auto charCount = g_utf8_strlen(text, max);
	return charCount;
}
/**
 * Like the standard C strncpy() function, except that it copies a
 * given number of characters rather than a given number of bytes.
 * src must be valid UTF-8 encoded text. (Run g_utf8_validate() on
 * all text before using UTF-8 utility functions with it.)
 * dest:
 *  buffer to fill with characters from src
 * src:
 *  UTF-8 encoded string
 * n:
 *  character count
 * Returns:
 *  dest as returned by GLib, passed through Str.toString
 */
public static char[] utf8_Strncpy(char[] dest, char[] src, uint n)
{
	// Wraps: gchar* g_utf8_strncpy (gchar *dest, const gchar *src, gsize n);
	// NOTE(review): GLib writes through Str.toStringz(dest); whether
	// that is the caller's buffer or a copy depends on that helper --
	// confirm against glib.Str.
	auto copied = g_utf8_strncpy(Str.toStringz(dest), Str.toStringz(src), n);
	return Str.toString(copied);
}
/**
 * Finds the leftmost occurrence of the given Unicode character in
 * a UTF-8 encoded string, searching at most len bytes. A len of -1
 * allows an unbounded search.
 * p:
 *  a nul-terminated UTF-8 encoded string
 * len:
 *  the maximum length of p
 * c:
 *  a Unicode character
 * Returns:
 *  NULL if the string does not contain the character; otherwise a
 *  pointer to the start of its leftmost occurrence in the string.
 *  The value is passed through Str.toString before it is handed
 *  back.
 */
public static char[] utf8_Strchr(char[] p, int len, gunichar c)
{
	// Wraps: gchar* g_utf8_strchr (const gchar *p, gssize len, gunichar c);
	auto haystack = Str.toStringz(p);
	auto found = g_utf8_strchr(haystack, len, c);
	return Str.toString(found);
}
/**
 * Finds the rightmost occurrence of the given Unicode character in
 * a UTF-8 encoded string, searching at most len bytes. A len of -1
 * allows an unbounded search.
 * p:
 *  a nul-terminated UTF-8 encoded string
 * len:
 *  the maximum length of p
 * c:
 *  a Unicode character
 * Returns:
 *  NULL if the string does not contain the character; otherwise a
 *  pointer to the start of its rightmost occurrence in the string.
 *  The value is passed through Str.toString before it is handed
 *  back.
 */
public static char[] utf8_Strrchr(char[] p, int len, gunichar c)
{
	// Wraps: gchar* g_utf8_strrchr (const gchar *p, gssize len, gunichar c);
	auto haystack = Str.toStringz(p);
	auto found = g_utf8_strrchr(haystack, len, c);
	return Str.toString(found);
}
/**
 * Reverses a UTF-8 string. str must be valid UTF-8 encoded text.
 * (Run g_utf8_validate() on all text before using UTF-8 utility
 * functions with it.)
 * Unlike g_strreverse(), the underlying C function returns
 * newly-allocated memory that has to be released with g_free();
 * this wrapper releases it after converting the result.
 * str:
 *  a UTF-8 encoded string
 * len:
 *  the maximum length of str to use. If len < 0, then
 *  the string is nul-terminated.
 * Returns:
 *  a string which is the reverse of str.
 * Since 2.2
 */
public static char[] utf8_Strreverse(char[] str, int len)
{
	// Wraps: gchar* g_utf8_strreverse (const gchar *str, gssize len);
	auto reversed = g_utf8_strreverse(Str.toStringz(str), len);
	auto result = Str.toString(reversed);
	// The C buffer was previously dropped without g_free(), leaking it.
	// NOTE(review): assumes Str.toString copies the C string -- confirm.
	g_free(reversed);
	return result;
}
/**
 * Validates UTF-8 encoded text. str is the text to check; for
 * nul-terminated text max_len may be -1, otherwise it should be
 * the number of bytes to validate.
 * When end is non-NULL, the end of the valid range is stored there
 * (that is, the start of the first invalid character if some bytes
 * were invalid, or otherwise the end of the text being validated).
 * Note that g_utf8_validate() returns FALSE when max_len is
 * positive and a NUL is met before max_len bytes have been read.
 * TRUE is returned if all of str was valid. Many GLib and GTK+
 * routines require valid UTF-8 as input, so data read from a file
 * or the network should be checked with g_utf8_validate() before
 * anything else is done with it.
 * str:
 *  a pointer to character data
 * max_len:
 *  max bytes to validate, or -1 to go until NUL
 * end:
 *  return location for end of valid data
 * Returns:
 *  TRUE if the text was valid UTF-8
 */
public static int utf8_Validate(char[] str, int maxLen, char** end)
{
	// Wraps: gboolean g_utf8_validate (const gchar *str, gssize max_len, const gchar **end);
	auto data = Str.toStringz(str);
	auto valid = g_utf8_validate(data, maxLen, end);
	return valid;
}
/**
 * Converts every Unicode character in the string that has a case
 * to uppercase. Exactly how this is done depends on the current
 * locale, and the number of characters in the string may increase.
 * (For instance, the German ess-zet will be changed to SS.)
 * The C function returns a newly allocated string; this wrapper
 * releases it with g_free() after converting the result.
 * str:
 *  a UTF-8 encoded string
 * len:
 *  length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  a string with all characters converted to uppercase.
 */
public static char[] utf8_Strup(char[] str, int len)
{
	// Wraps: gchar* g_utf8_strup (const gchar *str, gssize len);
	auto upper = g_utf8_strup(Str.toStringz(str), len);
	auto result = Str.toString(upper);
	// The C buffer was previously dropped without g_free(), leaking it.
	// NOTE(review): assumes Str.toString copies the C string -- confirm.
	g_free(upper);
	return result;
}
/**
 * Converts every Unicode character in the string that has a case
 * to lowercase. Exactly how this is done depends on the current
 * locale, and the number of characters in the string may change.
 * The C function returns a newly allocated string; this wrapper
 * releases it with g_free() after converting the result.
 * str:
 *  a UTF-8 encoded string
 * len:
 *  length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  a string with all characters converted to lowercase.
 */
public static char[] utf8_Strdown(char[] str, int len)
{
	// Wraps: gchar* g_utf8_strdown (const gchar *str, gssize len);
	auto lower = g_utf8_strdown(Str.toStringz(str), len);
	auto result = Str.toString(lower);
	// The C buffer was previously dropped without g_free(), leaking it.
	// NOTE(review): assumes Str.toString copies the C string -- confirm.
	g_free(lower);
	return result;
}
/**
 * Converts a string into a form that is independent of case. The
 * result does not correspond to any particular case, but it can be
 * compared for equality or ordered with the results of calling
 * g_utf8_casefold() on other strings.
 * Note that g_utf8_casefold() followed by g_utf8_collate() only
 * approximates the correct linguistic case insensitive ordering,
 * though fairly well. Getting it exactly right would need a more
 * sophisticated collation function that takes case sensitivity
 * into account; GLib does not currently provide one.
 * The C function returns a newly allocated string; this wrapper
 * releases it with g_free() after converting the result.
 * str:
 *  a UTF-8 encoded string
 * len:
 *  length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  a string that is a case independent form of str.
 */
public static char[] utf8_Casefold(char[] str, int len)
{
	// Wraps: gchar* g_utf8_casefold (const gchar *str, gssize len);
	auto folded = g_utf8_casefold(Str.toStringz(str), len);
	auto result = Str.toString(folded);
	// The C buffer was previously dropped without g_free(), leaking it.
	// NOTE(review): assumes Str.toString copies the C string -- confirm.
	g_free(folded);
	return result;
}
/**
 * Converts a string into canonical form, standardizing such issues
 * as whether an accented character is represented as a base
 * character plus combining accent or as a single precomposed
 * character. You should generally call g_utf8_normalize() before
 * comparing two Unicode strings.
 * The mode G_NORMALIZE_DEFAULT only standardizes differences that
 * do not affect the text content, such as the accent
 * representation mentioned above. G_NORMALIZE_ALL additionally
 * standardizes the "compatibility" characters in Unicode, such as
 * SUPERSCRIPT THREE, to the standard forms (here DIGIT THREE).
 * Formatting information may be lost, but for most text operations
 * such characters should be considered the same. For example,
 * g_utf8_collate() normalizes with G_NORMALIZE_ALL as its first
 * step.
 * G_NORMALIZE_DEFAULT_COMPOSE and G_NORMALIZE_ALL_COMPOSE behave
 * like G_NORMALIZE_DEFAULT and G_NORMALIZE_ALL but return a result
 * with composed forms rather than a maximally decomposed form.
 * This is often useful if you intend to convert the string to a
 * legacy encoding or pass it to a system with less capable Unicode
 * handling.
 * The C function returns a newly allocated string; this wrapper
 * releases it with g_free() after converting the result.
 * str:
 *  a UTF-8 encoded string.
 * len:
 *  length of str, in bytes, or -1 if str is nul-terminated.
 * mode:
 *  the type of normalization to perform.
 * Returns:
 *  a string that is the normalized form of str.
 */
public static char[] utf8_Normalize(char[] str, int len, GNormalizeMode mode)
{
	// Wraps: gchar* g_utf8_normalize (const gchar *str, gssize len, GNormalizeMode mode);
	auto normalized = g_utf8_normalize(Str.toStringz(str), len, mode);
	auto result = Str.toString(normalized);
	// The C buffer was previously dropped without g_free(), leaking it.
	// NOTE(review): assumes Str.toString copies the C string -- confirm.
	g_free(normalized);
	return result;
}
/**
 * Compares two strings for ordering using the linguistically
 * correct rules for the current locale. When a large number of
 * strings is sorted, it is significantly faster to obtain
 * collation keys with g_utf8_collate_key() and compare the keys
 * with strcmp() than to sort the original strings.
 * str1:
 *  a UTF-8 encoded string
 * str2:
 *  a UTF-8 encoded string
 * Returns:
 *  < 0 if str1 compares before str2,
 *  0 if they compare equal, > 0 if str1 compares after str2.
 */
public static int utf8_Collate(char[] str1, char[] str2)
{
	// Wraps: gint g_utf8_collate (const gchar *str1, const gchar *str2);
	auto ordering = g_utf8_collate(Str.toStringz(str1), Str.toStringz(str2));
	return ordering;
}
/**
 * Converts a string into a collation key that can be compared with
 * other collation keys produced by the same function using
 * strcmp().
 * Comparing the collation keys of two strings with strcmp() always
 * gives the same result as comparing the two original strings with
 * g_utf8_collate().
 * The C function returns a newly allocated string that should be
 * freed with g_free(); this wrapper releases it after converting
 * the result.
 * str:
 *  a UTF-8 encoded string.
 * len:
 *  length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  the collation key.
 */
public static char[] utf8_CollateKey(char[] str, int len)
{
	// Wraps: gchar* g_utf8_collate_key (const gchar *str, gssize len);
	auto key = g_utf8_collate_key(Str.toStringz(str), len);
	auto result = Str.toString(key);
	// The C buffer was previously dropped without g_free(), leaking it.
	// NOTE(review): assumes Str.toString copies the C string -- confirm.
	g_free(key);
	return result;
}
/**
 * Converts a string into a collation key that can be compared with
 * other collation keys produced by the same function using
 * strcmp().
 * To sort filenames correctly, the dot '.' is treated as a special
 * case. Most dictionary orderings seem to consider it
 * insignificant, which yields the ordering "event.c"
 * "eventgenerator.c" "event.h" instead of "event.c" "event.h"
 * "eventgenerator.c". Numbers are also meant to be treated
 * intelligently, so that "file1" "file10" "file5" is sorted as
 * "file1" "file5" "file10".
 * The C function returns a newly allocated string that should be
 * freed with g_free(); this wrapper releases it after converting
 * the result.
 * str:
 *  a UTF-8 encoded string.
 * len:
 *  length of str, in bytes, or -1 if str is nul-terminated.
 * Returns:
 *  the collation key.
 * Since 2.8
 */
public static char[] utf8_CollateKeyForFilename(char[] str, int len)
{
	// Wraps: gchar* g_utf8_collate_key_for_filename (const gchar *str, gssize len);
	auto key = g_utf8_collate_key_for_filename(Str.toStringz(str), len);
	auto result = Str.toString(key);
	// The C buffer was previously dropped without g_free(), leaking it.
	// NOTE(review): assumes Str.toString copies the C string -- confirm.
	g_free(key);
	return result;
}
/**
 * Converts a string from UTF-8 to UTF-16. A 0 character is
 * appended to the result after the converted text.
 * str:
 *  a UTF-8 encoded string
 * len:
 *  the maximum length (number of characters) of str to use.
 *  If len < 0, then the string is nul-terminated.
 *  NOTE(review): the sibling conversion functions describe len in
 *  bytes -- confirm the unit against the GLib reference.
 * items_read:
 *  location to store number of bytes read, or NULL.
 *  If NULL, G_CONVERT_ERROR_PARTIAL_INPUT is returned when str
 *  ends in a partial character. If an error occurs, the index of
 *  the invalid input is stored here.
 * items_written:
 *  location to store number of gunichar2 written, or NULL.
 *  The stored value does not include the trailing 0.
 * error:
 *  location to store the error occurring, or NULL to ignore
 *  errors. Any of the errors in GConvertError other than
 *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  a pointer to a newly allocated UTF-16 string, which must be
 *  freed with g_free(). If an error occurs, NULL is returned and
 *  error set.
 */
public static gunichar2* utf8_ToUtf16(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// Wraps: gunichar2* g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
	auto input = Str.toStringz(str);
	auto utf16 = g_utf8_to_utf16(input, len, itemsRead, itemsWritten, error);
	return utf16;
}
/**
 * Converts a string from UTF-8 to a 32-bit fixed width
 * representation as UCS-4. A trailing 0 is appended to the string
 * after the converted text.
 * str:
 *  a UTF-8 encoded string
 * len:
 *  the maximum length of str to use. If len < 0, then
 *  the string is nul-terminated.
 * items_read:
 *  location to store number of bytes read, or NULL.
 *  If NULL, G_CONVERT_ERROR_PARTIAL_INPUT is returned when str
 *  ends in a partial character. If an error occurs, the index of
 *  the invalid input is stored here.
 * items_written:
 *  location to store number of characters written or NULL.
 *  The stored value does not include the trailing 0 character.
 * error:
 *  location to store the error occurring, or NULL to ignore
 *  errors. Any of the errors in GConvertError other than
 *  G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  a pointer to a newly allocated UCS-4 string, which must be
 *  freed with g_free(). If an error occurs, NULL is returned and
 *  error set.
 */
public static gunichar* utf8_ToUcs4(char[] str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// Wraps: gunichar* g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error);
	auto input = Str.toStringz(str);
	auto ucs4 = g_utf8_to_ucs4(input, len, itemsRead, itemsWritten, error);
	return ucs4;
}
/**
 * Converts UTF-8 text into UCS-4, a 32-bit fixed width representation,
 * on the assumption that the input is valid UTF-8. It is described as
 * roughly twice as fast as g_utf8_to_ucs4(), but performs no error
 * checking on the input.
 * str:
 *  the UTF-8 encoded input
 * len:
 *  the maximum length of str to use. A negative value means str is
 *  nul-terminated.
 * itemsWritten:
 *  receives the number of characters in the result, or NULL.
 * Returns:
 *  a pointer to a newly allocated UCS-4 string, to be released with
 *  g_free().
 */
public static gunichar* utf8_ToUcs4_Fast(char[] str, int len, int* itemsWritten)
{
	// gunichar* g_utf8_to_ucs4_fast (const gchar *str, glong len, glong *items_written);
	char* utf8 = Str.toStringz(str);
	gunichar* ucs4 = g_utf8_to_ucs4_fast(utf8, len, itemsWritten);
	return ucs4;
}
/**
 * Converts UTF-16 text into UCS-4. The result is terminated with a
 * 0 character.
 * str:
 *  the UTF-16 encoded input
 * len:
 *  the maximum length (number of gunichar2) of str to use. A negative
 *  value means str is terminated with a 0 character.
 * itemsRead:
 *  receives the number of words read, or NULL. When NULL,
 *  G_CONVERT_ERROR_PARTIAL_INPUT is reported if str ends in a partial
 *  character. On error, the index of the invalid input is stored here.
 * itemsWritten:
 *  receives the number of characters written (not counting the
 *  trailing 0 character), or NULL.
 * error:
 *  receives the error that occurred, or NULL to ignore errors. Any
 *  GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  a pointer to a newly allocated UCS-4 string, to be released with
 *  g_free(). On failure NULL is returned and error is set.
 */
public static gunichar* utf16_ToUcs4(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar* g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
	gunichar* ucs4 = g_utf16_to_ucs4(str, len, itemsRead, itemsWritten, error);
	return ucs4;
}
/**
 * Converts UTF-16 text into UTF-8. The result is terminated with a
 * 0 byte.
 * The input is expected to be in native endianness already; an initial
 * byte-order-mark character receives no special handling. g_convert()
 * can be used for a byte buffer of UTF-16 data whose endianness is
 * ambiguous.
 * str:
 *  the UTF-16 encoded input
 * len:
 *  the maximum length (number of gunichar2) of str to use. A negative
 *  value means str is terminated with a 0 character.
 * itemsRead:
 *  receives the number of words read, or NULL. When NULL,
 *  G_CONVERT_ERROR_PARTIAL_INPUT is reported if str ends in a partial
 *  character. On error, the index of the invalid input is stored here.
 * itemsWritten:
 *  receives the number of bytes written (not counting the trailing
 *  0 byte), or NULL.
 * error:
 *  receives the error that occurred, or NULL to ignore errors. Any
 *  GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  the converted UTF-8 text as a D string. GLib itself returns a newly
 *  allocated buffer that must be freed with g_free(), or NULL with
 *  error set on failure.
 */
public static char[] utf16_ToUtf8(gunichar2* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gchar* g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error);
	char* utf8 = g_utf16_to_utf8(str, len, itemsRead, itemsWritten, error);

	// NOTE(review): the GLib result is never handed to g_free() here.
	// Confirm whether Str.toString copies its argument (leak) and how it
	// treats the NULL returned on error.
	return Str.toString(utf8);
}
/**
 * Converts UCS-4 text into UTF-16. A 0 character is appended to the
 * result after the converted text.
 * str:
 *  the UCS-4 encoded input
 * len:
 *  the maximum length (number of characters) of str to use. A negative
 *  value means str is terminated with a 0 character.
 * itemsRead:
 *  receives the number of bytes read (wording taken from the GLib
 *  text), or NULL. On error, the index of the invalid input is
 *  stored here.
 * itemsWritten:
 *  receives the number of gunichar2 written (not counting the
 *  trailing 0), or NULL.
 * error:
 *  receives the error that occurred, or NULL to ignore errors. Any
 *  GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  a pointer to a newly allocated UTF-16 string, to be released with
 *  g_free(). On failure NULL is returned and error is set.
 */
public static gunichar2* ucs4_ToUtf16(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gunichar2* g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
	gunichar2* utf16 = g_ucs4_to_utf16(str, len, itemsRead, itemsWritten, error);
	return utf16;
}
/**
 * Converts text from UCS-4, a 32-bit fixed width representation, into
 * UTF-8. The result is terminated with a 0 byte.
 * str:
 *  the UCS-4 encoded input
 * len:
 *  the maximum length (number of characters) of str to use. A negative
 *  value means str is terminated with a 0 character.
 * itemsRead:
 *  receives the number of characters read, or NULL. On failure it is
 *  set to the position of the first invalid input character.
 * itemsWritten:
 *  receives the number of bytes written (not counting the trailing
 *  0 byte), or NULL.
 * error:
 *  receives the error that occurred, or NULL to ignore errors. Any
 *  GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
 * Returns:
 *  the converted UTF-8 text as a D string. GLib itself returns a newly
 *  allocated buffer that must be freed with g_free(), or NULL with
 *  error set on failure.
 */
public static char[] ucs4_ToUtf8(gunichar* str, int len, int* itemsRead, int* itemsWritten, GError** error)
{
	// gchar* g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error);
	char* utf8 = g_ucs4_to_utf8(str, len, itemsRead, itemsWritten, error);

	// NOTE(review): the GLib result is never handed to g_free() here.
	// Confirm whether Str.toString copies its argument (leak) and how it
	// treats the NULL returned on error.
	return Str.toString(utf8);
}
/**
 * Converts a single character to UTF-8.
 * c:
 *  a Unicode character code
 * outbuf:
 *  output buffer that receives the encoded bytes. GLib documents a
 *  requirement of at least 6 bytes of space. If null, only the length
 *  is computed and returned and nothing is written.
 * Returns:
 *  number of bytes written (or, for a null outbuf, the number of bytes
 *  the encoding needs)
 * See Also
 *  g_locale_to_utf8(), g_locale_from_utf8()
 *  Convenience functions for converting between UTF-8 and the locale
 *  encoding.
 */
public static int unicharToUtf8(gunichar c, char[] outbuf)
{
	// gint g_unichar_to_utf8 (gunichar c, gchar *outbuf);

	// Forward a null buffer as NULL so GLib only computes the length.
	if ( outbuf.ptr is null )
	{
		return g_unichar_to_utf8(c, null);
	}

	// Str.toStringz is an input-string conversion and is not guaranteed to
	// hand back the caller's own storage, so it must not be used for an
	// output buffer. Encode into a local buffer of the documented maximum
	// size, then copy the bytes actually produced into the caller's array.
	char[6] encoded;
	int written = g_unichar_to_utf8(c, encoded.ptr);

	// The slice assignment is bounds-checked in non-release builds, so a
	// too-short outbuf is reported instead of being overrun.
	outbuf[0 .. written] = encoded[0 .. written];
	return written;
}