1 /* nfkc.c Unicode normalization utilities.
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include "stringprep.h"
31 /* This file contains functions from GLIB, including gutf8.c and
32 * gunidecomp.c, all licensed under LGPL and copyright held by:
34 * Copyright (C) 1999, 2000 Tom Tromey
35 * Copyright 2000 Red Hat, Inc.
38 /* Hacks to make syncing with GLIB code easier. */
41 #define guchar unsigned char
44 #define guint unsigned int
45 #define gushort unsigned short
46 #define gint16 int16_t
47 #define guint16 uint16_t
48 #define gunichar uint32_t
50 #define gssize ssize_t
51 #define g_malloc malloc
54 #define g_set_error(a,b,c,d) ((void) 0)
/* Reserve room for n_structs objects of struct_type through g_malloc and
 * cast the returned pointer to struct_type *.  The byte count is the plain
 * product of two gsize values; nothing here guards against that product
 * wrapping around. */
55 #define g_new(struct_type, n_structs) \
56 ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs))))
57 # if defined (__GNUC__) && !defined (__STRICT_ANSI__) && !defined (__cplusplus)
58 # define G_STMT_START (void)(
61 # if (defined (sun) || defined (__sun__))
62 # define G_STMT_START if (1)
63 # define G_STMT_END else (void)0
65 # define G_STMT_START do
66 # define G_STMT_END while (0)
/* Stand-in for GLib's precondition check.  The expansion contains nothing
 * but (void)0, so neither expr nor val is evaluated and the enclosing
 * function never returns early because of it. */
69 #define g_return_val_if_fail(expr,val) G_STMT_START{ (void)0; }G_STMT_END
/* Element count of arr, computed from sizeof; only meaningful when arr is
 * a real array and not a pointer. */
70 #define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0]))
74 /* Code from GLIB gunicode.h starts here. */
79 G_NORMALIZE_NFD
= G_NORMALIZE_DEFAULT
,
80 G_NORMALIZE_DEFAULT_COMPOSE
,
81 G_NORMALIZE_NFC
= G_NORMALIZE_DEFAULT_COMPOSE
,
83 G_NORMALIZE_NFKD
= G_NORMALIZE_ALL
,
84 G_NORMALIZE_ALL_COMPOSE
,
85 G_NORMALIZE_NFKC
= G_NORMALIZE_ALL_COMPOSE
89 /* Code from GLIB gutf8.c starts here. */
91 #define UTF8_COMPUTE(Char, Mask, Len) \
97 else if ((Char & 0xe0) == 0xc0) \
102 else if ((Char & 0xf0) == 0xe0) \
107 else if ((Char & 0xf8) == 0xf0) \
112 else if ((Char & 0xfc) == 0xf8) \
117 else if ((Char & 0xfe) == 0xfc) \
/* Number of bytes used to encode Char: 1 below 0x80, then 2, 3, 4 and 5
 * below 0x800, 0x10000, 0x200000 and 0x4000000 respectively, and 6 for
 * anything larger.  Char is evaluated more than once, so pass an
 * expression without side effects. */
125 #define UTF8_LENGTH(Char) \
126 ((Char) < 0x80 ? 1 : \
127 ((Char) < 0x800 ? 2 : \
128 ((Char) < 0x10000 ? 3 : \
129 ((Char) < 0x200000 ? 4 : \
130 ((Char) < 0x4000000 ? 5 : 6)))))
133 #define UTF8_GET(Result, Chars, Count, Mask, Len) \
134 (Result) = (Chars)[0] & (Mask); \
135 for ((Count) = 1; (Count) < (Len); ++(Count)) \
137 if (((Chars)[(Count)] & 0xc0) != 0x80) \
143 (Result) |= ((Chars)[(Count)] & 0x3f); \
/* Non-zero when Char is below 0x110000, lies outside 0xD800-0xDFFF, lies
 * outside 0xFDD0-0xFDEF, and does not have 0xFFFE or 0xFFFF as its low
 * 16 bits.  Char is evaluated several times. */
146 #define UNICODE_VALID(Char) \
147 ((Char) < 0x110000 && \
148 (((Char) & 0xFFFFF800) != 0xD800) && \
149 ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
150 ((Char) & 0xFFFE) != 0xFFFE)
153 static const gchar utf8_skip_data
[256] = {
154 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
168 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
172 const gchar
*const g_utf8_skip
= utf8_skip_data
;
/* Step over the character that starts at p.  The distance comes from
 * g_utf8_skip, indexed by the first byte read as an unsigned value.
 * The expansion is wrapped in its own parentheses so that a postfix
 * operator applied to the macro (for instance indexing) acts on the
 * resulting pointer rather than on the inner sum.  p is evaluated twice. */
#define g_utf8_next_char(p) \
  ((char *) ((p) + g_utf8_skip[*(const guchar *) (p)]))
178 * @p: pointer to the start of a UTF-8 encoded string.
179 * @max: the maximum number of bytes to examine. If @max
180 * is less than 0, then the string is assumed to be
181 * nul-terminated. If @max is 0, @p will not be examined and
184 * Returns the length of the string in characters.
186 * Return value: the length of the string in characters
189 g_utf8_strlen (const gchar
* p
, gssize max
)
192 const gchar
*start
= p
;
193 g_return_val_if_fail (p
!= NULL
|| max
== 0, 0);
199 p
= g_utf8_next_char (p
);
208 p
= g_utf8_next_char (p
);
210 while (p
- start
< max
&& *p
)
213 p
= g_utf8_next_char (p
);
216 /* only do the last len increment if we got a complete
217 * char (don't count partial chars)
219 if (p
- start
== max
)
228 * @p: a pointer to Unicode character encoded as UTF-8
230 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
231 * If @p does not point to a valid UTF-8 encoded character, results are
232 * undefined. If you are not sure that the bytes are complete
233 * valid Unicode characters, you should use g_utf8_get_char_validated()
236 * Return value: the resulting character
239 g_utf8_get_char (const gchar
* p
)
241 int i
, mask
= 0, len
;
243 unsigned char c
= (unsigned char) *p
;
245 UTF8_COMPUTE (c
, mask
, len
);
247 return (gunichar
) - 1;
248 UTF8_GET (result
, p
, i
, mask
, len
);
255 * @c: an ISO10646 character code
256 * @outbuf: output buffer, must have at least 6 bytes of space.
257 * If %NULL, the length will be computed and returned
258 * and nothing will be written to @outbuf.
260 * Converts a single character to UTF-8.
262 * Return value: number of bytes written
265 g_unichar_to_utf8 (gunichar c
, gchar
* outbuf
)
281 else if (c
< 0x10000)
286 else if (c
< 0x200000)
291 else if (c
< 0x4000000)
304 for (i
= len
- 1; i
> 0; --i
)
306 outbuf
[i
] = (c
& 0x3f) | 0x80;
309 outbuf
[0] = c
| first
;
316 * g_utf8_to_ucs4_fast:
317 * @str: a UTF-8 encoded string
318 * @len: the maximum length of @str to use. If @len < 0, then
319 * the string is nul-terminated.
320 * @items_written: location to store the number of characters in the
323 * Convert a string from UTF-8 to a 32-bit fixed width
324 * representation as UCS-4, assuming valid UTF-8 input.
325 * This function is roughly twice as fast as g_utf8_to_ucs4()
326 * but does no error checking on the input.
328 * Return value: a pointer to a newly allocated UCS-4 string.
329 * This value must be freed with g_free().
332 g_utf8_to_ucs4_fast (const gchar
* str
, glong len
, glong
* items_written
)
339 g_return_val_if_fail (str
!= NULL
, NULL
);
347 p
= g_utf8_next_char (p
);
353 while (p
< str
+ len
&& *p
)
355 p
= g_utf8_next_char (p
);
360 result
= g_new (gunichar
, n_chars
+ 1);
365 for (i
= 0; i
< n_chars
; i
++)
367 gunichar wc
= ((unsigned char *) p
)[0];
402 for (j
= 1; j
< charlen
; j
++)
405 wc
|= ((unsigned char *) p
)[j
] & 0x3f;
422 * @str: a UCS-4 encoded string
423 * @len: the maximum length of @str to use. If @len < 0, then
424 * the string is terminated with a 0 character.
425 * @items_read: location to store number of characters read, or %NULL.
426 * @items_written: location to store number of bytes written or %NULL.
427 * The value here stored does not include the trailing 0
429 * @error: location to store the error occurring, or %NULL to ignore
430 * errors. Any of the errors in #GConvertError other than
431 * %G_CONVERT_ERROR_NO_CONVERSION may occur.
433 * Convert a string from a 32-bit fixed width representation as UCS-4
434 * to UTF-8. The result will be terminated with a 0 byte.
436 * Return value: a pointer to a newly allocated UTF-8 string.
437 * This value must be freed with g_free(). If an
438 * error occurs, %NULL will be returned and
442 g_ucs4_to_utf8 (const gunichar
* str
,
444 glong
* items_read
, glong
* items_written
, GError
** error
)
447 gchar
*result
= NULL
;
452 for (i
= 0; len
< 0 || i
< len
; i
++)
457 if (str
[i
] >= 0x80000000)
462 g_set_error (error
, G_CONVERT_ERROR
,
463 G_CONVERT_ERROR_ILLEGAL_SEQUENCE
,
464 _("Character out of range for UTF-8"));
468 result_length
+= UTF8_LENGTH (str
[i
]);
471 result
= g_malloc (result_length
+ 1);
477 while (p
< result
+ result_length
)
478 p
+= g_unichar_to_utf8 (str
[i
++], p
);
483 *items_written
= p
- result
;
492 /* Code from GLIB gunidecomp.c starts here. */
494 #include "gunidecomp.h"
495 #include "gunicomp.h"
/* Look Page up in combining_class_table_part1.  An entry at or above
 * G_UNICODE_MAX_TABLE_INDEX stands for the whole page: the result is that
 * entry minus the constant.  A smaller entry selects a row of cclass_data,
 * which is then indexed by Char. */
497 #define CC_PART1(Page, Char) \
498 ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
499 ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
500 : (cclass_data[combining_class_table_part1[Page]][Char]))
/* Look Page up in combining_class_table_part2.  An entry at or above
 * G_UNICODE_MAX_TABLE_INDEX stands for the whole page: the result is that
 * entry minus the constant.  A smaller entry selects a row of cclass_data,
 * which is then indexed by Char. */
502 #define CC_PART2(Page, Char) \
503 ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
504 ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
505 : (cclass_data[combining_class_table_part2[Page]][Char]))
507 #define COMBINING_CLASS(Char) \
508 (((Char) <= G_UNICODE_LAST_CHAR_PART1) \
509 ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
510 : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
511 ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
514 /* constants for hangul syllable [de]composition */
522 #define NCount (VCount * TCount)
523 #define SCount (LCount * NCount)
526 * g_unicode_canonical_ordering:
527 * @string: a UCS-4 encoded string.
528 * @len: the maximum length of @string to use.
530 * Computes the canonical ordering of a string in-place.
531 * This rearranges decomposed characters in the string
532 * according to their combining classes. See the Unicode
533 * manual for more information.
536 g_unicode_canonical_ordering (gunichar
* string
, gsize len
)
545 last
= COMBINING_CLASS (string
[0]);
546 for (i
= 0; i
< len
- 1; ++i
)
548 int next
= COMBINING_CLASS (string
[i
+ 1]);
549 if (next
!= 0 && last
> next
)
552 /* Percolate item leftward through string. */
553 for (j
= i
+ 1; j
> 0; --j
)
556 if (COMBINING_CLASS (string
[j
- 1]) <= next
)
559 string
[j
] = string
[j
- 1];
563 /* We're re-entering the loop looking at the old
572 /* http://www.unicode.org/unicode/reports/tr15/#Hangul
573 * r should be null or have sufficient space. Calling with r == NULL will
574 * only calculate the result_len; however, a buffer with space for three
575 * characters will always be big enough. */
577 decompose_hangul (gunichar s
, gunichar
* r
, gsize
* result_len
)
579 gint SIndex
= s
- SBase
;
581 /* not a hangul syllable */
582 if (SIndex
< 0 || SIndex
>= SCount
)
590 gunichar L
= LBase
+ SIndex
/ NCount
;
591 gunichar V
= VBase
+ (SIndex
% NCount
) / TCount
;
592 gunichar T
= TBase
+ SIndex
% TCount
;
611 /* returns a pointer to a null-terminated UTF-8 string */
613 find_decomposition (gunichar ch
, gboolean compat
)
616 int end
= G_N_ELEMENTS (decomp_table
);
618 if (ch
>= decomp_table
[start
].ch
&& ch
<= decomp_table
[end
- 1].ch
)
622 int half
= (start
+ end
) / 2;
623 if (ch
== decomp_table
[half
].ch
)
629 offset
= decomp_table
[half
].compat_offset
;
630 if (offset
== G_UNICODE_NOT_PRESENT_OFFSET
)
631 offset
= decomp_table
[half
].canon_offset
;
635 offset
= decomp_table
[half
].canon_offset
;
636 if (offset
== G_UNICODE_NOT_PRESENT_OFFSET
)
640 return &(decomp_expansion_string
[offset
]);
642 else if (half
== start
)
644 else if (ch
> decomp_table
[half
].ch
)
654 /* L,V => LV and LV,T => LVT */
656 combine_hangul (gunichar a
, gunichar b
, gunichar
* result
)
658 gint LIndex
= a
- LBase
;
659 gint SIndex
= a
- SBase
;
661 gint VIndex
= b
- VBase
;
662 gint TIndex
= b
- TBase
;
664 if (0 <= LIndex
&& LIndex
< LCount
&& 0 <= VIndex
&& VIndex
< VCount
)
666 *result
= SBase
+ (LIndex
* VCount
+ VIndex
) * TCount
;
669 else if (0 <= SIndex
&& SIndex
< SCount
&& (SIndex
% TCount
) == 0
670 && 0 <= TIndex
&& TIndex
<= TCount
)
672 *result
= a
+ TIndex
;
/* Look Page up in compose_table.  An entry at or above
 * G_UNICODE_MAX_TABLE_INDEX stands for the whole page: the result is that
 * entry minus the constant.  A smaller entry selects a row of
 * compose_data, which is then indexed by Char. */
679 #define CI(Page, Char) \
680 ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
681 ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
682 : (compose_data[compose_table[Page]][Char]))
/* Yields 0 when the page number of Char (Char >> 8) is greater than
 * COMPOSE_TABLE_LAST; otherwise hands the page number and the low byte of
 * Char to CI.  Char is evaluated more than once. */
684 #define COMPOSE_INDEX(Char) \
685 ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
688 combine (gunichar a
, gunichar b
, gunichar
* result
)
690 gushort index_a
, index_b
;
692 if (combine_hangul (a
, b
, result
))
695 index_a
= COMPOSE_INDEX (a
);
697 if (index_a
>= COMPOSE_FIRST_SINGLE_START
&& index_a
< COMPOSE_SECOND_START
)
699 if (b
== compose_first_single
[index_a
- COMPOSE_FIRST_SINGLE_START
][0])
702 compose_first_single
[index_a
- COMPOSE_FIRST_SINGLE_START
][1];
709 index_b
= COMPOSE_INDEX (b
);
711 if (index_b
>= COMPOSE_SECOND_SINGLE_START
)
714 compose_second_single
[index_b
- COMPOSE_SECOND_SINGLE_START
][0])
717 compose_second_single
[index_b
- COMPOSE_SECOND_SINGLE_START
][1];
724 if (index_a
>= COMPOSE_FIRST_START
&& index_a
< COMPOSE_FIRST_SINGLE_START
725 && index_b
>= COMPOSE_SECOND_START
726 && index_b
< COMPOSE_SECOND_SINGLE_START
)
729 compose_array
[index_a
- COMPOSE_FIRST_START
][index_b
-
730 COMPOSE_SECOND_START
];
743 _g_utf8_normalize_wc (const gchar
* str
, gssize max_len
, GNormalizeMode mode
)
749 gboolean do_compat
= (mode
== G_NORMALIZE_NFKC
|| mode
== G_NORMALIZE_NFKD
);
750 gboolean do_compose
= (mode
== G_NORMALIZE_NFC
|| mode
== G_NORMALIZE_NFKC
);
754 while ((max_len
< 0 || p
< str
+ max_len
) && *p
)
757 gunichar wc
= g_utf8_get_char (p
);
759 if (wc
>= 0xac00 && wc
<= 0xd7af)
762 decompose_hangul (wc
, NULL
, &result_len
);
767 decomp
= find_decomposition (wc
, do_compat
);
770 n_wc
+= g_utf8_strlen (decomp
, -1);
775 p
= g_utf8_next_char (p
);
778 wc_buffer
= g_new (gunichar
, n_wc
+ 1);
785 while ((max_len
< 0 || p
< str
+ max_len
) && *p
)
787 gunichar wc
= g_utf8_get_char (p
);
790 gsize old_n_wc
= n_wc
;
792 if (wc
>= 0xac00 && wc
<= 0xd7af)
795 decompose_hangul (wc
, wc_buffer
+ n_wc
, &result_len
);
800 decomp
= find_decomposition (wc
, do_compat
);
805 for (pd
= decomp
; *pd
!= '\0'; pd
= g_utf8_next_char (pd
))
806 wc_buffer
[n_wc
++] = g_utf8_get_char (pd
);
809 wc_buffer
[n_wc
++] = wc
;
814 cc
= COMBINING_CLASS (wc_buffer
[old_n_wc
]);
818 g_unicode_canonical_ordering (wc_buffer
+ last_start
,
820 last_start
= old_n_wc
;
824 p
= g_utf8_next_char (p
);
829 g_unicode_canonical_ordering (wc_buffer
+ last_start
,
836 /* All decomposed and reordered */
838 if (do_compose
&& n_wc
> 0)
844 for (i
= 0; i
< n_wc
; i
++)
846 int cc
= COMBINING_CLASS (wc_buffer
[i
]);
849 (last_cc
== 0 || last_cc
!= cc
) &&
850 combine (wc_buffer
[last_start
], wc_buffer
[i
],
851 &wc_buffer
[last_start
]))
853 for (j
= i
+ 1; j
< n_wc
; j
++)
854 wc_buffer
[j
- 1] = wc_buffer
[j
];
861 last_cc
= COMBINING_CLASS (wc_buffer
[i
- 1]);
880 * @str: a UTF-8 encoded string.
881 * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
882 * @mode: the type of normalization to perform.
884 * Converts a string into canonical form, standardizing
885 * such issues as whether a character with an accent
886 * is represented as a base character and combining
887 * accent or as a single precomposed character. You
888 * should generally call g_utf8_normalize() before
889 * comparing two Unicode strings.
891 * The normalization mode %G_NORMALIZE_DEFAULT only
892 * standardizes differences that do not affect the
893 * text content, such as the above-mentioned accent
894 * representation. %G_NORMALIZE_ALL also standardizes
895 * the "compatibility" characters in Unicode, such
896 * as SUPERSCRIPT THREE to the standard forms
897 * (in this case DIGIT THREE). Formatting information
898 * may be lost but for most text operations such
899 * characters should be considered the same.
900 * For example, g_utf8_collate() normalizes
901 * with %G_NORMALIZE_ALL as its first step.
903 * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
904 * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
905 * but return a result with composed forms rather
906 * than a maximally decomposed form. This is often
907 * useful if you intend to convert the string to
908 * a legacy encoding or pass it to a system with
909 * less capable Unicode handling.
911 * Return value: a newly allocated string, that is the
912 * normalized form of @str.
915 g_utf8_normalize (const gchar
* str
, gssize len
, GNormalizeMode mode
)
917 gunichar
*result_wc
= _g_utf8_normalize_wc (str
, len
, mode
);
920 result
= g_ucs4_to_utf8 (result_wc
, -1, NULL
, NULL
, NULL
);
926 /* Public Libidn API starts here. */
929 * stringprep_utf8_to_unichar:
930 * @p: a pointer to Unicode character encoded as UTF-8
932 * Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
933 * If @p does not point to a valid UTF-8 encoded character, results are
936 * Return value: the resulting character.
939 stringprep_utf8_to_unichar (const char *p
)
941 return g_utf8_get_char (p
);
945 * stringprep_unichar_to_utf8:
946 * @c: an ISO10646 character code
947 * @outbuf: output buffer, must have at least 6 bytes of space.
948 * If %NULL, the length will be computed and returned
949 * and nothing will be written to @outbuf.
951 * Converts a single character to UTF-8.
953 * Return value: number of bytes written.
956 stringprep_unichar_to_utf8 (uint32_t c
, char *outbuf
)
958 return g_unichar_to_utf8 (c
, outbuf
);
962 * stringprep_utf8_to_ucs4:
963 * @str: a UTF-8 encoded string
964 * @len: the maximum length of @str to use. If @len < 0, then
965 * the string is nul-terminated.
966 * @items_written: location to store the number of characters in the
969 * Convert a string from UTF-8 to a 32-bit fixed width
970 * representation as UCS-4, assuming valid UTF-8 input.
971 * This function does no error checking on the input.
973 * Return value: a pointer to a newly allocated UCS-4 string.
974 * This value must be freed with free().
977 stringprep_utf8_to_ucs4 (const char *str
, ssize_t len
, size_t * items_written
)
979 return g_utf8_to_ucs4_fast (str
, (glong
) len
, (glong
*) items_written
);
983 * stringprep_ucs4_to_utf8:
984 * @str: a UCS-4 encoded string
985 * @len: the maximum length of @str to use. If @len < 0, then
986 * the string is terminated with a 0 character.
987 * @items_read: location to store number of characters read, or %NULL.
988 * @items_written: location to store number of bytes written or %NULL.
989 * The value here stored does not include the trailing 0
992 * Convert a string from a 32-bit fixed width representation as UCS-4
993 * to UTF-8. The result will be terminated with a 0 byte.
995 * Return value: a pointer to a newly allocated UTF-8 string.
996 * This value must be freed with free(). If an
997 * error occurs, %NULL will be returned and
1001 stringprep_ucs4_to_utf8 (const uint32_t * str
, ssize_t len
,
1002 size_t * items_read
, size_t * items_written
)
1004 return g_ucs4_to_utf8 (str
, len
, (glong
*) items_read
,
1005 (glong
*) items_written
, NULL
);
1009 * stringprep_utf8_nfkc_normalize:
1010 * @str: a UTF-8 encoded string.
1011 * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
1013 * Converts a string into canonical form, standardizing
1014 * such issues as whether a character with an accent
1015 * is represented as a base character and combining
1016 * accent or as a single precomposed character.
1018 * The normalization mode is NFKC (ALL COMPOSE). It standardizes
1019 * differences that do not affect the text content, such as the
1020 * above-mentioned accent representation. It standardizes the
1021 * "compatibility" characters in Unicode, such as SUPERSCRIPT THREE to
1022 * the standard forms (in this case DIGIT THREE). Formatting
1023 * information may be lost but for most text operations such
1024 * characters should be considered the same. It returns a result with
1025 * composed forms rather than a maximally decomposed form.
1027 * Return value: a newly allocated string, that is the
1028 * NFKC normalized form of @str.
1031 stringprep_utf8_nfkc_normalize (const char *str
, ssize_t len
)
1033 return g_utf8_normalize (str
, len
, G_NORMALIZE_NFKC
);
1037 * stringprep_ucs4_nfkc_normalize:
1038 * @str: a Unicode string.
1039 * @len: length of @str array, or -1 if @str is nul-terminated.
1041 * Converts UCS4 string into UTF-8 and runs
1042 * stringprep_utf8_nfkc_normalize().
1044 * Return value: a newly allocated Unicode string, that is the NFKC
1045 * normalized form of @str.
1048 stringprep_ucs4_nfkc_normalize (uint32_t * str
, ssize_t len
)
1051 uint32_t *result_wc
;
1053 p
= stringprep_ucs4_to_utf8 (str
, len
, 0, 0);
1054 result_wc
= _g_utf8_normalize_wc (p
, -1, G_NORMALIZE_NFKC
);