1 /* idna.c Convert to or from IDN strings.
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 * @in: input array with unicode code points.
29 * @inlen: length of input array with unicode code points.
30 * @out: output zero terminated string that must have room for at
31 * least 63 characters plus the terminating zero.
32 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
34 * The ToASCII operation takes a sequence of Unicode code points that make
35 * up one label and transforms it into a sequence of code points in the
36 * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
37 * resulting sequence are equivalent labels.
39 * It is important to note that the ToASCII operation can fail. ToASCII
40 * fails if any step of it fails. If any step of the ToASCII operation
41 * fails on any label in a domain name, that domain name MUST NOT be used
42 * as an internationalized domain name. The method for dealing with this
43 * failure is application-specific.
45 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
46 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
47 * sequence of ASCII code points or a failure condition.
49 * ToASCII never alters a sequence of code points that are all in the ASCII
50 * range to begin with (although it could fail). Applying the ToASCII
51 * operation multiple times has exactly the same effect as applying it just once.
54 * Return value: Returns 0 on success, or an error code.
57 idna_to_ascii_4i (const uint32_t * in
, size_t inlen
, char *out
, int flags
)
60 uint32_t *src
; /* XXX don't need to copy data? */
65 * ToASCII consists of the following steps:
67 * 1. If all code points in the sequence are in the ASCII range (0..7F)
68 * then skip to step 3.
76 for (i
= 0; i
< inlen
; i
++)
81 src
= malloc (sizeof (in
[0]) * (inlen
+ 1));
83 return IDNA_MALLOC_ERROR
;
85 memcpy (src
, in
, sizeof (in
[0]) * inlen
);
93 * 2. Perform the steps specified in [NAMEPREP] and fail if there is
94 * an error. The AllowUnassigned flag is used in [NAMEPREP].
100 p
= stringprep_ucs4_to_utf8 (in
, inlen
, NULL
, NULL
);
102 return IDNA_MALLOC_ERROR
;
107 len
= 2 * len
+ 10; /* XXX better guess? */
108 p
= realloc (p
, len
);
110 return IDNA_MALLOC_ERROR
;
112 if (flags
& IDNA_ALLOW_UNASSIGNED
)
113 rc
= stringprep_nameprep (p
, len
);
115 rc
= stringprep_nameprep_no_unassigned (p
, len
);
117 while (rc
== STRINGPREP_TOO_SMALL_BUFFER
);
119 if (rc
!= STRINGPREP_OK
)
122 return IDNA_STRINGPREP_ERROR
;
125 src
= stringprep_utf8_to_ucs4 (p
, -1, NULL
);
132 * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
134 * (a) Verify the absence of non-LDH ASCII code points; that is,
135 * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
137 * (b) Verify the absence of leading and trailing hyphen-minus;
138 * that is, the absence of U+002D at the beginning and end of the sequence.
142 if (flags
& IDNA_USE_STD3_ASCII_RULES
)
146 for (i
= 0; src
[i
]; i
++)
147 if (src
[i
] <= 0x2C || src
[i
] == 0x2E || src
[i
] == 0x2F ||
148 (src
[i
] >= 0x3A && src
[i
] <= 0x40) ||
149 (src
[i
] >= 0x5B && src
[i
] <= 0x60) ||
150 (src
[i
] >= 0x7B && src
[i
] <= 0x7F))
153 return IDNA_CONTAINS_LDH
;
156 if (src
[0] == 0x002D || (i
> 0 && src
[i
- 1] == 0x002D))
159 return IDNA_CONTAINS_MINUS
;
164 * 4. If all code points in the sequence are in the ASCII range
165 * (0..7F), then skip to step 8.
173 for (i
= 0; src
[i
]; i
++)
177 /* copy string to output buffer if we are about to skip to step8 */
188 * 5. Verify that the sequence does NOT begin with the ACE prefix.
197 for (i
= 0; match
&& i
< strlen (IDNA_ACE_PREFIX
); i
++)
198 if (((uint32_t) IDNA_ACE_PREFIX
[i
] & 0xFF) != src
[i
])
203 return IDNA_CONTAINS_ACE_PREFIX
;
208 * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
209 * and fail if there is an error.
211 for (len
= 0; src
[len
]; len
++)
214 outlen
= 63 - strlen (IDNA_ACE_PREFIX
);
215 rc
= punycode_encode (len
, src
, NULL
,
216 &outlen
, &out
[strlen (IDNA_ACE_PREFIX
)]);
218 if (rc
!= PUNYCODE_SUCCESS
)
219 return IDNA_PUNYCODE_ERROR
;
220 out
[strlen (IDNA_ACE_PREFIX
) + outlen
] = '\0';
223 * 7. Prepend the ACE prefix.
226 memcpy (out
, IDNA_ACE_PREFIX
, strlen (IDNA_ACE_PREFIX
));
229 * 8. Verify that the number of code points is in the range 1 to 63 inclusive.
234 if (strlen (out
) < 1 || strlen (out
) > 63)
235 return IDNA_INVALID_LENGTH
;
241 idna_to_unicode_internal (char *utf8in
, size_t utf8len
,
242 uint32_t * out
, size_t * outlen
, int flags
)
248 * 1. If all code points in the sequence are in the ASCII range (0..7F)
249 * then skip to step 3.
257 for (i
= 0; utf8in
[i
]; i
++)
258 if (utf8in
[i
] & ~0x7F)
265 * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
266 * error. (If step 3 of ToASCII is also performed here, it will not
267 * affect the overall behavior of ToUnicode, but it is not
268 * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
271 if (flags
& IDNA_ALLOW_UNASSIGNED
)
272 rc
= stringprep_nameprep (utf8in
, utf8len
);
274 rc
= stringprep_nameprep_no_unassigned (utf8in
, utf8len
);
276 if (rc
!= STRINGPREP_OK
)
277 return IDNA_STRINGPREP_ERROR
;
279 /* 3. Verify that the sequence begins with the ACE prefix, and save a
280 * copy of the sequence.
284 if (memcmp (IDNA_ACE_PREFIX
, utf8in
, strlen (IDNA_ACE_PREFIX
)) != 0)
285 return IDNA_NO_ACE_PREFIX
;
287 /* 4. Remove the ACE prefix.
290 memmove (utf8in
, &utf8in
[strlen (IDNA_ACE_PREFIX
)],
291 strlen (utf8in
) - strlen (IDNA_ACE_PREFIX
) + 1);
293 /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
294 * and fail if there is an error. Save a copy of the result of this step.
298 (*outlen
)--; /* reserve one for the zero */
300 rc
= punycode_decode (strlen (utf8in
), utf8in
, outlen
, out
, NULL
);
301 if (rc
!= PUNYCODE_SUCCESS
)
302 return IDNA_PUNYCODE_ERROR
;
304 out
[*outlen
] = 0; /* add zero */
309 rc
= idna_to_ascii_4i (out
, *outlen
, tmpout
, flags
);
310 if (rc
!= IDNA_SUCCESS
)
313 /* 7. Verify that the result of step 6 matches the saved copy from
314 * step 3, using a case-insensitive ASCII comparison.
317 if (strcasecmp (utf8in
, tmpout
+ strlen (IDNA_ACE_PREFIX
)) != 0)
318 return IDNA_ROUNDTRIP_VERIFY_ERROR
;
320 /* 8. Return the saved copy from step 5.
327 * idna_to_unicode_44i
328 * @in: input array with unicode code points.
329 * @inlen: length of input array with unicode code points.
330 * @out: output array with unicode code points.
331 * @outlen: on input, maximum size of output array with unicode code points,
332 * on exit, actual size of output array with unicode code points.
333 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
335 * The ToUnicode operation takes a sequence of Unicode code points
336 * that make up one label and returns a sequence of Unicode code
337 * points. If the input sequence is a label in ACE form, then the
338 * result is an equivalent internationalized label that is not in ACE
339 * form, otherwise the original sequence is returned unaltered.
341 * ToUnicode never fails. If any step fails, then the original input
342 * sequence is returned immediately in that step.
344 * The ToUnicode output never contains more code points than its
345 * input. Note that the number of octets needed to represent a
346 * sequence of code points depends on the particular character encoding used.
349 * The inputs to ToUnicode are a sequence of code points, the
350 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
351 * ToUnicode is always a sequence of Unicode code points.
353 * Return value: Returns error condition, but it must only be used for
354 * debugging purposes. The output buffer is always
355 * guaranteed to contain the correct data according to
356 * the specification (sans malloc induced errors). NB!
357 * This means that you normally ignore the return code
358 * from this function, as checking it means breaking the standard.
362 idna_to_unicode_44i (const uint32_t * in
, size_t inlen
,
363 uint32_t * out
, size_t * outlen
, int flags
)
366 size_t outlensave
= *outlen
;
369 p
= stringprep_ucs4_to_utf8 (in
, inlen
, NULL
, NULL
);
371 return IDNA_MALLOC_ERROR
;
373 p
= realloc (p
, BUFSIZ
);
375 return IDNA_MALLOC_ERROR
;
377 rc
= idna_to_unicode_internal (p
, BUFSIZ
, out
, outlen
, flags
);
378 if (rc
!= IDNA_SUCCESS
)
380 memcpy (out
, in
, sizeof (in
[0]) * (inlen
< outlensave
?
381 inlen
: outlensave
));
390 /* Wrappers that handle several labels */
394 * @input: zero terminated input Unicode string.
395 * @output: pointer to newly allocated output string.
396 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
398 * Convert UCS-4 domain name to ASCII string. The domain name may
399 * contain several labels, separated by dots. The output buffer must
400 * be deallocated by the caller.
402 * Return value: Returns IDNA_SUCCESS on success, or error code.
405 idna_to_ascii_4z (const uint32_t * input
, char **output
, int flags
)
407 const uint32_t *start
= input
;
408 const uint32_t *end
= input
;
419 /* 1) Whenever dots are used as label separators, the following
420 characters MUST be recognized as dots: U+002E (full stop),
421 U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
422 U+FF61 (halfwidth ideographic full stop). */
425 *end
!= 0x3002 && *end
!= 0xFF0E && *end
!= 0xFF61; end
++)
428 rc
= idna_to_ascii_4i (start
, end
- start
, buf
, flags
);
429 if (rc
!= IDNA_SUCCESS
)
434 out
= realloc (out
, strlen (out
) + 1 + strlen (buf
) + 1);
436 return IDNA_MALLOC_ERROR
;
442 out
= (char *) strdup (buf
);
444 return IDNA_MALLOC_ERROR
;
458 * @input: zero terminated input UTF-8 string.
459 * @output: pointer to newly allocated output string.
460 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
462 * Convert UTF-8 domain name to ASCII string. The domain name may
463 * contain several labels, separated by dots. The output buffer must
464 * be deallocated by the caller.
466 * Return value: Returns IDNA_SUCCESS on success, or error code.
469 idna_to_ascii_8z (const char *input
, char **output
, int flags
)
475 ucs4
= stringprep_utf8_to_ucs4 (input
, -1, &ucs4len
);
477 return IDNA_ICONV_ERROR
;
479 rc
= idna_to_ascii_4z (ucs4
, output
, flags
);
489 * @input: zero terminated input string encoded in the current locale's character set.
490 * @output: pointer to newly allocated output string.
491 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
493 * Convert domain name in the locale's encoding to ASCII string. The
494 * domain name may contain several labels, separated by dots. The
495 * output buffer must be deallocated by the caller.
497 * Return value: Returns IDNA_SUCCESS on success, or error code.
500 idna_to_ascii_lz (const char *input
, char **output
, int flags
)
505 utf8
= stringprep_locale_to_utf8 (input
);
507 return IDNA_ICONV_ERROR
;
509 rc
= idna_to_ascii_8z (utf8
, output
, flags
);
517 * idna_to_unicode_4z4z:
518 * @input: zero-terminated Unicode string.
519 * @output: pointer to newly allocated output Unicode string.
520 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
522 * Convert possibly ACE encoded domain name in UCS-4 format into a
523 * UCS-4 string. The domain name may contain several labels,
524 * separated by dots. The output buffer must be deallocated by the caller.
527 * Return value: Returns IDNA_SUCCESS on success, or error code.
530 idna_to_unicode_4z4z (const uint32_t * input
, uint32_t ** output
, int flags
)
532 const uint32_t *start
= input
;
533 const uint32_t *end
= input
;
536 uint32_t *out
= NULL
;
546 /* 1) Whenever dots are used as label separators, the following
547 characters MUST be recognized as dots: U+002E (full stop),
548 U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
549 U+FF61 (halfwidth ideographic full stop). */
552 *end
!= 0x3002 && *end
!= 0xFF0E && *end
!= 0xFF61; end
++)
555 buflen
= end
- start
;
556 buf
= malloc (sizeof (buf
[0]) * (buflen
+ 1));
558 return IDNA_MALLOC_ERROR
;
560 rc
= idna_to_unicode_44i (start
, end
- start
, buf
, &buflen
, flags
);
561 /* don't check rc as per specification! */
565 out
= realloc (out
, sizeof (out
[0]) * (outlen
+ 1 + buflen
+ 1));
567 return IDNA_MALLOC_ERROR
;
568 out
[outlen
++] = 0x002E; /* '.' (full stop) */
569 memcpy (out
+ outlen
, buf
, sizeof (buf
[0]) * buflen
);
591 * idna_to_unicode_8z4z:
592 * @input: zero-terminated UTF-8 string.
593 * @output: pointer to newly allocated output Unicode string.
594 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
596 * Convert possibly ACE encoded domain name in UTF-8 format into a
597 * UCS-4 string. The domain name may contain several labels,
598 * separated by dots. The output buffer must be deallocated by the caller.
601 * Return value: Returns IDNA_SUCCESS on success, or error code.
604 idna_to_unicode_8z4z (const char *input
, uint32_t ** output
, int flags
)
610 ucs4
= stringprep_utf8_to_ucs4 (input
, -1, &ucs4len
);
612 return IDNA_ICONV_ERROR
;
614 rc
= idna_to_unicode_4z4z (ucs4
, output
, flags
);
621 * idna_to_unicode_8z8z:
622 * @input: zero-terminated UTF-8 string.
623 * @output: pointer to newly allocated output UTF-8 string.
624 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
626 * Convert possibly ACE encoded domain name in UTF-8 format into a
627 * UTF-8 string. The domain name may contain several labels,
628 * separated by dots. The output buffer must be deallocated by the caller.
631 * Return value: Returns IDNA_SUCCESS on success, or error code.
634 idna_to_unicode_8z8z (const char *input
, char **output
, int flags
)
639 rc
= idna_to_unicode_8z4z (input
, &ucs4
, flags
);
640 *output
= stringprep_ucs4_to_utf8 (ucs4
, -1, NULL
, NULL
);
644 return IDNA_ICONV_ERROR
;
650 * idna_to_unicode_8zlz:
651 * @input: zero-terminated UTF-8 string.
652 * @output: pointer to newly allocated output string encoded in the
653 * current locale's character set.
654 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
656 * Convert possibly ACE encoded domain name in UTF-8 format into a
657 * string encoded in the current locale's character set. The domain
658 * name may contain several labels, separated by dots. The output
659 * buffer must be deallocated by the caller.
661 * Return value: Returns IDNA_SUCCESS on success, or error code.
664 idna_to_unicode_8zlz (const char *input
, char **output
, int flags
)
669 rc
= idna_to_unicode_8z8z (input
, &utf8
, flags
);
670 *output
= stringprep_utf8_to_locale (utf8
);
674 return IDNA_ICONV_ERROR
;
680 * idna_to_unicode_lzlz:
681 * @input: zero-terminated string encoded in the current locale's character set.
683 * @output: pointer to newly allocated output string encoded in the
684 * current locale's character set.
685 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
687 * Convert possibly ACE encoded domain name in the locale's character
688 * set into a string encoded in the current locale's character set.
689 * The domain name may contain several labels, separated by dots. The
690 * output buffer must be deallocated by the caller.
692 * Return value: Returns IDNA_SUCCESS on success, or error code.
695 idna_to_unicode_lzlz (const char *input
, char **output
, int flags
)
700 utf8
= stringprep_locale_to_utf8 (input
);
702 return IDNA_ICONV_ERROR
;
704 rc
= idna_to_unicode_8zlz (utf8
, output
, flags
);
711 /* Deprecated interfaces */
715 * @in: input array with unicode code points.
716 * @inlen: length of input array with unicode code points.
717 * @out: output zero terminated string that must have room for at
718 * least 63 characters plus the terminating zero.
719 * @allowunassigned: whether to allow unassigned code points.
720 * @usestd3asciirules: whether to check input for STD3 compliance.
722 * The ToASCII operation takes a sequence of Unicode code points that make
723 * up one label and transforms it into a sequence of code points in the
724 * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
725 * resulting sequence are equivalent labels.
727 * It is important to note that the ToASCII operation can fail. ToASCII
728 * fails if any step of it fails. If any step of the ToASCII operation
729 * fails on any label in a domain name, that domain name MUST NOT be used
730 * as an internationalized domain name. The method for dealing with this
731 * failure is application-specific.
733 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
734 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
735 * sequence of ASCII code points or a failure condition.
737 * ToASCII never alters a sequence of code points that are all in the ASCII
738 * range to begin with (although it could fail). Applying the ToASCII
739 * operation multiple times has exactly the same effect as applying it just once.
742 * Return value: Returns 0 on success, or an error code.
745 idna_to_ascii (const unsigned long *in
, size_t inlen
,
746 char *out
, int allowunassigned
, int usestd3asciirules
)
753 tmp
= malloc (sizeof (tmp
[0]) * inlen
);
755 return IDNA_MALLOC_ERROR
;
758 flags
|= IDNA_ALLOW_UNASSIGNED
;
759 if (usestd3asciirules
)
760 flags
|= IDNA_USE_STD3_ASCII_RULES
;
762 for (i
= 0; i
< inlen
; i
++)
764 rc
= idna_to_ascii_4i (tmp
, inlen
, out
, flags
);
772 * @in: input array with unicode code points.
773 * @inlen: length of input array with unicode code points.
774 * @out: output array with unicode code points.
775 * @outlen: on input, maximum size of output array with unicode code points,
776 * on exit, actual size of output array with unicode code points.
777 * @allowunassigned: whether to allow unassigned code points.
778 * @usestd3asciirules: whether to check input for STD3 compliance.
780 * The ToUnicode operation takes a sequence of Unicode code points
781 * that make up one label and returns a sequence of Unicode code
782 * points. If the input sequence is a label in ACE form, then the
783 * result is an equivalent internationalized label that is not in ACE
784 * form, otherwise the original sequence is returned unaltered.
786 * ToUnicode never fails. If any step fails, then the original input
787 * sequence is returned immediately in that step.
789 * The ToUnicode output never contains more code points than its
790 * input. Note that the number of octets needed to represent a
791 * sequence of code points depends on the particular character encoding used.
794 * The inputs to ToUnicode are a sequence of code points, the
795 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
796 * ToUnicode is always a sequence of Unicode code points.
798 * Return value: Returns error condition, but it must only be used for
799 * debugging purposes. The output buffer is always
800 * guaranteed to contain the correct data according to
801 * the specification (sans malloc induced errors). NB!
802 * This means that you normally ignore the return code
803 * from this function, as checking it means breaking the standard.
807 idna_to_unicode (const unsigned long *in
, size_t inlen
,
808 unsigned long *out
, size_t * outlen
,
809 int allowunassigned
, int usestd3asciirules
)
817 tmpin
= malloc (sizeof (tmpin
[0]) * inlen
);
819 return IDNA_MALLOC_ERROR
;
820 tmpout
= malloc (sizeof (tmpout
[0]) * *outlen
);
822 return IDNA_MALLOC_ERROR
;
825 flags
|= IDNA_ALLOW_UNASSIGNED
;
826 if (usestd3asciirules
)
827 flags
|= IDNA_USE_STD3_ASCII_RULES
;
829 for (i
= 0; i
< inlen
; i
++)
831 rc
= idna_to_unicode_44i (tmpin
, inlen
, tmpout
, outlen
, flags
);
834 for (i
= 0; i
< *outlen
; i
++)
844 * idna_to_ascii_from_ucs4:
845 * @input: zero terminated input Unicode string.
846 * @output: pointer to newly allocated output string.
847 * @allowunassigned: whether to allow unassigned code points.
848 * @usestd3asciirules: whether to check input for STD3 compliance.
850 * Convert UCS-4 domain name to ASCII string. The domain name may
851 * contain several labels, separated by dots. The output buffer must
852 * be deallocated by the caller.
854 * Return value: Returns IDNA_SUCCESS on success, or error code.
857 idna_to_ascii_from_ucs4 (const unsigned long *input
, char **output
,
858 int allowunassigned
, int usestd3asciirules
)
866 for (inlen
= 0; input
[inlen
]; inlen
++)
868 tmp
= malloc (sizeof (tmp
[0]) * (inlen
+ 1));
870 return IDNA_MALLOC_ERROR
;
873 flags
|= IDNA_ALLOW_UNASSIGNED
;
874 if (usestd3asciirules
)
875 flags
|= IDNA_USE_STD3_ASCII_RULES
;
877 for (i
= 0; i
< inlen
; i
++)
880 rc
= idna_to_ascii_4z (tmp
, output
, flags
);
887 * idna_to_ascii_from_utf8:
888 * @input: zero terminated input UTF-8 string.
889 * @output: pointer to newly allocated output string.
890 * @allowunassigned: whether to allow unassigned code points.
891 * @usestd3asciirules: whether to check input for STD3 compliance.
893 * Convert UTF-8 domain name to ASCII string. The domain name may
894 * contain several labels, separated by dots. The output buffer must
895 * be deallocated by the caller.
897 * Return value: Returns IDNA_SUCCESS on success, or error code.
900 idna_to_ascii_from_utf8 (const char *input
, char **output
,
901 int allowunassigned
, int usestd3asciirules
)
907 flags
|= IDNA_ALLOW_UNASSIGNED
;
908 if (usestd3asciirules
)
909 flags
|= IDNA_USE_STD3_ASCII_RULES
;
911 rc
= idna_to_ascii_8z (input
, output
, flags
);
917 * idna_to_ascii_from_locale:
918 * @input: zero terminated input string encoded in the current locale's character set.
919 * @output: pointer to newly allocated output string.
920 * @allowunassigned: whether to allow unassigned code points.
921 * @usestd3asciirules: whether to check input for STD3 compliance.
923 * Convert domain name in the locale's encoding to ASCII string. The
924 * domain name may contain several labels, separated by dots. The
925 * output buffer must be deallocated by the caller.
927 * Return value: Returns IDNA_SUCCESS on success, or error code.
930 idna_to_ascii_from_locale (const char *input
, char **output
,
931 int allowunassigned
, int usestd3asciirules
)
937 flags
|= IDNA_ALLOW_UNASSIGNED
;
938 if (usestd3asciirules
)
939 flags
|= IDNA_USE_STD3_ASCII_RULES
;
941 rc
= idna_to_ascii_lz (input
, output
, flags
);
947 * idna_to_unicode_ucs4_from_ucs4:
948 * @input: zero-terminated Unicode string.
949 * @output: pointer to newly allocated output Unicode string.
950 * @allowunassigned: whether to allow unassigned code points.
951 * @usestd3asciirules: whether to check input for STD3 compliance.
953 * Convert possibly ACE encoded domain name in UCS-4 format into a
954 * UCS-4 string. The domain name may contain several labels,
955 * separated by dots. The output buffer must be deallocated by the caller.
958 * Return value: Returns IDNA_SUCCESS on success, or error code.
961 idna_to_unicode_ucs4_from_ucs4 (const unsigned long *input
,
962 unsigned long **output
,
963 int allowunassigned
, int usestd3asciirules
)
965 size_t inlen
, tmpoutlen
;
972 for (inlen
= 0; input
[inlen
]; inlen
++)
974 tmpin
= malloc (sizeof (tmpin
[0]) * (inlen
+ 1));
976 return IDNA_MALLOC_ERROR
;
979 flags
|= IDNA_ALLOW_UNASSIGNED
;
980 if (usestd3asciirules
)
981 flags
|= IDNA_USE_STD3_ASCII_RULES
;
983 for (i
= 0; i
< inlen
; i
++)
986 rc
= idna_to_unicode_4z4z (tmpin
, &tmpout
, flags
);
989 for (tmpoutlen
= 0; tmpout
[tmpoutlen
]; tmpoutlen
++)
992 *output
= malloc (sizeof (output
[0]) * (tmpoutlen
+ 1));
994 return IDNA_MALLOC_ERROR
;
996 for (i
= 0; i
< tmpoutlen
; i
++)
997 (*output
)[i
] = tmpout
[i
];
1004 * idna_to_unicode_ucs4_from_utf8:
1005 * @input: zero-terminated UTF-8 string.
1006 * @output: pointer to newly allocated output Unicode string.
1007 * @allowunassigned: whether to allow unassigned code points.
1008 * @usestd3asciirules: whether to check input for STD3 compliance.
1010 * Convert possibly ACE encoded domain name in UTF-8 format into a
1011 * UCS-4 string. The domain name may contain several labels,
1012 * separated by dots. The output buffer must be deallocated by the caller.
1015 * Return value: Returns IDNA_SUCCESS on success, or error code.
1018 idna_to_unicode_ucs4_from_utf8 (const char *input
, unsigned long **output
,
1019 int allowunassigned
, int usestd3asciirules
)
1021 size_t tmpinlen
, tmpoutlen
;
1028 tmpin
= stringprep_utf8_to_ucs4 (input
, -1, &tmpinlen
);
1030 return IDNA_ICONV_ERROR
;
1032 if (allowunassigned
)
1033 flags
|= IDNA_ALLOW_UNASSIGNED
;
1034 if (usestd3asciirules
)
1035 flags
|= IDNA_USE_STD3_ASCII_RULES
;
1037 rc
= idna_to_unicode_4z4z (tmpin
, &tmpout
, flags
);
1040 for (tmpoutlen
= 0; tmpout
[tmpoutlen
]; tmpoutlen
++)
1043 *output
= malloc (sizeof (output
[0]) * (tmpoutlen
+ 1));
1045 return IDNA_MALLOC_ERROR
;
1047 for (i
= 0; i
< tmpoutlen
; i
++)
1048 (*output
)[i
] = tmpout
[i
];
1055 * idna_to_unicode_utf8_from_utf8:
1056 * @input: zero-terminated UTF-8 string.
1057 * @output: pointer to newly allocated output UTF-8 string.
1058 * @allowunassigned: whether to allow unassigned code points.
1059 * @usestd3asciirules: whether to check input for STD3 compliance.
1061 * Convert possibly ACE encoded domain name in UTF-8 format into a
1062 * UTF-8 string. The domain name may contain several labels,
1063 * separated by dots. The output buffer must be deallocated by the caller.
1066 * Return value: Returns IDNA_SUCCESS on success, or error code.
1069 idna_to_unicode_utf8_from_utf8 (const char *input
, char **output
,
1070 int allowunassigned
, int usestd3asciirules
)
1075 if (allowunassigned
)
1076 flags
|= IDNA_ALLOW_UNASSIGNED
;
1077 if (usestd3asciirules
)
1078 flags
|= IDNA_USE_STD3_ASCII_RULES
;
1080 rc
= idna_to_unicode_8z8z (input
, output
, flags
);
1086 * idna_to_unicode_locale_from_utf8:
1087 * @input: zero-terminated UTF-8 string.
1088 * @output: pointer to newly allocated output string encoded in the
1089 * current locale's character set.
1090 * @allowunassigned: whether to allow unassigned code points.
1091 * @usestd3asciirules: whether to check input for STD3 compliance.
1093 * Convert possibly ACE encoded domain name in UTF-8 format into a
1094 * string encoded in the current locale's character set.
1095 * The domain name may contain several labels, separated by dots. The
1096 * output buffer must be deallocated by the caller.
1098 * Return value: Returns IDNA_SUCCESS on success, or error code.
1101 idna_to_unicode_locale_from_utf8 (const char *input
, char **output
,
1102 int allowunassigned
, int usestd3asciirules
)
1107 if (allowunassigned
)
1108 flags
|= IDNA_ALLOW_UNASSIGNED
;
1109 if (usestd3asciirules
)
1110 flags
|= IDNA_USE_STD3_ASCII_RULES
;
1112 rc
= idna_to_unicode_8zlz (input
, output
, flags
);
1118 * idna_to_unicode_locale_from_locale:
1119 * @input: zero-terminated string encoded in the current locale's character set.
1121 * @output: pointer to newly allocated output string encoded in the
1122 * current locale's character set.
1123 * @allowunassigned: whether to allow unassigned code points.
1124 * @usestd3asciirules: whether to check input for STD3 compliance.
1126 * Convert possibly ACE encoded domain name in the locale's character
1127 * set into a string encoded in the current locale's character set.
1128 * The domain name may contain several labels, separated by dots. The
1129 * output buffer must be deallocated by the caller.
1131 * Return value: Returns IDNA_SUCCESS on success, or error code.
1134 idna_to_unicode_locale_from_locale (const char *input
, char **output
,
1135 int allowunassigned
,
1136 int usestd3asciirules
)
1141 if (allowunassigned
)
1142 flags
|= IDNA_ALLOW_UNASSIGNED
;
1143 if (usestd3asciirules
)
1144 flags
|= IDNA_USE_STD3_ASCII_RULES
;
1146 rc
= idna_to_unicode_lzlz (input
, output
, flags
);
1152 /* Deprecated interfaces (even older) */
1157 * @input: zero terminated input Unicode string.
1158 * @output: pointer to newly allocated output string.
1160 * Convert UCS-4 domain name to ASCII string. The AllowUnassigned
1161 * flag is false and std3asciirules flag is false. The domain name
1162 * may contain several labels, separated by dots. The output buffer
1163 * must be deallocated by the caller.
1165 * This function is deprecated in favor of idna_to_ascii_from_ucs4()
1166 * and will be removed in future versions.
1168 * Return value: Returns IDNA_SUCCESS on success, or error code.
1171 idna_ucs4_to_ace (const unsigned long *input
, char **output
)
1173 return idna_to_ascii_from_ucs4 (input
, output
, 0, 0);
1178 * @input: zero terminated input UTF-8 string.
1179 * @output: pointer to newly allocated output string.
1181 * Convert UTF-8 domain name to ASCII string. The AllowUnassigned
1182 * flag is false and std3asciirules flag is false. The domain name
1183 * may contain several labels, separated by dots. The output buffer
1184 * must be deallocated by the caller.
1186 * This function is deprecated in favor of idna_to_ascii_from_utf8()
1187 * and will be removed in future versions.
1189 * Return value: Returns IDNA_SUCCESS on success, or error code.
1192 idna_utf8_to_ace (const char *input
, char **output
)
1194 return idna_to_ascii_from_utf8 (input
, output
, 0, 0);
1198 * idna_locale_to_ace:
1199 * @input: zero terminated input string encoded in the current locale's character set.
1200 * @output: pointer to newly allocated output string.
1202 * Convert domain name in the locale's encoding to ASCII string. The
1203 * AllowUnassigned flag is false and std3asciirules flag is false.
1204 * The domain name may contain several labels, separated by dots. The
1205 * output buffer must be deallocated by the caller.
1207 * This function is deprecated in favor of idna_to_ascii_from_locale()
1208 * and will be removed in future versions.
1210 * Return value: Returns IDNA_SUCCESS on success, or error code.
1213 idna_locale_to_ace (const char *input
, char **output
)
1215 return idna_to_ascii_from_locale (input
, output
, 0, 0);
1219 * idna_ucs4ace_to_ucs4:
1220 * @input: zero-terminated Unicode string.
1221 * @output: pointer to newly allocated output Unicode string.
1223 * Convert possibly ACE encoded domain name in UCS-4 format into a
1224 * UCS-4 string. The AllowUnassigned flag is false and std3asciirules
1225 * flag is false. The domain name may contain several labels,
1226 * separated by dots. The output buffer must be deallocated by the caller.
1229 * This function is deprecated in favor of
1230 * idna_to_unicode_ucs4_from_ucs4() and will be removed in future versions.
1233 * Return value: Returns IDNA_SUCCESS on success, or error code.
1236 idna_ucs4ace_to_ucs4 (const unsigned long *input
, unsigned long **output
)
1238 return idna_to_unicode_ucs4_from_ucs4 (input
, output
, 0, 0);
1242 * idna_utf8ace_to_ucs4:
1243 * @input: zero-terminated UTF-8 string.
1244 * @output: pointer to newly allocated output Unicode string.
1246 * Convert possibly ACE encoded domain name in UTF-8 format into a
1247 * UCS-4 string. The AllowUnassigned flag is false and std3asciirules
1248 * flag is false. The domain name may contain several labels,
1249 * separated by dots. The output buffer must be deallocated by the caller.
1252 * This function is deprecated in favor of
1253 * idna_to_unicode_ucs4_from_utf8() and will be removed in future versions.
1256 * Return value: Returns IDNA_SUCCESS on success, or error code.
1259 idna_utf8ace_to_ucs4 (const char *input
, unsigned long **output
)
1261 return idna_to_unicode_ucs4_from_utf8 (input
, output
, 0, 0);
1265 * idna_utf8ace_to_utf8:
1266 * @input: zero-terminated UTF-8 string.
1267 * @output: pointer to newly allocated output UTF-8 string.
1269 * Convert possibly ACE encoded domain name in UTF-8 format into a
1270 * UTF-8 string. The AllowUnassigned flag is false and std3asciirules
1271 * flag is false. The domain name may contain several labels,
1272 * separated by dots. The output buffer must be deallocated by the caller.
1275 * This function is deprecated in favor of
1276 * idna_to_unicode_utf8_from_utf8() and will be removed in future versions.
1279 * Return value: Returns IDNA_SUCCESS on success, or error code.
1282 idna_utf8ace_to_utf8 (const char *input
, char **output
)
1284 return idna_to_unicode_utf8_from_utf8 (input
, output
, 0, 0);
1288 * idna_utf8ace_to_locale:
1289 * @input: zero-terminated UTF-8 string.
1290 * @output: pointer to newly allocated output string encoded in the
1291 * current locale's character set.
1293 * Convert possibly ACE encoded domain name in UTF-8 format into a
1294 * string encoded in the current locale's character set. The
1295 * AllowUnassigned flag is false and std3asciirules flag is false.
1296 * The domain name may contain several labels, separated by dots. The
1297 * output buffer must be deallocated by the caller.
1299 * This function is deprecated in favor of
1300 * idna_to_unicode_locale_from_utf8() and will be removed in future versions.
1303 * Return value: Returns IDNA_SUCCESS on success, or error code.
1306 idna_utf8ace_to_locale (const char *input
, char **output
)
1308 return idna_to_unicode_locale_from_utf8 (input
, output
, 0, 0);
1312 * idna_localeace_to_locale:
1313 * @input: zero-terminated string encoded in the current locale's character set.
1315 * @output: pointer to newly allocated output string encoded in the
1316 * current locale's character set.
1318 * Convert possibly ACE encoded domain name in the locale's character
1319 * set into a string encoded in the current locale's character set.
1320 * The AllowUnassigned flag is false and std3asciirules flag is false.
1321 * The domain name may contain several labels, separated by dots. The
1322 * output buffer must be deallocated by the caller.
1324 * This function is deprecated in favor of
1325 * idna_to_unicode_locale_from_locale() and will be removed in future versions.
1328 * Return value: Returns IDNA_SUCCESS on success, or error code.
1331 idna_localeace_to_locale (const char *input
, char **output
)
1333 return idna_to_unicode_locale_from_locale (input
, output
, 0, 0);