(Tld_table): Change valid definition.
[libidn.git] / lib / idna.c
blob69c928fc42c35b8fe99555ce574cdf2899dcb2b1
1 /* idna.c Convert to or from IDN strings.
2 * Copyright (C) 2002, 2003, 2004 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #if HAVE_CONFIG_H
23 # include "config.h"
24 #endif
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stringprep.h>
29 #include <punycode.h>
31 #include "idna.h"
/* True when code point C is one of the four characters this file
   treats as a label separator: U+002E (full stop), U+3002
   (ideographic full stop), U+FF0E (fullwidth full stop) and U+FF61
   (halfwidth ideographic full stop).  C is evaluated more than once,
   so do not pass an expression with side effects. */
#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||	\
		 (c) == 0xFF0E || (c) == 0xFF61)
36 /* Core functions */
38 /**
39 * idna_to_ascii_4i
40 * @in: input array with unicode code points.
41 * @inlen: length of input array with unicode code points.
42 * @out: output zero terminated string that must have room for at
43 * least 63 characters plus the terminating zero.
44 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
46 * The ToASCII operation takes a sequence of Unicode code points that make
47 * up one label and transforms it into a sequence of code points in the
48 * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
49 * resulting sequence are equivalent labels.
51 * It is important to note that the ToASCII operation can fail. ToASCII
52 * fails if any step of it fails. If any step of the ToASCII operation
53 * fails on any label in a domain name, that domain name MUST NOT be used
54 * as an internationalized domain name. The method for deadling with this
55 * failure is application-specific.
57 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
58 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
59 * sequence of ASCII code points or a failure condition.
61 * ToASCII never alters a sequence of code points that are all in the ASCII
62 * range to begin with (although it could fail). Applying the ToASCII
63 * operation multiple times has exactly the same effect as applying it just
64 * once.
66 * Return value: Returns 0 on success, or an error code.
68 int
69 idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
71 size_t len, outlen;
72 uint32_t *src; /* XXX don't need to copy data? */
73 int rc;
76 * ToASCII consists of the following steps:
78 * 1. If all code points in the sequence are in the ASCII range (0..7F)
79 * then skip to step 3.
83 size_t i;
84 int inasciirange;
86 inasciirange = 1;
87 for (i = 0; i < inlen; i++)
88 if (in[i] > 0x7F)
89 inasciirange = 0;
90 if (inasciirange)
92 src = malloc (sizeof (in[0]) * (inlen + 1));
93 if (src == NULL)
94 return IDNA_MALLOC_ERROR;
96 memcpy (src, in, sizeof (in[0]) * inlen);
97 src[inlen] = 0;
99 goto step3;
104 * 2. Perform the steps specified in [NAMEPREP] and fail if there is
105 * an error. The AllowUnassigned flag is used in [NAMEPREP].
109 char *p;
111 p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
112 if (p == NULL)
113 return IDNA_MALLOC_ERROR;
115 len = strlen (p);
118 len = 2 * len + 10; /* XXX better guess? */
119 p = realloc (p, len);
120 if (p == NULL)
121 return IDNA_MALLOC_ERROR;
123 if (flags & IDNA_ALLOW_UNASSIGNED)
124 rc = stringprep_nameprep (p, len);
125 else
126 rc = stringprep_nameprep_no_unassigned (p, len);
128 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
130 if (rc != STRINGPREP_OK)
132 free (p);
133 return IDNA_STRINGPREP_ERROR;
136 src = stringprep_utf8_to_ucs4 (p, -1, NULL);
138 free (p);
141 step3:
143 * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
145 * (a) Verify the absence of non-LDH ASCII code points; that is,
146 * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
148 * (b) Verify the absence of leading and trailing hyphen-minus;
149 * that is, the absence of U+002D at the beginning and end of
150 * the sequence.
153 if (flags & IDNA_USE_STD3_ASCII_RULES)
155 size_t i;
157 for (i = 0; src[i]; i++)
158 if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
159 (src[i] >= 0x3A && src[i] <= 0x40) ||
160 (src[i] >= 0x5B && src[i] <= 0x60) ||
161 (src[i] >= 0x7B && src[i] <= 0x7F))
163 free (src);
164 return IDNA_CONTAINS_NON_LDH;
167 if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
169 free (src);
170 return IDNA_CONTAINS_MINUS;
175 * 4. If all code points in the sequence are in the ASCII range
176 * (0..7F), then skip to step 8.
180 size_t i;
181 int inasciirange;
183 inasciirange = 1;
184 for (i = 0; src[i]; i++)
186 if (src[i] > 0x7F)
187 inasciirange = 0;
188 /* copy string to output buffer if we are about to skip to step8 */
189 if (i < 64)
190 out[i] = src[i];
192 if (i < 64)
193 out[i] = '\0';
194 if (inasciirange)
195 goto step8;
199 * 5. Verify that the sequence does NOT begin with the ACE prefix.
204 size_t i;
205 int match;
207 match = 1;
208 for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
209 if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
210 match = 0;
211 if (match)
213 free (src);
214 return IDNA_CONTAINS_ACE_PREFIX;
219 * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
220 * and fail if there is an error.
222 for (len = 0; src[len]; len++)
224 src[len] = '\0';
225 outlen = 63 - strlen (IDNA_ACE_PREFIX);
226 rc = punycode_encode (len, src, NULL,
227 &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
228 if (rc != PUNYCODE_SUCCESS)
230 free (src);
231 return IDNA_PUNYCODE_ERROR;
233 out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
236 * 7. Prepend the ACE prefix.
239 memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
242 * 8. Verify that the number of code points is in the range 1 to 63
243 * inclusive (0 is excluded).
246 step8:
247 free (src);
248 if (strlen (out) < 1 || strlen (out) > 63)
249 return IDNA_INVALID_LENGTH;
251 return IDNA_SUCCESS;
254 /* ToUnicode(). May realloc() utf8in. */
255 static int
256 idna_to_unicode_internal (char *utf8in,
257 uint32_t * out, size_t * outlen, int flags)
259 int rc;
260 char tmpout[64];
261 size_t utf8len = strlen (utf8in) + 1;
262 size_t addlen = 0;
265 * ToUnicode consists of the following steps:
267 * 1. If the sequence contains any code points outside the ASCII range
268 * (0..7F) then proceed to step 2, otherwise skip to step 3.
272 size_t i;
273 int inasciirange;
275 inasciirange = 1;
276 for (i = 0; utf8in[i]; i++)
277 if (utf8in[i] & ~0x7F)
278 inasciirange = 0;
279 if (inasciirange)
280 goto step3;
284 * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
285 * error. (If step 3 of ToASCII is also performed here, it will not
286 * affect the overall behavior of ToUnicode, but it is not
287 * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
291 utf8in = realloc (utf8in, utf8len + addlen);
292 if (!utf8in)
293 return IDNA_MALLOC_ERROR;
294 if (flags & IDNA_ALLOW_UNASSIGNED)
295 rc = stringprep_nameprep (utf8in, utf8len + addlen);
296 else
297 rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
298 addlen += 1;
300 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
302 if (rc != STRINGPREP_OK)
303 return IDNA_STRINGPREP_ERROR;
305 /* 3. Verify that the sequence begins with the ACE prefix, and save a
306 * copy of the sequence.
309 step3:
310 if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
311 return IDNA_NO_ACE_PREFIX;
313 /* 4. Remove the ACE prefix.
316 memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
317 strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
319 /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
320 * and fail if there is an error. Save a copy of the result of
321 * this step.
324 (*outlen)--; /* reserve one for the zero */
326 rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
327 if (rc != PUNYCODE_SUCCESS)
328 return IDNA_PUNYCODE_ERROR;
330 out[*outlen] = 0; /* add zero */
332 /* 6. Apply ToASCII.
335 rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
336 if (rc != IDNA_SUCCESS)
337 return rc;
339 /* 7. Verify that the result of step 6 matches the saved copy from
340 * step 3, using a case-insensitive ASCII comparison.
343 if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
344 return IDNA_ROUNDTRIP_VERIFY_ERROR;
346 /* 8. Return the saved copy from step 5.
349 return IDNA_SUCCESS;
353 * idna_to_unicode_44i
354 * @in: input array with unicode code points.
355 * @inlen: length of input array with unicode code points.
356 * @out: output array with unicode code points.
357 * @outlen: on input, maximum size of output array with unicode code points,
358 * on exit, actual size of output array with unicode code points.
359 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
361 * The ToUnicode operation takes a sequence of Unicode code points
362 * that make up one label and returns a sequence of Unicode code
363 * points. If the input sequence is a label in ACE form, then the
364 * result is an equivalent internationalized label that is not in ACE
365 * form, otherwise the original sequence is returned unaltered.
367 * ToUnicode never fails. If any step fails, then the original input
368 * sequence is returned immediately in that step.
370 * The Punycode decoder can never output more code points than it
371 * inputs, but Nameprep can, and therefore ToUnicode can. Note that
372 * the number of octets needed to represent a sequence of code points
373 * depends on the particular character encoding used.
375 * The inputs to ToUnicode are a sequence of code points, the
376 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
377 * ToUnicode is always a sequence of Unicode code points.
379 * Return value: Returns error condition, but it must only be used for
380 * debugging purposes. The output buffer is always
381 * guaranteed to contain the correct data according to
382 * the specification (sans malloc induced errors). NB!
383 * This means that you normally ignore the return code
384 * from this function, as checking it means breaking the
385 * standard.
388 idna_to_unicode_44i (const uint32_t * in, size_t inlen,
389 uint32_t * out, size_t * outlen, int flags)
391 int rc;
392 size_t outlensave = *outlen;
393 char *p;
395 p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
396 if (p == NULL)
397 return IDNA_MALLOC_ERROR;
399 rc = idna_to_unicode_internal (p, out, outlen, flags);
400 if (rc != IDNA_SUCCESS)
402 memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
403 inlen : outlensave));
404 *outlen = inlen;
407 free (p);
409 return rc;
412 /* Wrappers that handle several labels */
415 * idna_to_ascii_4z:
416 * @input: zero terminated input Unicode string.
417 * @output: pointer to newly allocated output string.
418 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
420 * Convert UCS-4 domain name to ASCII string. The domain name may
421 * contain several labels, separated by dots. The output buffer must
422 * be deallocated by the caller.
424 * Return value: Returns IDNA_SUCCESS on success, or error code.
427 idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
429 const uint32_t *start = input;
430 const uint32_t *end = input;
431 char buf[64];
432 char *out = NULL;
433 int rc;
435 /* 1) Whenever dots are used as label separators, the following
436 characters MUST be recognized as dots: U+002E (full stop),
437 U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
438 U+FF61 (halfwidth ideographic full stop). */
440 if (input[0] == 0)
442 /* Handle implicit zero-length root label. */
443 *output = malloc (1);
444 if (!*output)
445 return IDNA_MALLOC_ERROR;
446 strcpy (*output, "");
447 return IDNA_SUCCESS;
450 if (DOTP (input[0]) && input[1] == 0)
452 /* Handle explicit zero-length root label. */
453 *output = malloc (2);
454 if (!*output)
455 return IDNA_MALLOC_ERROR;
456 strcpy (*output, ".");
457 return IDNA_SUCCESS;
460 *output = NULL;
463 end = start;
465 for (; *end && !DOTP (*end); end++)
468 if (*end == '\0' && start == end)
470 /* Handle explicit zero-length root label. */
471 buf[0] = '\0';
473 else
475 rc = idna_to_ascii_4i (start, end - start, buf, flags);
476 if (rc != IDNA_SUCCESS)
477 return rc;
480 if (out)
482 out = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
483 if (!out)
484 return IDNA_MALLOC_ERROR;
485 strcat (out, ".");
486 strcat (out, buf);
488 else
490 out = (char *) malloc (strlen (buf) + 1);
491 if (!out)
492 return IDNA_MALLOC_ERROR;
493 strcpy (out, buf);
496 start = end + 1;
498 while (*end);
500 *output = out;
502 return IDNA_SUCCESS;
506 * idna_to_ascii_8z:
507 * @input: zero terminated input UTF-8 string.
508 * @output: pointer to newly allocated output string.
509 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
511 * Convert UTF-8 domain name to ASCII string. The domain name may
512 * contain several labels, separated by dots. The output buffer must
513 * be deallocated by the caller.
515 * Return value: Returns IDNA_SUCCESS on success, or error code.
518 idna_to_ascii_8z (const char *input, char **output, int flags)
520 uint32_t *ucs4;
521 size_t ucs4len;
522 int rc;
524 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
525 if (!ucs4)
526 return IDNA_ICONV_ERROR;
528 rc = idna_to_ascii_4z (ucs4, output, flags);
530 free (ucs4);
532 return rc;
537 * idna_to_ascii_lz:
538 * @input: zero terminated input UTF-8 string.
539 * @output: pointer to newly allocated output string.
540 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
542 * Convert domain name in the locale's encoding to ASCII string. The
543 * domain name may contain several labels, separated by dots. The
544 * output buffer must be deallocated by the caller.
546 * Return value: Returns IDNA_SUCCESS on success, or error code.
549 idna_to_ascii_lz (const char *input, char **output, int flags)
551 char *utf8;
552 int rc;
554 utf8 = stringprep_locale_to_utf8 (input);
555 if (!utf8)
556 return IDNA_ICONV_ERROR;
558 rc = idna_to_ascii_8z (utf8, output, flags);
560 free (utf8);
562 return rc;
566 * idna_to_unicode_4z4z:
567 * @input: zero-terminated Unicode string.
568 * @output: pointer to newly allocated output Unicode string.
569 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
571 * Convert possibly ACE encoded domain name in UCS-4 format into a
572 * UCS-4 string. The domain name may contain several labels,
573 * separated by dots. The output buffer must be deallocated by the
574 * caller.
576 * Return value: Returns IDNA_SUCCESS on success, or error code.
579 idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
581 const uint32_t *start = input;
582 const uint32_t *end = input;
583 uint32_t *buf;
584 size_t buflen;
585 uint32_t *out = NULL;
586 size_t outlen = 0;
587 int rc;
589 *output = NULL;
593 end = start;
595 for (; *end && !DOTP (*end); end++)
598 buflen = end - start;
599 buf = malloc (sizeof (buf[0]) * (buflen + 1));
600 if (!buf)
601 return IDNA_MALLOC_ERROR;
603 rc = idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
604 /* don't check rc as per specification! */
606 if (out)
608 out = realloc (out, sizeof (out[0]) * (outlen + 1 + buflen + 1));
609 if (!out)
610 return IDNA_MALLOC_ERROR;
611 out[outlen++] = 0x002E; /* '.' (full stop) */
612 memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
613 outlen += buflen;
614 out[outlen] = 0x0;
615 free (buf);
617 else
619 out = buf;
620 outlen = buflen;
621 out[outlen] = 0x0;
624 start = end + 1;
626 while (*end);
628 *output = out;
630 return IDNA_SUCCESS;
634 * idna_to_unicode_8z4z:
635 * @input: zero-terminated UTF-8 string.
636 * @output: pointer to newly allocated output Unicode string.
637 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
639 * Convert possibly ACE encoded domain name in UTF-8 format into a
640 * UCS-4 string. The domain name may contain several labels,
641 * separated by dots. The output buffer must be deallocated by the
642 * caller.
644 * Return value: Returns IDNA_SUCCESS on success, or error code.
647 idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
649 uint32_t *ucs4;
650 size_t ucs4len;
651 int rc;
653 ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
654 if (!ucs4)
655 return IDNA_ICONV_ERROR;
657 rc = idna_to_unicode_4z4z (ucs4, output, flags);
658 free (ucs4);
660 return rc;
664 * idna_to_unicode_8z8z:
665 * @input: zero-terminated UTF-8 string.
666 * @output: pointer to newly allocated output UTF-8 string.
667 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
669 * Convert possibly ACE encoded domain name in UTF-8 format into a
670 * UTF-8 string. The domain name may contain several labels,
671 * separated by dots. The output buffer must be deallocated by the
672 * caller.
674 * Return value: Returns IDNA_SUCCESS on success, or error code.
677 idna_to_unicode_8z8z (const char *input, char **output, int flags)
679 uint32_t *ucs4;
680 int rc;
682 rc = idna_to_unicode_8z4z (input, &ucs4, flags);
683 *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
684 free (ucs4);
686 if (!*output)
687 return IDNA_ICONV_ERROR;
689 return rc;
693 * idna_to_unicode_8zlz:
694 * @input: zero-terminated UTF-8 string.
695 * @output: pointer to newly allocated output string encoded in the
696 * current locale's character set.
697 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
699 * Convert possibly ACE encoded domain name in UTF-8 format into a
700 * string encoded in the current locale's character set. The domain
701 * name may contain several labels, separated by dots. The output
702 * buffer must be deallocated by the caller.
704 * Return value: Returns IDNA_SUCCESS on success, or error code.
707 idna_to_unicode_8zlz (const char *input, char **output, int flags)
709 char *utf8;
710 int rc;
712 rc = idna_to_unicode_8z8z (input, &utf8, flags);
713 *output = stringprep_utf8_to_locale (utf8);
714 free (utf8);
716 if (!*output)
717 return IDNA_ICONV_ERROR;
719 return rc;
723 * idna_to_unicode_lzlz:
724 * @input: zero-terminated string encoded in the current locale's
725 * character set.
726 * @output: pointer to newly allocated output string encoded in the
727 * current locale's character set.
728 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
730 * Convert possibly ACE encoded domain name in the locale's character
731 * set into a string encoded in the current locale's character set.
732 * The domain name may contain several labels, separated by dots. The
733 * output buffer must be deallocated by the caller.
735 * Return value: Returns IDNA_SUCCESS on success, or error code.
738 idna_to_unicode_lzlz (const char *input, char **output, int flags)
740 char *utf8;
741 int rc;
743 utf8 = stringprep_locale_to_utf8 (input);
744 if (!utf8)
745 return IDNA_ICONV_ERROR;
747 rc = idna_to_unicode_8zlz (utf8, output, flags);
748 free (utf8);
750 return rc;
754 * IDNA_ACE_PREFIX
756 * The IANA allocated prefix to use for IDNA. "xn--"
760 * Idna_rc:
761 * @IDNA_SUCCESS: Successful operation. This value is guaranteed to
762 * always be zero, the remaining ones are only guaranteed to hold
763 * non-zero values, for logical comparison purposes.
764 * @IDNA_STRINGPREP_ERROR: Error during string preparation.
765 * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
766 * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
767 * the string contains non-LDH ASCII characters.
768 * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
769 * the string contains a leading or trailing hyphen-minus (U+002D).
770 * @IDNA_INVALID_LENGTH: The final output string is not within the
771 * (inclusive) range 1 to 63 characters.
772 * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
773 * (for ToUnicode).
774 * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
775 * string does not equal the input.
776 * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
777 * ToASCII).
778 * @IDNA_ICONV_ERROR: Could not convert string in locale encoding.
779 * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
780 * fatal error).
782 * Enumerated return codes of idna_to_ascii_4i(),
783 * idna_to_unicode_44i() functions (and functions derived from those
784 * functions). The value 0 is guaranteed to always correspond to
785 * success.
790 * Idna_flags:
791 * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
792 * Unicode code points.
793 * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
794 * rules (i.e., normal host name rules).
796 * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.