1 /* stringprep.c Core stringprep implementation.
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include "stringprep.h"
32 stringprep_find_character_in_table (uint32_t ucs4
,
33 Stringprep_table_element
* table
)
37 for (i
= 0; table
[i
].start
; i
++)
38 if (ucs4
>= table
[i
].start
&&
39 ucs4
<= (table
[i
].end
? table
[i
].end
: table
[i
].start
))
46 stringprep_find_string_in_table (uint32_t * ucs4
,
49 Stringprep_table_element
* table
)
54 for (j
= 0; j
< ucs4len
; j
++)
55 if ((pos
= stringprep_find_character_in_table (ucs4
[j
], table
)) != -1)
66 stringprep_apply_table_to_string (uint32_t * ucs4
,
69 Stringprep_table_element
* table
)
74 while ((pos
= stringprep_find_string_in_table (ucs4
, *ucs4len
,
77 for (maplen
= STRINGPREP_MAX_MAP_CHARS
;
78 maplen
> 0 && table
[i
].map
[maplen
- 1] == 0; maplen
--)
81 if (*ucs4len
- 1 + maplen
>= maxucs4len
)
82 return STRINGPREP_TOO_SMALL_BUFFER
;
84 memmove (&ucs4
[pos
+ maplen
], &ucs4
[pos
+ 1],
85 sizeof (uint32_t) * (*ucs4len
- pos
- 1));
86 memcpy (&ucs4
[pos
], table
[i
].map
, sizeof (uint32_t) * maplen
);
87 *ucs4len
= *ucs4len
- 1 + maplen
;
/* INVERTED(x) masks off the most significant bit of an unsigned long
 * from x, so it is nonzero iff x has any of the lower bits set.
 *
 * UNAPPLICAPLEFLAGS(flags, profileflags) is true when either
 *  - INVERTED(profileflags) is zero, profileflags itself is nonzero,
 *    and profileflags shares no bit with flags; or
 *  - INVERTED(profileflags) is nonzero and profileflags shares at
 *    least one bit with flags.
 * stringprep_4i evaluates it with the caller's flags and the flags of
 * the current profile step.
 *
 * NOTE(review): flags and profileflags are expanded without
 * parentheses and more than once, so only simple, side-effect free
 * expressions should be passed. */
93 #define INVERTED(x) ((x) & ((~0UL) >> 1))
94 #define UNAPPLICAPLEFLAGS(flags, profileflags) \
95 ((!INVERTED(profileflags) && !(profileflags & flags) && profileflags) || \
96 ( INVERTED(profileflags) && (profileflags & flags)))
100 * @ucs4: input/output array with string to prepare.
101 * @len: on input, length of input array with Unicode code points,
102 * on exit, length of output array with Unicode code points.
103 * @maxucs4len: maximum length of input/output array.
104 * @flags: stringprep profile flags, or 0.
105 * @profile: pointer to stringprep profile to use.
107 * Prepare the input UCS-4 string according to the stringprep profile,
108 * and write back the result to the input string.
110 * The input is not required to be zero terminated (@ucs4[@len] = 0).
111 * The output will not be zero terminated unless @ucs4[@len] = 0.
112 * Instead, see stringprep_4zi() if your input is zero terminated or
113 * if you want the output to be.
115 * Since the stringprep operation can expand the string, @maxucs4len
116 * indicates how large the buffer holding the string is. This function
117 * will not read or write to code points outside that size.
119 * The @flags are one of Stringprep_profile_flags, or 0.
121 * The @profile contains the instructions to perform. Your application
122 * can define new profiles, possibly re-using the generic stringprep
123 * tables that always will be part of the library, or use one of the
124 * currently supported profiles.
126 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
129 stringprep_4i (uint32_t * ucs4
, size_t * len
, size_t maxucs4len
,
130 Stringprep_profile_flags flags
, Stringprep_profile
* profile
)
134 size_t ucs4len
= *len
;
137 for (i
= 0; profile
[i
].operation
; i
++)
139 switch (profile
[i
].operation
)
141 case STRINGPREP_NFKC
:
145 if (UNAPPLICAPLEFLAGS (flags
, profile
[i
].flags
))
148 if (flags
& STRINGPREP_NO_NFKC
&& !profile
[i
].flags
)
149 /* Profile requires NFKC, but caller asked for no NFKC. */
150 return STRINGPREP_FLAG_ERROR
;
152 q
= stringprep_ucs4_nfkc_normalize (ucs4
, ucs4len
);
154 return STRINGPREP_NFKC_FAILED
;
156 for (ucs4len
= 0; q
[ucs4len
]; ucs4len
++)
159 if (ucs4len
>= maxucs4len
)
162 return STRINGPREP_TOO_SMALL_BUFFER
;
165 memcpy (ucs4
, q
, ucs4len
* sizeof (ucs4
[0]));
171 case STRINGPREP_PROHIBIT_TABLE
:
172 k
= stringprep_find_string_in_table (ucs4
, ucs4len
,
173 NULL
, profile
[i
].table
);
175 return STRINGPREP_CONTAINS_PROHIBITED
;
178 case STRINGPREP_UNASSIGNED_TABLE
:
179 if (UNAPPLICAPLEFLAGS (flags
, profile
[i
].flags
))
181 if (flags
& STRINGPREP_NO_UNASSIGNED
)
183 k
= stringprep_find_string_in_table
184 (ucs4
, ucs4len
, NULL
, profile
[i
].table
);
186 return STRINGPREP_CONTAINS_UNASSIGNED
;
190 case STRINGPREP_MAP_TABLE
:
191 if (UNAPPLICAPLEFLAGS (flags
, profile
[i
].flags
))
193 rc
= stringprep_apply_table_to_string
194 (ucs4
, &ucs4len
, maxucs4len
, profile
[i
].table
);
195 if (rc
!= STRINGPREP_OK
)
199 case STRINGPREP_BIDI_PROHIBIT_TABLE
:
200 case STRINGPREP_BIDI_RAL_TABLE
:
201 case STRINGPREP_BIDI_L_TABLE
:
204 case STRINGPREP_BIDI
:
206 int done_prohibited
= 0;
209 int contains_ral
= -1;
212 for (j
= 0; profile
[j
].operation
; j
++)
213 if (profile
[j
].operation
== STRINGPREP_BIDI_PROHIBIT_TABLE
)
216 k
= stringprep_find_string_in_table (ucs4
, ucs4len
,
220 return STRINGPREP_BIDI_CONTAINS_PROHIBITED
;
222 else if (profile
[j
].operation
== STRINGPREP_BIDI_RAL_TABLE
)
225 if (stringprep_find_string_in_table
226 (ucs4
, ucs4len
, NULL
, profile
[j
].table
) != -1)
229 else if (profile
[j
].operation
== STRINGPREP_BIDI_L_TABLE
)
232 if (stringprep_find_string_in_table
233 (ucs4
, ucs4len
, NULL
, profile
[j
].table
) != -1)
237 if (!done_prohibited
|| !done_ral
|| !done_l
)
238 return STRINGPREP_PROFILE_ERROR
;
240 if (contains_ral
!= -1 && contains_l
!= -1)
241 return STRINGPREP_BIDI_BOTH_L_AND_RAL
;
243 if (contains_ral
!= -1)
245 if (!(stringprep_find_character_in_table
246 (ucs4
[0], profile
[contains_ral
].table
) != -1 &&
247 stringprep_find_character_in_table
248 (ucs4
[ucs4len
- 1], profile
[contains_ral
].table
) != -1))
249 return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL
;
255 return STRINGPREP_PROFILE_ERROR
;
262 return STRINGPREP_OK
;
266 stringprep_4zi_1 (uint32_t * ucs4
, size_t ucs4len
, size_t maxucs4len
,
267 Stringprep_profile_flags flags
,
268 Stringprep_profile
* profile
)
272 rc
= stringprep_4i (ucs4
, &ucs4len
, maxucs4len
, flags
, profile
);
273 if (rc
!= STRINGPREP_OK
)
276 if (ucs4len
>= maxucs4len
)
277 return STRINGPREP_TOO_SMALL_BUFFER
;
281 return STRINGPREP_OK
;
286 * @ucs4: input/output array with zero terminated string to prepare.
287 * @maxucs4len: maximum length of input/output array.
288 * @flags: stringprep profile flags, or 0.
289 * @profile: pointer to stringprep profile to use.
291 * Prepare the input zero terminated UCS-4 string according to the
292 * stringprep profile, and write back the result to the input string.
294 * Since the stringprep operation can expand the string, @maxucs4len
295 * indicates how large the buffer holding the string is. This function
296 * will not read or write to code points outside that size.
298 * The @flags are one of Stringprep_profile_flags, or 0.
300 * The @profile contains the instructions to perform. Your application
301 * can define new profiles, possibly re-using the generic stringprep
302 * tables that always will be part of the library, or use one of the
303 * currently supported profiles.
305 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
308 stringprep_4zi (uint32_t * ucs4
, size_t maxucs4len
,
309 Stringprep_profile_flags flags
, Stringprep_profile
* profile
)
313 for (ucs4len
= 0; ucs4len
< maxucs4len
&& ucs4
[ucs4len
] != 0; ucs4len
++)
316 return stringprep_4zi_1 (ucs4
, ucs4len
, maxucs4len
, flags
, profile
);
321 * @in: input/output array with string to prepare.
322 * @maxlen: maximum length of input/output array.
323 * @flags: stringprep profile flags, or 0.
324 * @profile: pointer to stringprep profile to use.
326 * Prepare the input zero terminated UTF-8 string according to the
327 * stringprep profile, and write back the result to the input string.
329 * Note that you must convert strings entered in the system's locale
330 * into UTF-8 before using this function, see
331 * stringprep_locale_to_utf8().
333 * Since the stringprep operation can expand the string, @maxlen
334 * indicates how large the buffer holding the string is. This function
335 * will not read or write to characters outside that size.
337 * The @flags are one of Stringprep_profile_flags, or 0.
339 * The @profile contains the instructions to perform. Your application
340 * can define new profiles, possibly re-using the generic stringprep
341 * tables that always will be part of the library, or use one of the
342 * currently supported profiles.
344 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
347 stringprep (char *in
,
349 Stringprep_profile_flags flags
, Stringprep_profile
* profile
)
353 uint32_t *ucs4
= NULL
;
354 size_t ucs4len
, maxucs4len
, adducs4len
= 50;
360 ucs4
= stringprep_utf8_to_ucs4 (in
, -1, &ucs4len
);
361 maxucs4len
= ucs4len
+ adducs4len
;
362 ucs4
= realloc (ucs4
, maxucs4len
* sizeof (uint32_t));
364 return STRINGPREP_MALLOC_ERROR
;
366 rc
= stringprep_4i (ucs4
, &ucs4len
, maxucs4len
, flags
, profile
);
369 while (rc
== STRINGPREP_TOO_SMALL_BUFFER
);
370 if (rc
!= STRINGPREP_OK
)
376 utf8
= stringprep_ucs4_to_utf8 (ucs4
, ucs4len
, 0, 0);
380 return STRINGPREP_MALLOC_ERROR
;
383 if (strlen (utf8
) >= maxlen
)
387 return STRINGPREP_TOO_SMALL_BUFFER
;
390 strcpy (in
, utf8
); /* flawfinder: ignore */
392 return STRINGPREP_OK
;
396 * stringprep_profile:
397 * @in: input array with UTF-8 string to prepare.
398 * @out: output variable with pointer to newly allocated string.
399 * @profile: name of stringprep profile to use.
400 * @flags: stringprep profile flags, or 0.
402 * Prepare the input zero terminated UTF-8 string according to the
403 * stringprep profile, and return the result in a newly allocated string.
406 * Note that you must convert strings entered in the system's locale
407 * into UTF-8 before using this function, see
408 * stringprep_locale_to_utf8().
410 * The output @out variable must be deallocated by the caller.
412 * The @flags are one of Stringprep_profile_flags, or 0.
414 * The @profile specifies the name of the stringprep profile to use.
415 * It must be one of the internally supported stringprep profiles.
417 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
420 stringprep_profile (const char *in
,
421 char **out
, char *profile
, Stringprep_profile_flags flags
)
423 const Stringprep_profiles
*p
;
425 size_t len
= strlen (in
) + 1;
428 for (p
= &stringprep_profiles
[0]; p
->name
; p
++)
429 if (strcmp (p
->name
, profile
) == 0)
432 if (!p
|| !p
->name
|| !p
->tables
)
433 return STRINGPREP_UNKNOWN_PROFILE
;
439 str
= (char *) malloc (len
);
441 return STRINGPREP_MALLOC_ERROR
;
445 rc
= stringprep (str
, len
, flags
, p
->tables
);
448 while (rc
== STRINGPREP_TOO_SMALL_BUFFER
);
450 if (rc
== STRINGPREP_OK
)
458 /*! \mainpage GNU Internationalized Domain Name Library
460 * \section intro Introduction
462 * GNU Libidn is an implementation of the Stringprep, Punycode and IDNA
463 * specifications defined by the IETF Internationalized Domain Names
464 * (IDN) working group, used for internationalized domain names. The
465 * package is available under the GNU Lesser General Public License.
467 * The library contains a generic Stringprep implementation that does
468 * Unicode 3.2 NFKC normalization, mapping and prohibition of
469 * characters, and bidirectional character handling. Profiles for iSCSI,
470 * Kerberos 5, Nameprep, SASL and XMPP are included. Punycode and ASCII
471 * Compatible Encoding (ACE) via IDNA are supported.
473 * The Stringprep API consists of two main functions, one for converting
474 * data from the system's native representation into UTF-8, and one
475 * function to perform the Stringprep processing. Adding a new
476 * Stringprep profile for your application within the API is
477 * straightforward. The Punycode API consists of one encoding function
478 * and one decoding function. The IDNA API consists of the ToASCII and
479 * ToUnicode functions, as well as a high-level interface for converting
480 * entire domain names to and from the ACE encoded form.
482 * The library is used by, e.g., GNU SASL and Shishi to process user
483 * names and passwords. Libidn can be built into GNU Libc to enable a
484 * new system-wide getaddrinfo() flag for IDN processing.
486 * Libidn is developed for the GNU/Linux system, but runs on over 20 Unix
487 * platforms (including Solaris, IRIX, AIX, and Tru64) and Windows.
488 * Libidn is written in C and (parts of) the API is accessible from C,
489 * C++, Emacs Lisp, Python and Java.
491 * The project web page:\n
492 * http://www.gnu.org/software/libidn/
494 * The software archive:\n
495 * ftp://alpha.gnu.org/pub/gnu/libidn/
497 * For more information see:\n
498 * http://www.ietf.org/html.charters/idn-charter.html\n
499 * http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)\n
500 * http://www.ietf.org/rfc/rfc3490.txt (idna specification)\n
501 * http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)\n
502 * http://www.ietf.org/rfc/rfc3492.txt (punycode specification)\n
503 * http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt\n
504 * http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt\n
505 * http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt\n
506 * http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt\n
507 * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt\n
508 * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt\n
510 * Further information and paid contract development:\n
511 * Simon Josefsson <simon@josefsson.org>
513 * \section examples Examples
516 * \include example3.c
517 * \include example4.c
523 * String defined via CPP denoting the header file version number.
524 * Used together with stringprep_check_version() to verify header file
525 * and run-time library consistency.
529 * STRINGPREP_MAX_MAP_CHARS
531 * Maximum number of code points that can replace a single code point,
532 * during stringprep mapping.
537 * @STRINGPREP_OK: Successful operation. This value is guaranteed to
538 * always be zero, the remaining ones are only guaranteed to hold
539 * non-zero values, for logical comparison purposes.
540 * @STRINGPREP_CONTAINS_UNASSIGNED: String contains unassigned Unicode
541 * code points, which is forbidden by the profile.
542 * @STRINGPREP_CONTAINS_PROHIBITED: String contains code points
543 * prohibited by the profile.
544 * @STRINGPREP_BIDI_BOTH_L_AND_RAL: String contains code points with
545 * conflicting bidirectional categories.
546 * @STRINGPREP_BIDI_LEADTRAIL_NOT_RAL: Leading and trailing character
547 * in string not of proper bidirectional category.
548 * @STRINGPREP_BIDI_CONTAINS_PROHIBITED: Contains prohibited code
549 * points detected by bidirectional code.
550 * @STRINGPREP_TOO_SMALL_BUFFER: Buffer handed to function was too
551 * small. This usually indicates a problem in the calling application.
553 * @STRINGPREP_PROFILE_ERROR: The stringprep profile was inconsistent.
554 * This usually indicates an internal error in the library.
555 * @STRINGPREP_FLAG_ERROR: The supplied flag conflicted with profile.
556 * This usually indicates a problem in the calling application.
557 * @STRINGPREP_UNKNOWN_PROFILE: The supplied profile name was not
558 * known to the library.
559 * @STRINGPREP_NFKC_FAILED: The Unicode NFKC operation failed. This
560 * usually indicates an internal error in the library.
561 * @STRINGPREP_MALLOC_ERROR: A memory allocation failed. This is
562 * usually a fatal error.
564 * Enumerated return codes of stringprep(), stringprep_profile()
565 * functions (and macros using those functions). The value 0 is
566 * guaranteed to always correspond to success.
570 * Stringprep_profile_flags:
571 * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as
572 * selecting the non-NFKC case folding tables. Usually the profile
573 * specifies BIDI and NFKC settings, and applications should not
574 * override it unless in special situations.
575 * @STRINGPREP_NO_BIDI: Disable the BIDI step. Usually the profile
576 * specifies BIDI and NFKC settings, and applications should not
577 * override it unless in special situations.
578 * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if
579 * string contains unassigned characters according to profile.
581 * Stringprep profile flags.
585 * Stringprep_profile_steps:
587 * Various steps in the stringprep algorithm. You really want to
588 * study the source code to understand this one. Only useful if you
589 * want to add another profile.
593 * stringprep_nameprep:
594 * @in: input/output array with string to prepare.
595 * @maxlen: maximum length of input/output array.
597 * Prepare the input UTF-8 string according to the nameprep profile.
598 * The AllowUnassigned flag is true, use
599 * stringprep_nameprep_no_unassigned() if you want a false
600 * AllowUnassigned. Returns 0 iff successful, or an error code.
604 * stringprep_nameprep_no_unassigned:
605 * @in: input/output array with string to prepare.
606 * @maxlen: maximum length of input/output array.
608 * Prepare the input UTF-8 string according to the nameprep profile.
609 * The AllowUnassigned flag is false, use stringprep_nameprep() for
610 * true AllowUnassigned. Returns 0 iff successful, or an error code.
615 * @in: input/output array with string to prepare.
616 * @maxlen: maximum length of input/output array.
618 * Prepare the input UTF-8 string according to the draft iSCSI
619 * stringprep profile. Returns 0 iff successful, or an error code.
623 * stringprep_kerberos5:
624 * @in: input/output array with string to prepare.
625 * @maxlen: maximum length of input/output array.
627 * Prepare the input UTF-8 string according to the draft Kerberos5
628 * stringprep profile. Returns 0 iff successful, or an error code.
633 * @in: input/output array with string to prepare.
634 * @maxlen: maximum length of input/output array.
636 * Prepare the input UTF-8 string according to the draft SASL
637 * ANONYMOUS profile. Returns 0 iff successful, or an error code.
641 * stringprep_xmpp_nodeprep:
642 * @in: input/output array with string to prepare.
643 * @maxlen: maximum length of input/output array.
645 * Prepare the input UTF-8 string according to the draft XMPP node
646 * identifier profile. Returns 0 iff successful, or an error code.
650 * stringprep_xmpp_resourceprep:
651 * @in: input/output array with string to prepare.
652 * @maxlen: maximum length of input/output array.
654 * Prepare the input UTF-8 string according to the draft XMPP resource
655 * identifier profile. Returns 0 iff successful, or an error code.