(stringprep_profile): Fix warning.
[libidn.git] / lib / stringprep.c
blobffd28da2683b179a1925aeaed2c205c81e65c0c2
1 /* stringprep.c Core stringprep implementation.
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #if HAVE_CONFIG_H
23 # include "config.h"
24 #endif
26 #include <stdlib.h>
27 #include <string.h>
29 #include "stringprep.h"
31 static ssize_t
32 stringprep_find_character_in_table (uint32_t ucs4,
33 Stringprep_table_element * table)
35 ssize_t i;
37 for (i = 0; table[i].start; i++)
38 if (ucs4 >= table[i].start &&
39 ucs4 <= (table[i].end ? table[i].end : table[i].start))
40 return i;
42 return -1;
45 static ssize_t
46 stringprep_find_string_in_table (uint32_t * ucs4,
47 size_t ucs4len,
48 size_t * tablepos,
49 Stringprep_table_element * table)
51 size_t j;
52 ssize_t pos;
54 for (j = 0; j < ucs4len; j++)
55 if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
57 if (tablepos)
58 *tablepos = pos;
59 return j;
62 return -1;
65 static int
66 stringprep_apply_table_to_string (uint32_t * ucs4,
67 size_t * ucs4len,
68 size_t maxucs4len,
69 Stringprep_table_element * table)
71 ssize_t pos;
72 size_t i, maplen;
74 while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
75 &i, table)) != -1)
77 for (maplen = STRINGPREP_MAX_MAP_CHARS;
78 maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
81 if (*ucs4len - 1 + maplen >= maxucs4len)
82 return STRINGPREP_TOO_SMALL_BUFFER;
84 memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
85 sizeof (uint32_t) * (*ucs4len - pos - 1));
86 memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
87 *ucs4len = *ucs4len - 1 + maplen;
90 return STRINGPREP_OK;
/* INVERTED(x) yields X with the most significant bit of an unsigned
   long masked off; it is non-zero when any other bit of X is set.
   NOTE(review): the name suggests it should detect complemented
   (~FLAG) profile flags, but with this mask an ordinary non-zero
   flag value also tests true -- confirm the intent before relying
   on the distinction.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) is true when a profile step
   carrying PROFILEFLAGS must be skipped for a call made with FLAGS:
   either PROFILEFLAGS is non-zero, not INVERTED and shares no bit
   with FLAGS, or PROFILEFLAGS is INVERTED and shares a bit with
   FLAGS.  A step with zero PROFILEFLAGS is never skipped.  Both
   arguments are parenthesised at every use so that compound
   expressions can be passed safely; they are evaluated several
   times, so they must be free of side effects.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags)                          \
  ((!INVERTED (profileflags) && !((profileflags) & (flags))             \
    && (profileflags))                                                  \
   || (INVERTED (profileflags) && ((profileflags) & (flags))))
98 /**
99 * stringprep_4i:
100 * @ucs4: input/output array with string to prepare.
101 * @len: on input, length of input array with Unicode code points,
102 * on exit, length of output array with Unicode code points.
103 * @maxucs4len: maximum length of input/output array.
104 * @flags: stringprep profile flags, or 0.
105 * @profile: pointer to stringprep profile to use.
107 * Prepare the input UCS-4 string according to the stringprep profile,
108 * and write back the result to the input string.
110 * The input is not required to be zero terminated (@ucs4[@len] = 0).
111 * The output will not be zero terminated unless @ucs4[@len] = 0.
112 * Instead, see stringprep_4zi() if your input is zero terminated or
113 * if you want the output to be.
115 * Since the stringprep operation can expand the string, @maxucs4len
116 * indicate how large the buffer holding the string is. This function
117 * will not read or write to code points outside that size.
119 * The @flags are one of Stringprep_profile_flags, or 0.
121 * The @profile contain the instructions to perform. Your application
122 * can define new profiles, possibly re-using the generic stringprep
123 * tables that always will be part of the library, or use one of the
124 * currently supported profiles.
126 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
129 stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
130 Stringprep_profile_flags flags, Stringprep_profile * profile)
132 size_t i, j;
133 ssize_t k;
134 size_t ucs4len = *len;
135 int rc;
137 for (i = 0; profile[i].operation; i++)
139 switch (profile[i].operation)
141 case STRINGPREP_NFKC:
143 uint32_t *q = 0;
145 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
146 break;
148 if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
149 /* Profile requires NFKC, but callee asked for no NFKC. */
150 return STRINGPREP_FLAG_ERROR;
152 q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
153 if (!q)
154 return STRINGPREP_NFKC_FAILED;
156 for (ucs4len = 0; q[ucs4len]; ucs4len++)
159 if (ucs4len >= maxucs4len)
161 free (q);
162 return STRINGPREP_TOO_SMALL_BUFFER;
165 memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
167 free (q);
169 break;
171 case STRINGPREP_PROHIBIT_TABLE:
172 k = stringprep_find_string_in_table (ucs4, ucs4len,
173 NULL, profile[i].table);
174 if (k != -1)
175 return STRINGPREP_CONTAINS_PROHIBITED;
176 break;
178 case STRINGPREP_UNASSIGNED_TABLE:
179 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
180 break;
181 if (flags & STRINGPREP_NO_UNASSIGNED)
183 k = stringprep_find_string_in_table
184 (ucs4, ucs4len, NULL, profile[i].table);
185 if (k != -1)
186 return STRINGPREP_CONTAINS_UNASSIGNED;
188 break;
190 case STRINGPREP_MAP_TABLE:
191 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
192 break;
193 rc = stringprep_apply_table_to_string
194 (ucs4, &ucs4len, maxucs4len, profile[i].table);
195 if (rc != STRINGPREP_OK)
196 return rc;
197 break;
199 case STRINGPREP_BIDI_PROHIBIT_TABLE:
200 case STRINGPREP_BIDI_RAL_TABLE:
201 case STRINGPREP_BIDI_L_TABLE:
202 break;
204 case STRINGPREP_BIDI:
206 int done_prohibited = 0;
207 int done_ral = 0;
208 int done_l = 0;
209 int contains_ral = -1;
210 int contains_l = -1;
212 for (j = 0; profile[j].operation; j++)
213 if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
215 done_prohibited = 1;
216 k = stringprep_find_string_in_table (ucs4, ucs4len,
217 NULL,
218 profile[j].table);
219 if (k != -1)
220 return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
222 else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
224 done_ral = 1;
225 if (stringprep_find_string_in_table
226 (ucs4, ucs4len, NULL, profile[j].table) != -1)
227 contains_ral = j;
229 else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
231 done_l = 1;
232 if (stringprep_find_string_in_table
233 (ucs4, ucs4len, NULL, profile[j].table) != -1)
234 contains_l = j;
237 if (!done_prohibited || !done_ral || !done_l)
238 return STRINGPREP_PROFILE_ERROR;
240 if (contains_ral != -1 && contains_l != -1)
241 return STRINGPREP_BIDI_BOTH_L_AND_RAL;
243 if (contains_ral != -1)
245 if (!(stringprep_find_character_in_table
246 (ucs4[0], profile[contains_ral].table) != -1 &&
247 stringprep_find_character_in_table
248 (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
249 return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
252 break;
254 default:
255 return STRINGPREP_PROFILE_ERROR;
256 break;
260 *len = ucs4len;
262 return STRINGPREP_OK;
265 static int
266 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
267 Stringprep_profile_flags flags,
268 Stringprep_profile * profile)
270 int rc;
272 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
273 if (rc != STRINGPREP_OK)
274 return rc;
276 if (ucs4len >= maxucs4len)
277 return STRINGPREP_TOO_SMALL_BUFFER;
279 ucs4[ucs4len] = 0;
281 return STRINGPREP_OK;
285 * stringprep_4zi:
286 * @ucs4: input/output array with zero terminated string to prepare.
287 * @maxucs4len: maximum length of input/output array.
288 * @flags: stringprep profile flags, or 0.
289 * @profile: pointer to stringprep profile to use.
291 * Prepare the input zero terminated UCS-4 string according to the
292 * stringprep profile, and write back the result to the input string.
294 * Since the stringprep operation can expand the string, @maxucs4len
295 * indicate how large the buffer holding the string is. This function
296 * will not read or write to code points outside that size.
298 * The @flags are one of Stringprep_profile_flags, or 0.
300 * The @profile contain the instructions to perform. Your application
301 * can define new profiles, possibly re-using the generic stringprep
302 * tables that always will be part of the library, or use one of the
303 * currently supported profiles.
305 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
308 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
309 Stringprep_profile_flags flags, Stringprep_profile * profile)
311 size_t ucs4len;
313 for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
316 return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
320 * stringprep:
321 * @in: input/ouput array with string to prepare.
322 * @maxlen: maximum length of input/output array.
323 * @flags: stringprep profile flags, or 0.
324 * @profile: pointer to stringprep profile to use.
326 * Prepare the input zero terminated UTF-8 string according to the
327 * stringprep profile, and write back the result to the input string.
329 * Note that you must convert strings entered in the systems locale
330 * into UTF-8 before using this function, see
331 * stringprep_locale_to_utf8().
333 * Since the stringprep operation can expand the string, @maxlen
334 * indicate how large the buffer holding the string is. This function
335 * will not read or write to characters outside that size.
337 * The @flags are one of Stringprep_profile_flags, or 0.
339 * The @profile contain the instructions to perform. Your application
340 * can define new profiles, possibly re-using the generic stringprep
341 * tables that always will be part of the library, or use one of the
342 * currently supported profiles.
344 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
347 stringprep (char *in,
348 size_t maxlen,
349 Stringprep_profile_flags flags, Stringprep_profile * profile)
351 int rc;
352 char *utf8 = NULL;
353 uint32_t *ucs4 = NULL;
354 size_t ucs4len, maxucs4len, adducs4len = 50;
358 if (ucs4)
359 free (ucs4);
360 ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
361 maxucs4len = ucs4len + adducs4len;
362 ucs4 = realloc (ucs4, maxucs4len * sizeof (uint32_t));
363 if (!ucs4)
364 return STRINGPREP_MALLOC_ERROR;
366 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
367 adducs4len += 50;
369 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
370 if (rc != STRINGPREP_OK)
372 free (ucs4);
373 return rc;
376 utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
377 if (!utf8)
379 free (ucs4);
380 return STRINGPREP_MALLOC_ERROR;
383 if (strlen (utf8) >= maxlen)
385 free (utf8);
386 free (ucs4);
387 return STRINGPREP_TOO_SMALL_BUFFER;
390 strcpy (in, utf8); /* flawfinder: ignore */
392 return STRINGPREP_OK;
396 * stringprep_profile:
397 * @in: input array with UTF-8 string to prepare.
398 * @out: output variable with pointer to newly allocate string.
399 * @profile: name of stringprep profile to use.
400 * @flags: stringprep profile flags, or 0.
402 * Prepare the input zero terminated UTF-8 string according to the
403 * stringprep profile, and return the result in a newly allocated
404 * variable.
406 * Note that you must convert strings entered in the systems locale
407 * into UTF-8 before using this function, see
408 * stringprep_locale_to_utf8().
410 * The output @out variable must be deallocated by the caller.
412 * The @flags are one of Stringprep_profile_flags, or 0.
414 * The @profile specifies the name of the stringprep profile to use.
415 * It must be one of the internally supported stringprep profiles.
417 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
420 stringprep_profile (const char *in,
421 char **out, char *profile, Stringprep_profile_flags flags)
423 const Stringprep_profiles *p;
424 char *str = NULL;
425 size_t len = strlen (in) + 1;
426 int rc;
428 for (p = &stringprep_profiles[0]; p->name; p++)
429 if (strcmp (p->name, profile) == 0)
430 break;
432 if (!p || !p->name || !p->tables)
433 return STRINGPREP_UNKNOWN_PROFILE;
437 if (str)
438 free (str);
439 str = (char *) malloc (len);
440 if (str == NULL)
441 return STRINGPREP_MALLOC_ERROR;
443 strcpy (str, in);
445 rc = stringprep (str, len, flags, p->tables);
446 len += 50;
448 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
450 if (rc == STRINGPREP_OK)
451 *out = str;
452 else
453 free (str);
455 return rc;
458 /*! \mainpage GNU Internationalized Domain Name Library
460 * \section intro Introduction
462 * GNU Libidn is an implementation of the Stringprep, Punycode and IDNA
463 * specifications defined by the IETF Internationalized Domain Names
464 * (IDN) working group, used for internationalized domain names. The
465 * package is available under the GNU Lesser General Public License.
467 * The library contains a generic Stringprep implementation that does
468 * Unicode 3.2 NFKC normalization, mapping and prohibition of
469 * characters, and bidirectional character handling. Profiles for iSCSI,
470 * Kerberos 5, Nameprep, SASL and XMPP are included. Punycode and ASCII
471 * Compatible Encoding (ACE) via IDNA are supported.
473 * The Stringprep API consists of two main functions, one for converting
474 * data from the system's native representation into UTF-8, and one
475 * function to perform the Stringprep processing. Adding a new
476 * Stringprep profile for your application within the API is
477 * straightforward. The Punycode API consists of one encoding function
478 * and one decoding function. The IDNA API consists of the ToASCII and
479 * ToUnicode functions, as well as a high-level interface for converting
480 * entire domain names to and from the ACE encoded form.
482 * The library is used by, e.g., GNU SASL and Shishi to process user
483 * names and passwords. Libidn can be built into GNU Libc to enable a
484 * new system-wide getaddrinfo() flag for IDN processing.
486 * Libidn is developed for the GNU/Linux system, but runs on over 20 Unix
487 * platforms (including Solaris, IRIX, AIX, and Tru64) and Windows.
488 * Libidn is written in C and (parts of) the API is accessible from C,
489 * C++, Emacs Lisp, Python and Java.
491 * The project web page:\n
492 * http://www.gnu.org/software/libidn/
494 * The software archive:\n
495 * ftp://alpha.gnu.org/pub/gnu/libidn/
497 * For more information see:\n
498 * http://www.ietf.org/html.charters/idn-charter.html\n
499 * http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)\n
500 * http://www.ietf.org/rfc/rfc3490.txt (idna specification)\n
501 * http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)\n
502 * http://www.ietf.org/rfc/rfc3492.txt (punycode specification)\n
503 * http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt\n
504 * http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt\n
505 * http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt\n
506 * http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt\n
507 * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt\n
508 * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt\n
510 * Further information and paid contract development:\n
511 * Simon Josefsson <simon@josefsson.org>
513 * \section examples Examples
515 * \include example.c
516 * \include example3.c
517 * \include example4.c
521 * STRINGPREP_VERSION
523 * String defined via CPP denoting the header file version number.
524 * Used together with stringprep_check_version() to verify header file
525 * and run-time library consistency.
529 * STRINGPREP_MAX_MAP_CHARS
531 * Maximum number of code points that can replace a single code point,
532 * during stringprep mapping.
536 * Stringprep_rc:
537 * @STRINGPREP_OK: Successful operation. This value is guaranteed to
538 * always be zero, the remaining ones are only guaranteed to hold
539 * non-zero values, for logical comparison purposes.
540 * @STRINGPREP_CONTAINS_UNASSIGNED: String contains unassigned Unicode
541 * code points, which is forbidden by the profile.
542 * @STRINGPREP_CONTAINS_PROHIBITED: String contains code points
543 * prohibited by the profile.
544 * @STRINGPREP_BIDI_BOTH_L_AND_RAL: String contains code points with
545 * conflicting bidirectional categories.
546 * @STRINGPREP_BIDI_LEADTRAIL_NOT_RAL: Leading and trailing character
547 * in string not of proper bidirectional category.
548 * @STRINGPREP_BIDI_CONTAINS_PROHIBITED: Contains prohibited code
549 * points detected by bidirectional code.
550 * @STRINGPREP_TOO_SMALL_BUFFER: Buffer handed to function was too
551 * small. This usually indicates a problem in the calling
552 * application.
553 * @STRINGPREP_PROFILE_ERROR: The stringprep profile was inconsistent.
554 * This usually indicates an internal error in the library.
555 * @STRINGPREP_FLAG_ERROR: The supplied flag conflicted with profile.
556 * This usually indicates a problem in the calling application.
557 * @STRINGPREP_UNKNOWN_PROFILE: The supplied profile name was not
558 * known to the library.
559 * @STRINGPREP_NFKC_FAILED: The Unicode NFKC operation failed. This
560 * usually indicates an internal error in the library.
561 * @STRINGPREP_MALLOC_ERROR: The malloc() was out of memory. This is
562 * usually a fatal error.
564 * Enumerated return codes of stringprep(), stringprep_profile()
565 * functions (and macros using those functions). The value 0 is
566 * guaranteed to always correspond to success.
570 * Stringprep_profile_flags:
571 * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as
572 * selecting the non-NFKC case folding tables. Usually the profile
573 * specifies BIDI and NFKC settings, and applications should not
574 * override it unless in special situations.
575 * @STRINGPREP_NO_BIDI: Disable the BIDI step. Usually the profile
576 * specifies BIDI and NFKC settings, and applications should not
577 * override it unless in special situations.
578 * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if
579 * string contains unassigned characters according to profile.
581 * Stringprep profile flags.
585 * Stringprep_profile_steps:
587 * Various steps in the stringprep algorithm. You really want to
588 * study the source code to understand this one. Only useful if you
589 * want to add another profile.
593 * stringprep_nameprep:
594 * @in: input/output array with string to prepare.
595 * @maxlen: maximum length of input/output array.
597 * Prepare the input UTF-8 string according to the nameprep profile.
598 * The AllowUnassigned flag is true, use
599 * stringprep_nameprep_no_unassigned() if you want a false
600 * AllowUnassigned. Returns 0 iff successful, or an error code.
604 * stringprep_nameprep_no_unassigned:
605 * @in: input/output array with string to prepare.
606 * @maxlen: maximum length of input/output array.
608 * Prepare the input UTF-8 string according to the nameprep profile.
609 * The AllowUnassigned flag is false, use stringprep_nameprep() for
610 * true AllowUnassigned. Returns 0 iff successful, or an error code.
614 * stringprep_iscsi:
615 * @in: input/output array with string to prepare.
616 * @maxlen: maximum length of input/output array.
618 * Prepare the input UTF-8 string according to the draft iSCSI
619 * stringprep profile. Returns 0 iff successful, or an error code.
623 * stringprep_kerberos5:
624 * @in: input/output array with string to prepare.
625 * @maxlen: maximum length of input/output array.
627 * Prepare the input UTF-8 string according to the draft Kerberos5
628 * stringprep profile. Returns 0 iff successful, or an error code.
632 * stringprep_plain:
633 * @in: input/output array with string to prepare.
634 * @maxlen: maximum length of input/output array.
636 * Prepare the input UTF-8 string according to the draft SASL
637 * ANONYMOUS profile. Returns 0 iff successful, or an error code.
641 * stringprep_xmpp_nodeprep:
642 * @in: input/output array with string to prepare.
643 * @maxlen: maximum length of input/output array.
645 * Prepare the input UTF-8 string according to the draft XMPP node
646 * identifier profile. Returns 0 iff successful, or an error code.
650 * stringprep_xmpp_resourceprep:
651 * @in: input/output array with string to prepare.
652 * @maxlen: maximum length of input/output array.
654 * Prepare the input UTF-8 string according to the draft XMPP resource
655 * identifier profile. Returns 0 iff successful, or an error code.