Add.
[libidn.git] / lib / stringprep.c
blob0bbff56ba5f19400269cd2e559ecc9636faa25a8
1 /* stringprep.c Core stringprep implementation.
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "internal.h"
24 static ssize_t
25 stringprep_find_character_in_table (uint32_t ucs4,
26 Stringprep_table_element * table)
28 ssize_t i;
30 for (i = 0; table[i].start; i++)
31 if (ucs4 >= table[i].start &&
32 ucs4 <= (table[i].end ? table[i].end : table[i].start))
33 return i;
35 return -1;
38 static ssize_t
39 stringprep_find_string_in_table (uint32_t * ucs4,
40 size_t ucs4len,
41 size_t * tablepos,
42 Stringprep_table_element * table)
44 size_t j;
45 ssize_t pos;
47 for (j = 0; j < ucs4len; j++)
48 if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
50 if (tablepos)
51 *tablepos = pos;
52 return j;
55 return -1;
58 static int
59 stringprep_apply_table_to_string (uint32_t * ucs4,
60 size_t * ucs4len,
61 size_t maxucs4len,
62 Stringprep_table_element * table,
63 const char *tablename)
65 ssize_t pos;
66 size_t i, maplen;
68 while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
69 &i, table)) != -1)
71 for (maplen = STRINGPREP_MAX_MAP_CHARS;
72 maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
75 if (*ucs4len - 1 + maplen >= maxucs4len)
76 return STRINGPREP_TOO_SMALL_BUFFER;
78 memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
79 *ucs4len * sizeof (uint32_t) - (&ucs4[pos + 1] - ucs4));
80 memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
81 *ucs4len = *ucs4len - 1 + maplen;
84 return STRINGPREP_OK;
/* INVERTED(x) yields X with the most significant bit of an unsigned
   long masked off.
   NOTE(review): the name suggests a test of the top bit, but the mask
   keeps every bit except the top one -- confirm the intent before
   changing it; the value is deliberately left as is here.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) is non-zero when either
     - INVERTED(profileflags) is zero, PROFILEFLAGS itself is non-zero
       and it shares no bit with FLAGS, or
     - INVERTED(profileflags) is non-zero and PROFILEFLAGS shares at
       least one bit with FLAGS.
   stringprep() skips a profile step when this evaluates non-zero.
   Both arguments are fully parenthesized so that expressions such as
   `a | b' can be passed safely.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED(profileflags) && !((profileflags) & (flags)) && (profileflags)) || \
   ( INVERTED(profileflags) && ((profileflags) & (flags))))
92 /**
93 * stringprep:
94 * @in: input/ouput array with string to prepare.
95 * @maxlen: maximum length of input/output array.
96 * @flags: optional stringprep profile flags.
97 * @profile: pointer to stringprep profile to use.
99 * Prepare the input UTF-8 string according to the stringprep profile.
100 * Normally application programmers use stringprep profile macros such
101 * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
102 * calling this function directly.
104 * Since the stringprep operation can expand the string, @maxlen
105 * indicate how large the buffer holding the string is. The @flags
106 * are one of Stringprep_profile_flags, or 0. The profile indicates
107 * processing details specific to that profile. Your application can
108 * define new profiles, possibly re-using the generic stringprep
109 * tables that always will be part of the library.
111 * Note that you must convert strings entered in the systems locale
112 * into UTF-8 before using this function.
114 * Return value: Returns 0 iff successful, or an error code.
117 stringprep (char *in,
118 size_t maxlen,
119 Stringprep_profile_flags flags, Stringprep_profile * profile)
121 size_t i, j;
122 ssize_t k;
123 int rc;
124 char *p = 0;
125 uint32_t *q = 0;
126 uint32_t *ucs4;
127 size_t ucs4len, maxucs4len;
129 ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
130 maxucs4len = 4 * ucs4len + 10; /* XXX */
131 ucs4 = realloc (ucs4, 1 + maxucs4len * sizeof (uint32_t));
132 if (!ucs4)
134 rc = STRINGPREP_MALLOC_ERROR;
135 goto done;
138 for (i = 0; profile[i].operation; i++)
140 switch (profile[i].operation)
142 case STRINGPREP_NFKC:
143 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
145 break;
148 if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
150 /* Profile requires NFKC, but callee asked for no NFKC. */
151 rc = STRINGPREP_FLAG_ERROR;
152 goto done;
155 q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
157 if (!q)
159 rc = STRINGPREP_NFKC_FAILED;
160 goto done;
163 for (j = 0; q[j]; j++)
166 free (ucs4);
167 ucs4 = q;
168 ucs4len = j;
169 q = 0;
170 break;
172 case STRINGPREP_PROHIBIT_TABLE:
173 k = stringprep_find_string_in_table (ucs4, ucs4len,
174 NULL, profile[i].table);
175 if (k != -1)
177 rc = STRINGPREP_CONTAINS_PROHIBITED;
178 goto done;
180 break;
182 case STRINGPREP_UNASSIGNED_TABLE:
183 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
184 break;
185 if (flags & STRINGPREP_NO_UNASSIGNED)
187 k = stringprep_find_string_in_table
188 (ucs4, ucs4len, NULL, profile[i].table);
189 if (k != -1)
191 rc = STRINGPREP_CONTAINS_UNASSIGNED;
192 goto done;
195 break;
197 case STRINGPREP_MAP_TABLE:
198 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
199 break;
200 rc = stringprep_apply_table_to_string
201 (ucs4, &ucs4len, maxucs4len, profile[i].table, profile[i].name);
202 if (rc != STRINGPREP_OK)
203 goto done;
204 break;
206 case STRINGPREP_BIDI_PROHIBIT_TABLE:
207 case STRINGPREP_BIDI_RAL_TABLE:
208 case STRINGPREP_BIDI_L_TABLE:
209 break;
211 case STRINGPREP_BIDI:
213 int done_prohibited = 0;
214 int done_ral = 0;
215 int done_l = 0;
216 int contains_ral = -1;
217 int contains_l = -1;
219 for (j = 0; profile[j].operation; j++)
220 if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
222 done_prohibited = 1;
223 k = stringprep_find_string_in_table (ucs4, ucs4len,
224 NULL,
225 profile[j].table);
226 if (k != -1)
228 rc = STRINGPREP_BIDI_CONTAINS_PROHIBITED;
229 goto done;
232 else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
234 done_ral = 1;
235 if (stringprep_find_string_in_table
236 (ucs4, ucs4len, NULL, profile[j].table) != -1)
237 contains_ral = j;
239 else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
241 done_l = 1;
242 if (stringprep_find_string_in_table
243 (ucs4, ucs4len, NULL, profile[j].table) != -1)
244 contains_l = j;
247 if (!done_prohibited || !done_ral || !done_l)
249 rc = STRINGPREP_PROFILE_ERROR;
250 goto done;
253 if (contains_ral != -1 && contains_l != -1)
255 rc = STRINGPREP_BIDI_BOTH_L_AND_RAL;
256 goto done;
259 if (contains_ral != -1)
261 if (!(stringprep_find_character_in_table
262 (ucs4[0], profile[contains_ral].table) != -1 &&
263 stringprep_find_character_in_table
264 (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
266 rc = STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
267 goto done;
271 break;
273 default:
274 rc = STRINGPREP_PROFILE_ERROR;
275 goto done;
276 break;
280 p = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
282 if (strlen (p) >= maxlen)
284 rc = STRINGPREP_TOO_SMALL_BUFFER;
285 goto done;
288 strcpy (in, p); /* flawfinder: ignore */
290 rc = STRINGPREP_OK;
292 done:
293 if (p)
294 free (p);
295 if (q)
296 free (q);
297 if (ucs4)
298 free (ucs4);
299 return rc;
303 * stringprep_profile:
304 * @in: input/ouput array with string to prepare.
305 * @out: output variable with newly allocate string.
306 * @flags: optional stringprep profile flags.
307 * @profile: name of stringprep profile to use.
309 * Prepare the input UTF-8 string according to the stringprep profile.
310 * Normally application programmers use stringprep profile macros such
311 * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
312 * calling this function directly.
314 * Note that you must convert strings entered in the systems locale
315 * into UTF-8 before using this function.
317 * The output @out variable must be deallocated by the caller.
319 * Return value: Returns 0 iff successful, or an error code.
322 stringprep_profile (char *in,
323 char **out, char *profile, Stringprep_profile_flags flags)
325 Stringprep_profiles *p;
326 char *str;
327 size_t len;
328 int rc;
330 for (p = &stringprep_profiles[0]; p->name; p++)
331 if (strcmp (p->name, profile) == 0)
332 break;
334 if (!p || !p->name || !p->tables)
335 return STRINGPREP_UNKNOWN_PROFILE;
337 len = strlen (in) + BUFSIZ;
338 str = (char *) malloc (len);
339 if (str == NULL)
340 return STRINGPREP_MALLOC_ERROR;
342 strcpy (str, in);
344 rc = stringprep (str, len, flags, p->tables);
346 if (rc == STRINGPREP_OK)
347 *out = str;
348 else
349 free (str);
351 return rc;
355 * STRINGPREP_VERSION
357 * String defined via CPP denoting the header file version number.
358 * Used together with stringprep_check_version() to verify header file
359 * and run-time library consistency.
363 * STRINGPREP_MAX_MAP_CHARS
365 * Maximum number of code points that can replace a single code point,
366 * during stringprep mapping.
370 * Stringprep_rc
372 * Enumerated return codes of stringprep(), stringprep_profile()
373 * functions (and macros using those functions). The value 0 is
374 * guaranteed to always correspond to success.
378 * Stringprep_profile_flags:
379 * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as
380 * selecting the non-NFKC case folding tables. Usually the profile
381 * specifies BIDI and NFKC settings, and applications should not
382 * override it unless in special situations.
383 * @STRINGPREP_NO_BIDI: Disable the BIDI step. Usually the profile
384 * specifies BIDI and NFKC settings, and applications should not
385 * override it unless in special situations.
386 * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if
387 * string contains unassigned characters according to profile.
389 * Stringprep profile flags.
393 * stringprep_nameprep:
394 * @in: input/output array with string to prepare.
395 * @maxlen: maximum length of input/output array.
397 * Prepare the input UTF-8 string according to the nameprep profile.
398 * The AllowUnassigned flag is true, use
399 * stringprep_nameprep_no_unassigned() for false AllowUnassigned.
400 * Returns 0 iff successful, or an error code.
404 * stringprep_nameprep_no_unassigned:
405 * @in: input/output array with string to prepare.
406 * @maxlen: maximum length of input/output array.
408 * Prepare the input UTF-8 string according to the nameprep profile.
409 * The AllowUnassigned flag is false, use stringprep_nameprep() for
410 * true AllowUnassigned. Returns 0 iff successful, or an error code.
414 * stringprep_iscsi:
415 * @in: input/output array with string to prepare.
416 * @maxlen: maximum length of input/output array.
418 * Prepare the input UTF-8 string according to the draft iSCSI
419 * stringprep profile. Returns 0 iff successful, or an error code.
423 * stringprep_kerberos5:
424 * @in: input/output array with string to prepare.
425 * @maxlen: maximum length of input/output array.
427 * Prepare the input UTF-8 string according to the draft Kerberos5
428 * stringprep profile. Returns 0 iff successful, or an error code.
432 * stringprep_plain:
433 * @in: input/output array with string to prepare.
434 * @maxlen: maximum length of input/output array.
436 * Prepare the input UTF-8 string according to the draft SASL
437 * ANONYMOUS profile. Returns 0 iff successful, or an error code.
441 * stringprep_xmpp_nodeprep:
442 * @in: input/output array with string to prepare.
443 * @maxlen: maximum length of input/output array.
445 * Prepare the input UTF-8 string according to the draft XMPP node
446 * identifier profile. Returns 0 iff successful, or an error code.
450 * stringprep_xmpp_resourceprep:
451 * @in: input/output array with string to prepare.
452 * @maxlen: maximum length of input/output array.
454 * Prepare the input UTF-8 string according to the draft XMPP resource
455 * identifier profile. Returns 0 iff successful, or an error code.
459 * stringprep_generic:
460 * @in: input/output array with string to prepare.
461 * @maxlen: maximum length of input/output array.
463 * Prepare the input UTF-8 string according to a hypothetical "generic"
464 * stringprep profile. This is mostly used for debugging or when
465 * constructing new stringprep profiles. Returns 0 iff successful, or
466 * an error code.