Sync with RFC.
[libidn.git] / stringprep.c
blobc396c8ace4b2ba9198a1726167433af1efbcd3e3
1 /* stringprep.c Core stringprep implementation.
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "internal.h"
24 static int
25 stringprep_find_character_in_table (unsigned long ucs4,
26 Stringprep_table_element * table)
28 int i;
30 for (i = 0; table[i].start; i++)
31 if (ucs4 >= table[i].start &&
32 ucs4 <= (table[i].end ? table[i].end : table[i].start))
33 return i;
35 return -1;
38 static ssize_t
39 stringprep_find_string_in_table (unsigned long *ucs4,
40 size_t ucs4len,
41 int *tablepos,
42 Stringprep_table_element * table)
44 size_t j;
45 int pos;
47 for (j = 0; j < ucs4len; j++)
48 if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
50 if (tablepos)
51 *tablepos = pos;
52 return j;
55 return -1;
58 static int
59 stringprep_apply_table_to_string (unsigned long *ucs4,
60 size_t *ucs4len,
61 size_t maxucs4len,
62 Stringprep_table_element * table,
63 const char *tablename)
65 int i;
66 ssize_t pos;
67 size_t maplen;
68 #ifdef DRAFT
69 int modified = 0;
70 #endif
72 while ((pos = stringprep_find_string_in_table
73 (ucs4, *ucs4len, &i, table)) != -1)
75 for(maplen = STRINGPREP_MAX_MAP_CHARS;
76 maplen > 0 && table[i].map[maplen-1] == 0;
77 maplen--)
80 if (*ucs4len - 1 + maplen >= maxucs4len)
81 return STRINGPREP_TOO_SMALL_BUFFER;
83 #ifdef DRAFT
84 modified = 1;
86 if (table[i].end)
87 printf ("Table %s maps U+%04lx (in range %04lx-%04lx) to",
88 tablename, ucs4[pos], table[i].start, table[i].end);
89 else
90 printf ("Table %s maps U+%04lx to", tablename, ucs4[pos]);
91 if (maplen == 0)
92 printf(" nothing");
93 else
95 int n;
97 for (n = 0; n < maplen; n++)
98 printf(" U+%04lx", table[i].map[n]);
100 printf(".\n");
101 #endif
103 memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
104 *ucs4len * sizeof (unsigned long) - (&ucs4[pos + 1] - ucs4));
105 memcpy (&ucs4[pos], table[i].map, sizeof (unsigned long) * maplen);
106 *ucs4len = *ucs4len - 1 + maplen;
109 #ifdef DRAFT
110 if (modified)
112 int j;
113 for (j = 0; j < *ucs4len; j++)
115 printf ("U+%04lx ", ucs4[j]);
116 if ((j+1)%8 == 0)
117 printf("\n\t");
119 printf ("\n");
121 #endif
123 return STRINGPREP_OK;
/* INVERTED(x) masks off the most significant bit of an unsigned long
   and yields the remaining bits of X.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* Non-zero when a profile step carrying PROFILEFLAGS is to be skipped
   for a caller that passed FLAGS.  Both arguments are fully
   parenthesized so that compound expressions (e.g. `a | b') can be
   passed safely.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags)                          \
  ((!INVERTED(profileflags) && !((profileflags) & (flags))              \
    && (profileflags)) ||                                               \
   ( INVERTED(profileflags) && ((profileflags) & (flags))))
132 * stringprep:
133 * @in: input/ouput array with string to prepare.
134 * @maxlen: maximum length of input/output array.
135 * @flags: optional stringprep profile flags.
136 * @profile: pointer to stringprep profile to use.
138 * Prepare the input UTF-8 string according to the stringprep profile.
139 * Normally application programmers use stringprep profile macros such
140 * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
141 * calling this function directly.
143 * Since the stringprep operation can expand the string, @maxlen
144 * indicate how large the buffer holding the string is. The @flags
145 * are one of Stringprep_profile_flags, or 0. The profile indicates
146 * processing details, see the profile header files, such as
147 * stringprep_generic.h and stringprep_nameprep.h for two examples.
148 * Your application can define new profiles, possibly re-using the
149 * generic stringprep tables that always will be part of the library.
150 * Note that you must convert strings entered in the systems locale
151 * into UTF-8 before using this function.
153 * Return value: Returns 0 iff successful, or an error code.
156 stringprep (char *in, size_t maxlen, int flags, Stringprep_profile * profile)
158 int i, j;
159 int rc;
160 char *p = 0;
161 unsigned long *q = 0;
162 unsigned long *ucs4;
163 size_t ucs4len, maxucs4len;
165 ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
166 maxucs4len = 4 * ucs4len + 10; /* XXX */
167 ucs4 = realloc (ucs4, 1 + maxucs4len * sizeof (unsigned long));
168 if (!ucs4)
170 rc = STRINGPREP_MALLOC_ERROR;
171 goto done;
174 #ifdef DRAFT
176 int j;
177 printf ("input: ");
178 for (j = 0; j < ucs4len; j++)
180 printf ("U+%04lx ", ucs4[j]);
181 if ((j+1)%8 == 0)
182 printf("\n\t");
184 printf ("\n");
185 printf ("\n");
187 #endif
189 for (i = 0; profile[i].operation; i++)
191 switch (profile[i].operation)
193 case STRINGPREP_NFKC:
194 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
196 #ifdef DRAFT
197 printf("Unicode normalization with form KC not used.\n");
198 #endif
199 break;
202 if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
204 /* Profile requires NFKC, but callee asked for no NFKC. */
205 rc = STRINGPREP_FLAG_ERROR;
206 goto done;
209 q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
211 if (!q)
213 rc = STRINGPREP_NFKC_FAILED;
214 goto done;
217 for (j = 0; q[j]; j++)
220 #ifdef DRAFT
221 if (ucs4len != j || memcmp(ucs4, q, sizeof(ucs4[0]) * ucs4len) != 0)
223 int n;
224 printf("Unicode normalization with form KC maps string into:\n");
225 for (n = 0; n < j; n++)
227 printf ("U+%04lx ", q[n]);
228 if ((n+1)%8 == 0)
229 printf("\n\t");
231 printf ("\n");
233 #endif
235 free (ucs4);
236 ucs4 = q;
237 ucs4len = j;
238 q = 0;
239 break;
241 case STRINGPREP_PROHIBIT_TABLE:
242 j = stringprep_find_string_in_table (ucs4, ucs4len,
243 NULL, profile[i].table);
244 if (j != -1)
246 #ifdef DRAFT
247 printf("Table %s prohibits string (character U+%04lx).\n",
248 profile[i].name, ucs4[j]);
249 #endif
250 rc = STRINGPREP_CONTAINS_PROHIBITED;
251 goto done;
253 break;
255 case STRINGPREP_UNASSIGNED_TABLE:
256 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
257 break;
258 if (flags & STRINGPREP_NO_UNASSIGNED)
260 j = stringprep_find_string_in_table
261 (ucs4, ucs4len, NULL, profile[i].table);
262 if (j != -1)
264 #ifdef DRAFT
265 printf("Table %s prohibits string (unassigned "
266 "character U+%04lx).\n", profile[i].name, ucs4[j]);
267 #endif
268 rc = STRINGPREP_CONTAINS_UNASSIGNED;
269 goto done;
272 break;
274 case STRINGPREP_MAP_TABLE:
275 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
276 break;
277 rc = stringprep_apply_table_to_string
278 (ucs4, &ucs4len, maxucs4len, profile[i].table, profile[i].name);
279 if (rc != STRINGPREP_OK)
280 goto done;
281 break;
283 case STRINGPREP_BIDI_PROHIBIT_TABLE:
284 case STRINGPREP_BIDI_RAL_TABLE:
285 case STRINGPREP_BIDI_L_TABLE:
286 break;
288 case STRINGPREP_BIDI:
290 int done_prohibited = 0;
291 int done_ral = 0;
292 int done_l = 0;
293 int contains_ral = -1;
294 int contains_l = -1;
295 int k;
297 for (j = 0; profile[j].operation; j++)
298 if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
300 done_prohibited = 1;
301 k = stringprep_find_string_in_table (ucs4, ucs4len,
302 NULL, profile[j].table);
303 if (k != -1)
305 #ifdef DRAFT
306 printf("Table %s prohibits string "
307 "(bidi, character U+%04lx).\n",
308 profile[i].name, ucs4[j]);
309 #endif
310 rc = STRINGPREP_BIDI_CONTAINS_PROHIBITED;
311 goto done;
314 else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
316 done_ral = 1;
317 if (stringprep_find_string_in_table
318 (ucs4, ucs4len, NULL, profile[j].table) != -1)
319 contains_ral = j;
321 else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
323 done_l = 1;
324 if (stringprep_find_string_in_table
325 (ucs4, ucs4len, NULL, profile[j].table) != -1)
326 contains_l = j;
329 if (!done_prohibited || !done_ral || !done_l)
331 rc = STRINGPREP_PROFILE_ERROR;
332 goto done;
335 if (contains_ral != -1 && contains_l != -1)
337 #ifdef DRAFT
338 printf("String contains both L and RAL characters.\n");
339 #endif
340 rc = STRINGPREP_BIDI_BOTH_L_AND_RAL;
341 goto done;
344 if (contains_ral != -1)
346 if (!(stringprep_find_character_in_table
347 (ucs4[0], profile[contains_ral].table) != -1 &&
348 stringprep_find_character_in_table
349 (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
351 #ifdef DRAFT
352 printf("Bidi string does not start/end with "
353 "RAL characters.\n");
354 #endif
355 rc = STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
356 goto done;
360 break;
362 default:
363 rc = STRINGPREP_PROFILE_ERROR;
364 goto done;
365 break;
369 #ifdef DRAFT
371 int j;
372 printf ("\n");
373 printf ("output: ");
374 for (j = 0; j < ucs4len; j++)
376 printf ("U+%04lx ", ucs4[j]);
377 if ((j+1)%8 == 0)
378 printf("\n\t");
380 printf ("\n");
382 #endif
384 p = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
386 if (strlen (p) >= maxlen)
388 rc = STRINGPREP_TOO_SMALL_BUFFER;
389 goto done;
392 strcpy (in, p); /* flawfinder: ignore */
394 rc = STRINGPREP_OK;
396 done:
397 if (p)
398 free (p);
399 if (q)
400 free (q);
401 if (ucs4)
402 free (ucs4);
403 return rc;
407 * stringprep_profile:
408 * @in: input/ouput array with string to prepare.
409 * @out: output variable with newly allocate string.
410 * @flags: optional stringprep profile flags.
411 * @profile: name of stringprep profile to use.
413 * Prepare the input UTF-8 string according to the stringprep profile.
414 * Normally application programmers use stringprep profile macros such
415 * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
416 * calling this function directly.
418 * Note that you must convert strings entered in the systems locale
419 * into UTF-8 before using this function.
421 * The output @out variable must be deallocated by the caller.
423 * Return value: Returns 0 iff successful, or an error code.
426 stringprep_profile (char *in, char **out, char *profile, int flags)
428 Stringprep_profiles *p;
429 char *str;
430 size_t len;
431 int rc;
433 for (p = &stringprep_profiles[0]; p->name; p++)
434 if (strcmp(p->name, profile) == 0)
435 break;
437 if (!p || !p->name || !p->tables)
438 return STRINGPREP_UNKNOWN_PROFILE;
440 len = strlen(in) + BUFSIZ;
441 str = (char*) malloc(len);
442 if (str == NULL)
443 return STRINGPREP_MALLOC_ERROR;
445 strcpy(str, in);
447 rc = stringprep (str, len, flags, p->tables);
449 if (rc == STRINGPREP_OK)
450 *out = str;
452 return rc;