1 /* stringprep.c Core stringprep implementation.
2 * Copyright (C) 2002, 2003 Simon Josefsson
4 * This file is part of GNU Libidn.
6 * GNU Libidn is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * GNU Libidn is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with GNU Libidn; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* Look up the single code point UCS4 in the range table TABLE.
   The scan walks the entries in order and stops at the first entry
   whose `start' field is zero, so a zero `start' acts as the table
   terminator.  An entry matches when start <= UCS4 <= end; an entry
   whose `end' field is zero is treated as the single code point
   `start'.
   NOTE(review): the statement executed on a match and the return
   statements are not visible in this excerpt.  Callers compare the
   result against -1 and use it as a table index, so presumably the
   matching index is returned and -1 means "not found" -- confirm.  */
25 stringprep_find_character_in_table (unsigned long ucs4
,
26 Stringprep_table_element
* table
)
/* Iterate until the terminating entry (start == 0).  */
30 for (i
= 0; table
[i
].start
; i
++)
/* Range test; a zero `end' collapses the range to `start' only.  */
31 if (ucs4
>= table
[i
].start
&&
32 ucs4
<= (table
[i
].end
? table
[i
].end
: table
[i
].start
))
/* Search the UCS-4 string UCS4 for a character that has an entry in
   TABLE.  Each character (index j below ucs4len) is looked up with
   stringprep_find_character_in_table; the lookup result is kept in
   `pos' and any value other than -1 is handled as a hit.
   NOTE(review): the parameters between `ucs4' and `table' and the
   code run on a hit are not visible in this excerpt.  Callers pass a
   length and a table-position pointer that may be NULL, and they treat
   a result of -1 as "no match" -- confirm against the full
   definition.  */
39 stringprep_find_string_in_table (unsigned long *ucs4
,
42 Stringprep_table_element
* table
)
/* Visit every character of the string in order.  */
47 for (j
= 0; j
< ucs4len
; j
++)
/* pos receives the lookup result for character j.  */
48 if ((pos
= stringprep_find_character_in_table (ucs4
[j
], table
)) != -1)
/* Apply the mapping table TABLE to the UCS-4 string UCS4 in place.
   While stringprep_find_string_in_table reports a character with a
   table entry (position `pos', table index `i'), that one character
   is replaced by the entry's `map' sequence and the length stored in
   *ucs4len is adjusted.  TABLENAME is only used in the trace output.
   Visible return values: STRINGPREP_TOO_SMALL_BUFFER when the new
   length would reach maxucs4len, and STRINGPREP_OK.
   NOTE(review): the `ucs4len' and `maxucs4len' parameter declarations
   and any guard around the printf calls are not visible in this
   excerpt.  */
59 stringprep_apply_table_to_string (unsigned long *ucs4
,
62 Stringprep_table_element
* table
,
63 const char *tablename
)
/* Keep going until no character of the string is found in TABLE.  */
72 while ((pos
= stringprep_find_string_in_table
73 (ucs4
, *ucs4len
, &i
, table
)) != -1)
/* Find the mapping length: start at STRINGPREP_MAX_MAP_CHARS and
   continue while the last counted element of table[i].map is zero
   (the loop step is not visible in this excerpt; presumably maplen
   is decremented -- confirm).  */
75 for(maplen
= STRINGPREP_MAX_MAP_CHARS
;
76 maplen
> 0 && table
[i
].map
[maplen
-1] == 0;
/* One character goes away and maplen come in; refuse when the
   resulting length would reach the buffer capacity.  */
80 if (*ucs4len
- 1 + maplen
>= maxucs4len
)
81 return STRINGPREP_TOO_SMALL_BUFFER
;
/* Trace output describing the mapping that is about to be applied.  */
87 printf ("Table %s maps U+%04lx (in range %04lx-%04lx) to",
88 tablename
, ucs4
[pos
], table
[i
].start
, table
[i
].end
);
90 printf ("Table %s maps U+%04lx to", tablename
, ucs4
[pos
]);
97 for (n
= 0; n
< maplen
; n
++)
98 printf(" U+%04lx", table
[i
].map
[n
]);
/* Shift the tail that follows position pos so that maplen elements
   fit at pos.
   NOTE(review): the size argument subtracts an element count (the
   pointer difference &ucs4[pos + 1] - ucs4) from a byte count
   (*ucs4len * sizeof (unsigned long)).  The tail holds
   (*ucs4len - pos - 1) * sizeof (unsigned long) bytes, so this looks
   like it moves more bytes than intended -- confirm.  */
103 memmove (&ucs4
[pos
+ maplen
], &ucs4
[pos
+ 1],
104 *ucs4len
* sizeof (unsigned long) - (&ucs4
[pos
+ 1] - ucs4
));
/* Write the mapping into the gap opened above.  */
105 memcpy (&ucs4
[pos
], table
[i
].map
, sizeof (unsigned long) * maplen
);
/* New length: one character replaced by maplen characters.  */
106 *ucs4len
= *ucs4len
- 1 + maplen
;
/* Trace output of the string after this replacement.  */
113 for (j
= 0; j
< *ucs4len
; j
++)
115 printf ("U+%04lx ", ucs4
[j
]);
123 return STRINGPREP_OK
;
/* INVERTED(x): X with the most significant bit of an unsigned long
   masked off.  The result is non-zero when any of the remaining bits
   of X is set.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) evaluates to non-zero when
   either
     - INVERTED (profileflags) is zero, PROFILEFLAGS is non-zero and
       PROFILEFLAGS has no bit in common with FLAGS, or
     - INVERTED (profileflags) is non-zero and PROFILEFLAGS has a bit
       in common with FLAGS.
   Every use of an argument is parenthesized so that expression
   arguments such as `A | B' are not regrouped by operator precedence
   inside the expansion.  Each argument may be evaluated more than
   once, so arguments must be free of side effects.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED (profileflags) \
    && !((profileflags) & (flags)) \
    && (profileflags)) \
   || (INVERTED (profileflags) \
       && ((profileflags) & (flags))))
133 * @in: input/output array with string to prepare.
134 * @maxlen: maximum length of input/output array.
135 * @flags: optional stringprep profile flags.
136 * @profile: pointer to stringprep profile to use.
138 * Prepare the input UTF-8 string according to the stringprep profile.
139 * Normally application programmers use stringprep profile macros such
140 * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
141 * calling this function directly.
143 * Since the stringprep operation can expand the string, @maxlen
144 * indicates how large the buffer holding the string is. The @flags
145 * are one of Stringprep_profile_flags, or 0. The profile indicates
146 * processing details, see the profile header files, such as
147 * stringprep_generic.h and stringprep_nameprep.h for two examples.
148 * Your application can define new profiles, possibly re-using the
149 * generic stringprep tables that always will be part of the library.
150 * Note that you must convert strings entered in the system's locale
151 * into UTF-8 before using this function.
153 * Return value: Returns 0 iff successful, or an error code.
/* Implementation outline, as far as the visible statements show:
   decode IN from UTF-8 into a UCS-4 work buffer, enlarge that buffer,
   run every step of PROFILE over it, encode the result back to UTF-8
   and copy it into IN when it fits in MAXLEN bytes.
   NOTE(review): many lines of this function (declarations, braces,
   several tests, breaks and the final return) are not visible in this
   excerpt; the comments below describe only what is shown.  */
156 stringprep (char *in
, size_t maxlen
, int flags
, Stringprep_profile
* profile
)
161 unsigned long *q
= 0;
163 size_t ucs4len
, maxucs4len
;
/* Decode the input; -1 is passed as the input length (presumably
   meaning "NUL-terminated" -- confirm) and ucs4len receives the
   number of UCS-4 elements.  */
165 ucs4
= stringprep_utf8_to_ucs4 (in
, -1, &ucs4len
);
/* Work-buffer capacity is a heuristic the author marked with XXX.  */
166 maxucs4len
= 4 * ucs4len
+ 10; /* XXX */
/* NOTE(review): the realloc result is stored straight into ucs4, so
   the original buffer would be lost if realloc fails.  The requested
   size is 1 + maxucs4len * sizeof (unsigned long), i.e. one extra
   byte rather than one extra element -- confirm intent.  */
167 ucs4
= realloc (ucs4
, 1 + maxucs4len
* sizeof (unsigned long));
170 rc
= STRINGPREP_MALLOC_ERROR
;
/* Trace output of the decoded input.
   NOTE(review): ucs4len is declared size_t but printed with %d.  */
177 printf ("input (length %d):\n\t", ucs4len
);
178 for (j
= 0; j
< ucs4len
; j
++)
180 printf ("U+%04lx ", ucs4
[j
]);
/* Run the profile steps in order; the profile array ends at the
   first entry whose `operation' field is zero.  */
189 for (i
= 0; profile
[i
].operation
; i
++)
191 switch (profile
[i
].operation
)
/* Unicode normalization form KC.  */
193 case STRINGPREP_NFKC
:
194 if (UNAPPLICAPLEFLAGS (flags
, profile
[i
].flags
))
197 printf("Unicode normalization with form KC not used.\n");
202 if (flags
& STRINGPREP_NO_NFKC
&& !profile
[i
].flags
)
204 /* Profile requires NFKC, but caller asked for no NFKC. */
205 rc
= STRINGPREP_FLAG_ERROR
;
/* q receives the normalized string.  */
209 q
= stringprep_ucs4_nfkc_normalize (ucs4
, ucs4len
);
213 rc
= STRINGPREP_NFKC_FAILED
;
/* Measure the normalized string: q is scanned up to its first zero
   element, leaving its length in j.  */
217 for (j
= 0; q
[j
]; j
++)
/* Trace only when normalization changed the length or the
   contents.  */
221 if (ucs4len
!= j
|| memcmp(ucs4
, q
, sizeof(ucs4
[0]) * ucs4len
) != 0)
224 printf("Unicode normalization with form KC maps string into:\n");
225 for (n
= 0; n
< j
; n
++)
227 printf ("U+%04lx ", q
[n
]);
/* Prohibited-character table: j is the position of a character
   found in the table (the test on j is not visible in this
   excerpt).  */
241 case STRINGPREP_PROHIBIT_TABLE
:
242 j
= stringprep_find_string_in_table (ucs4
, ucs4len
,
243 NULL
, profile
[i
].table
);
247 printf("Table %s prohibits string (character U+%04lx).\n",
248 profile
[i
].name
, ucs4
[j
]);
250 rc
= STRINGPREP_CONTAINS_PROHIBITED
;
/* Unassigned-code-point table: searched when flags contains
   STRINGPREP_NO_UNASSIGNED.  */
255 case STRINGPREP_UNASSIGNED_TABLE
:
256 if (UNAPPLICAPLEFLAGS (flags
, profile
[i
].flags
))
258 if (flags
& STRINGPREP_NO_UNASSIGNED
)
260 j
= stringprep_find_string_in_table
261 (ucs4
, ucs4len
, NULL
, profile
[i
].table
);
265 printf("Table %s prohibits string (unassigned "
266 "character U+%04lx).\n", profile
[i
].name
, ucs4
[j
]);
268 rc
= STRINGPREP_CONTAINS_UNASSIGNED
;
/* Mapping table: rewrites the work buffer in place and may change
   ucs4len; a result other than STRINGPREP_OK is tested for (the
   handling is not visible in this excerpt).  */
274 case STRINGPREP_MAP_TABLE
:
275 if (UNAPPLICAPLEFLAGS (flags
, profile
[i
].flags
))
277 rc
= stringprep_apply_table_to_string
278 (ucs4
, &ucs4len
, maxucs4len
, profile
[i
].table
, profile
[i
].name
);
279 if (rc
!= STRINGPREP_OK
)
/* The three bidi tables are looked up by the STRINGPREP_BIDI step
   below (what these case labels execute is not visible here).  */
283 case STRINGPREP_BIDI_PROHIBIT_TABLE
:
284 case STRINGPREP_BIDI_RAL_TABLE
:
285 case STRINGPREP_BIDI_L_TABLE
:
/* Bidirectional checks.  */
288 case STRINGPREP_BIDI
:
290 int done_prohibited
= 0;
293 int contains_ral
= -1;
/* Re-scan the whole profile (index j) for the bidi tables.  */
297 for (j
= 0; profile
[j
].operation
; j
++)
298 if (profile
[j
].operation
== STRINGPREP_BIDI_PROHIBIT_TABLE
)
301 k
= stringprep_find_string_in_table (ucs4
, ucs4len
,
302 NULL
, profile
[j
].table
);
/* NOTE(review): the message below indexes with profile[i] and
   ucs4[j], although the table searched is profile[j] and the search
   result was stored in k.  Using j (a profile index) as a string
   index looks like a slip -- confirm.  */
306 printf("Table %s prohibits string "
307 "(bidi, character U+%04lx).\n",
308 profile
[i
].name
, ucs4
[j
]);
310 rc
= STRINGPREP_BIDI_CONTAINS_PROHIBITED
;
314 else if (profile
[j
].operation
== STRINGPREP_BIDI_RAL_TABLE
)
317 if (stringprep_find_string_in_table
318 (ucs4
, ucs4len
, NULL
, profile
[j
].table
) != -1)
321 else if (profile
[j
].operation
== STRINGPREP_BIDI_L_TABLE
)
324 if (stringprep_find_string_in_table
325 (ucs4
, ucs4len
, NULL
, profile
[j
].table
) != -1)
/* A profile that asks for the bidi step must supply all three bidi
   tables; otherwise the profile itself is reported as faulty.  */
329 if (!done_prohibited
|| !done_ral
|| !done_l
)
331 rc
= STRINGPREP_PROFILE_ERROR
;
/* A string may not mix L and RAL characters.  */
335 if (contains_ral
!= -1 && contains_l
!= -1)
338 printf("String contains both L and RAL characters.\n");
340 rc
= STRINGPREP_BIDI_BOTH_L_AND_RAL
;
/* With RAL characters present, both the first and the last
   character must be found in the RAL table; contains_ral is used
   as the index of that table's profile entry.  */
344 if (contains_ral
!= -1)
346 if (!(stringprep_find_character_in_table
347 (ucs4
[0], profile
[contains_ral
].table
) != -1 &&
348 stringprep_find_character_in_table
349 (ucs4
[ucs4len
- 1], profile
[contains_ral
].table
) != -1))
352 printf("Bidi string does not start/end with "
353 "RAL characters.\n");
355 rc
= STRINGPREP_BIDI_LEADTRAIL_NOT_RAL
;
/* NOTE(review): presumably the default case for an unknown
   operation; its label is not visible in this excerpt.  */
363 rc
= STRINGPREP_PROFILE_ERROR
;
/* Trace output of the processed string.
   NOTE(review): ucs4len is declared size_t but printed with %d.  */
373 printf ("output (length %d):\n\t", ucs4len
);
374 for (j
= 0; j
< ucs4len
; j
++)
376 printf ("U+%04lx ", ucs4
[j
]);
/* Encode the processed string back to UTF-8.  */
384 p
= stringprep_ucs4_to_utf8 (ucs4
, ucs4len
, 0, 0);
/* The encoded text plus its terminating NUL must fit in maxlen
   bytes.  */
386 if (strlen (p
) >= maxlen
)
388 rc
= STRINGPREP_TOO_SMALL_BUFFER
;
/* Copy the result over the caller's buffer; this relies on the
   length test above (the control flow between the two statements is
   not visible in this excerpt).  */
392 strcpy (in
, p
); /* flawfinder: ignore */
407 * stringprep_profile:
408 * @in: input/output array with string to prepare.
409 * @out: output variable with newly allocated string.
410 * @flags: optional stringprep profile flags.
411 * @profile: name of stringprep profile to use.
413 * Prepare the input UTF-8 string according to the stringprep profile.
414 * Normally application programmers use stringprep profile macros such
415 * as stringprep_nameprep(), stringprep_kerberos5() etc instead of
416 * calling this function directly.
418 * Note that you must convert strings entered in the system's locale
419 * into UTF-8 before using this function.
421 * The output @out variable must be deallocated by the caller.
423 * Return value: Returns 0 iff successful, or an error code.
426 stringprep_profile (char *in
, char **out
, char *profile
, int flags
)
428 Stringprep_profiles
*p
;
433 for (p
= &stringprep_profiles
[0]; p
->name
; p
++)
434 if (strcmp(p
->name
, profile
) == 0)
437 if (!p
|| !p
->name
|| !p
->tables
)
438 return STRINGPREP_UNKNOWN_PROFILE
;
440 len
= strlen(in
) + BUFSIZ
;
441 str
= (char*) malloc(len
);
443 return STRINGPREP_MALLOC_ERROR
;
447 rc
= stringprep (str
, len
, flags
, p
->tables
);
449 if (rc
== STRINGPREP_OK
)