Sync Citrus iconv support with NetBSD.
[dragonfly/netmp.git] / lib / libc / citrus / modules / citrus_ues.c
blob8cea936c14204a48686dbb589f4a956f781069ae
1 /* $NetBSD: citrus_ues.c,v 1.1 2006/11/13 15:16:31 tnozaki Exp $ */
2 /* $DragonFly: src/lib/libc/citrus/modules/citrus_ues.c,v 1.1 2008/04/10 10:21:02 hasso Exp $ */
4 /*-
5 * Copyright (c)2006 Citrus Project,
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
30 #include <assert.h>
31 #include <errno.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <stdint.h>
35 #include <stdlib.h>
36 #include <limits.h>
37 #include <wchar.h>
39 #include "citrus_namespace.h"
40 #include "citrus_types.h"
41 #include "citrus_bcs.h"
42 #include "citrus_module.h"
43 #include "citrus_ctype.h"
44 #include "citrus_stdenc.h"
45 #include "citrus_ues.h"
/*
 * Per-encoding configuration, filled in by
 * _citrus_UES_encoding_module_init().
 */
typedef struct {
	int	mode;		/* bitmask of MODE_* flags */
#define MODE_C99	1	/* set when the "C99" variable is given */
	size_t	mb_cur_max;	/* 10 when MODE_C99 is set, otherwise 12 */
} _UESEncodingInfo;
/*
 * Conversion state: bytes of a not yet completed (or not yet flushed)
 * escape sequence.
 */
typedef struct {
	int	chlen;		/* number of valid bytes in ch[] */
	char	ch[12];		/* room for a surrogate pair: "\uXXXX\uXXXX" */
} _UESState;
58 typedef struct {
59 _UESEncodingInfo ei;
60 struct {
61 /* for future multi-locale facility */
62 _UESState s_mblen;
63 _UESState s_mbrlen;
64 _UESState s_mbrtowc;
65 _UESState s_mbtowc;
66 _UESState s_mbsrtowcs;
67 _UESState s_wcrtomb;
68 _UESState s_wcsrtombs;
69 _UESState s_wctomb;
70 } states;
71 } _UESCTypeInfo;
/* Accessors into _UESCTypeInfo. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Glue names; presumably consumed by the citrus_*_template.h headers
 * included at the end of this file.
 */
#define _FUNCNAME(m)			_citrus_UES_##m
#define _ENCODING_INFO			_UESEncodingInfo
#define _CTYPE_INFO			_UESCTypeInfo
#define _ENCODING_STATE			_UESState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
84 static __inline void
85 /*ARGSUSED*/
86 _citrus_UES_init_state(_UESEncodingInfo * __restrict ei,
87 _UESState * __restrict psenc)
89 psenc->chlen = 0;
92 static __inline void
93 /*ARGSUSED*/
94 _citrus_UES_pack_state(_UESEncodingInfo * __restrict ei,
95 void *__restrict pspriv, const _UESState * __restrict psenc)
97 /* ei seem to be unused */
98 _DIAGASSERT(pspriv != NULL);
99 _DIAGASSERT(psenc != NULL);
101 memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
104 static __inline void
105 /*ARGSUSED*/
106 _citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei,
107 _UESState * __restrict psenc, const void * __restrict pspriv)
109 /* ei seem to be unused */
110 _DIAGASSERT(psenc != NULL);
111 _DIAGASSERT(pspriv != NULL);
113 memcpy((void *)psenc, pspriv, sizeof(*psenc));
/*
 * Convert one hexadecimal digit character (either case) to its value.
 * Returns 0..15, or -1 if ch is not a hexadecimal digit.
 */
static __inline int
to_int(int ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	else if (ch >= 'A' && ch <= 'F')
		return (ch - 'A') + 10;
	else if (ch >= 'a' && ch <= 'f')
		return (ch - 'a') + 10;
	return -1;
}
/* Escape sequence syntax: ESCAPE, then UCS2_ESC or UCS4_ESC, then hex. */
#define ESCAPE		'\\'
#define UCS2_ESC	'u'
#define UCS4_ESC	'U'

/* Bit widths of the value following UCS2_ESC / UCS4_ESC. */
#define UCS2_BIT	16
#define UCS4_BIT	32
#define BMP_MAX		UINT32_C(0xFFFF)	/* largest single \uXXXX value */
#define UCS2_MAX	UINT32_C(0x10FFFF)	/* largest value written as a surrogate pair */
#define UCS4_MAX	UINT32_C(0x7FFFFFFF)	/* largest value accepted at all */

static const char xdig[] = "0123456789abcdef";

/*
 * Write wc to s as an escape sequence, without NUL termination.
 *
 * bit selects the form: UCS2_BIT gives "\uXXXX" (6 bytes), UCS4_BIT gives
 * "\UXXXXXXXX" (10 bytes).  Any other value aborts the process.
 * Hex digits are lower case.  Returns the number of bytes written.
 */
static __inline int
to_str(char *s, wchar_t wc, int bit)
{
	char *p;
	uint32_t v;

	p = s;
	*p++ = ESCAPE;
	switch (bit) {
	case UCS2_BIT:
		*p++ = UCS2_ESC;
		break;
	case UCS4_BIT:
		*p++ = UCS4_ESC;
		break;
	default:
		abort();
	}
	/* shift an unsigned copy: wchar_t may be a signed type */
	v = (uint32_t)wc;
	do {
		bit -= 4;
		*p++ = xdig[(v >> bit) & 0xF];
	} while (bit > 0);
	return (int)(p - s);
}
/* Nonzero if wc lies in the range 0xD800..0xDBFF (first half of a pair). */
static __inline int
is_hi_surrogate(wchar_t wc)
{
	return wc >= 0xD800 && wc <= 0xDBFF;
}
/* Nonzero if wc lies in the range 0xDC00..0xDFFF (second half of a pair). */
static __inline int
is_lo_surrogate(wchar_t wc)
{
	return wc >= 0xDC00 && wc <= 0xDFFF;
}
/*
 * Combine a surrogate pair into a single code point (0x10000 and above).
 * hi must be a high surrogate and lo a low surrogate.
 */
static __inline wchar_t
surrogate_to_ucs(wchar_t hi, wchar_t lo)
{
	_DIAGASSERT(is_hi_surrogate(hi));
	_DIAGASSERT(is_lo_surrogate(lo));

	/* each half carries 10 payload bits */
	hi -= 0xD800;
	lo -= 0xDC00;
	return (hi << 10 | lo) + 0x10000;
}
/*
 * Split a code point wc >= 0x10000 into a surrogate pair, stored through
 * hi and lo (the inverse of surrogate_to_ucs()).
 */
static __inline void
ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo)
{
	_DIAGASSERT(hi != NULL);
	_DIAGASSERT(lo != NULL);
	_DIAGASSERT(wc >= 0x10000);

	wc -= 0x10000;
	*hi = (wc >> 10) + 0xD800;	/* upper 10 bits */
	*lo = (wc & 0x3FF) + 0xDC00;	/* lower 10 bits */
}
/*
 * Nonzero if wc is at most 0x9F and is none of 0x24 ('$'), 0x40 ('@')
 * or 0x60 ('`').  In MODE_C99 such characters are written as a plain
 * byte and rejected when they arrive as an escape sequence.
 */
static __inline int
is_basic(wchar_t wc)
{
	return (uint32_t)wc <= 0x9F &&
	    wc != 0x24 && wc != 0x40 && wc != 0x60;
}
205 static int
206 _citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei,
207 wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
208 _UESState * __restrict psenc, size_t * __restrict nresult)
210 const char *s0;
211 int ch, head, tail, i, num;
212 wchar_t hi, wc;
214 _DIAGASSERT(ei != NULL);
215 /* pwc may be null */
216 _DIAGASSERT(s != NULL);
217 _DIAGASSERT(psenc != NULL);
218 _DIAGASSERT(nresult != NULL);
220 if (*s == NULL) {
221 _citrus_UES_init_state(ei, psenc);
222 *nresult = 0;
223 return 0;
225 s0 = *s;
227 hi = (wchar_t)0;
228 tail = 0;
230 surrogate:
231 wc = (wchar_t)0;
232 head = tail;
233 if (psenc->chlen == head) {
234 if (n-- < 1)
235 goto restart;
236 psenc->ch[psenc->chlen++] = *s0++;
238 ch = (unsigned char)psenc->ch[head++];
239 if (ch == ESCAPE) {
240 if (psenc->chlen == head) {
241 if (n-- < 1)
242 goto restart;
243 psenc->ch[psenc->chlen++] = *s0++;
245 switch (psenc->ch[head]) {
246 case UCS2_ESC:
247 tail += 6;
248 break;
249 case UCS4_ESC:
250 if (ei->mode & MODE_C99) {
251 tail = 10;
252 break;
254 /*FALLTHROUGH*/
255 default:
256 tail = 0;
258 ++head;
260 for (; head < tail; ++head) {
261 if (psenc->chlen == head) {
262 if (n-- < 1) {
263 restart:
264 *s = s0;
265 *nresult = (size_t)-2;
266 return 0;
268 psenc->ch[psenc->chlen++] = *s0++;
270 num = to_int((int)(unsigned char)psenc->ch[head]);
271 if (num < 0) {
272 tail = 0;
273 break;
275 wc = (wc << 4) | num;
277 head = 0;
278 switch (tail) {
279 case 0:
280 break;
281 case 6:
282 if (hi != (wchar_t)0)
283 break;
284 if ((ei->mode & MODE_C99) == 0) {
285 if (is_hi_surrogate(wc) != 0) {
286 hi = wc;
287 goto surrogate;
289 if ((uint32_t)wc <= 0x7F /* XXX */ ||
290 is_lo_surrogate(wc) != 0)
291 break;
292 goto done;
294 /*FALLTHROUGH*/
295 case 10:
296 if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX &&
297 is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0)
298 goto done;
299 *nresult = (size_t)-1;
300 return EILSEQ;
301 case 12:
302 if (is_lo_surrogate(wc) == 0)
303 break;
304 wc = surrogate_to_ucs(hi, wc);
305 goto done;
307 ch = (unsigned char)psenc->ch[0];
308 head = psenc->chlen;
309 if (--head > 0)
310 memmove(&psenc->ch[0], &psenc->ch[1], head);
311 wc = (wchar_t)ch;
312 done:
313 psenc->chlen = head;
314 if (pwc != NULL)
315 *pwc = wc;
316 *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s));
317 *s = s0;
319 return 0;
322 static int
323 _citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei,
324 char * __restrict s, size_t n, wchar_t wc,
325 _UESState * __restrict psenc, size_t * __restrict nresult)
327 wchar_t hi, lo;
329 if (psenc->chlen != 0)
330 return EINVAL;
332 if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) {
333 if (n-- < 1)
334 goto e2big;
335 psenc->ch[psenc->chlen++] = (char)wc;
336 } else if ((uint32_t)wc <= BMP_MAX) {
337 if (n < 6)
338 goto e2big;
339 psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT);
340 } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) {
341 if (n < 12)
342 goto e2big;
343 ucs_to_surrogate(wc, &hi, &lo);
344 psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT);
345 psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT);
346 } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) {
347 if (n < 10)
348 goto e2big;
349 psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT);
350 } else {
351 *nresult = (size_t)-1;
352 return EILSEQ;
354 memcpy(s, psenc->ch, psenc->chlen);
355 *nresult = psenc->chlen;
356 psenc->chlen = 0;
358 return 0;
360 e2big:
361 *nresult = (size_t)-1;
362 return E2BIG;
365 /*ARGSUSED*/
366 _citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei,
367 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
369 /* ei seem to be unused */
370 _DIAGASSERT(csid != NULL);
371 _DIAGASSERT(idx != NULL);
373 *csid = 0;
374 *idx = (_index_t)wc;
376 return 0;
379 static __inline int
380 /*ARGSUSED*/
381 _citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei,
382 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
384 /* ei seem to be unused */
385 _DIAGASSERT(wc != NULL);
387 if (csid != 0)
388 return EILSEQ;
389 *wc = (wchar_t)idx;
391 return 0;
394 static __inline int
395 /*ARGSUSED*/
396 _citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei,
397 _UESState * __restrict psenc, int * __restrict rstate)
399 _DIAGASSERT(psenc != NULL);
400 _DIAGASSERT(rstate != NULL);
402 if (psenc->chlen == 0)
403 *rstate = _STDENC_SDGEN_INITIAL;
404 else
405 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; /* XXX */
407 return 0;
410 static void
411 /*ARGSUSED*/
412 _citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei)
414 /* ei seems to be unused */
417 static int
418 /*ARGSUSED*/
419 _citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei,
420 const void * __restrict var, size_t lenvar)
422 const char *p;
424 _DIAGASSERT(ei != NULL);
426 p = var;
427 #define MATCH(x, act) \
428 do { \
429 if (lenvar >= (sizeof(#x)-1) && \
430 _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \
431 act; \
432 lenvar -= sizeof(#x)-1; \
433 p += sizeof(#x)-1; \
435 } while (/*CONSTCOND*/0)
436 memset((void *)ei, 0, sizeof(*ei));
437 while (lenvar > 0) {
438 switch (_bcs_toupper(*p)) {
439 case 'C':
440 MATCH(C99, ei->mode |= MODE_C99);
441 break;
443 ++p;
444 --lenvar;
446 ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12;
448 return 0;
451 /* ----------------------------------------------------------------------
452 * public interface for ctype
455 _CITRUS_CTYPE_DECLS(UES);
456 _CITRUS_CTYPE_DEF_OPS(UES);
458 #include "citrus_ctype_template.h"
460 /* ----------------------------------------------------------------------
461 * public interface for stdenc
464 _CITRUS_STDENC_DECLS(UES);
465 _CITRUS_STDENC_DEF_OPS(UES);
467 #include "citrus_stdenc_template.h"