Sync Citrus iconv support with NetBSD.
[dragonfly.git] / lib / libc / citrus / modules / citrus_hz.c
blob8230c91b11820e002f5336cbc4ed2f3a22868c77
1 /* $NetBSD: citrus_hz.c,v 1.1 2006/11/22 23:38:27 tnozaki Exp $ */
2 /* $DragonFly: src/lib/libc/citrus/modules/citrus_hz.c,v 1.1 2008/04/10 10:21:01 hasso Exp $ */
4 /*-
5 * Copyright (c)2004, 2006 Citrus Project,
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
31 #include <sys/queue.h>
32 #include <sys/types.h>
33 #include <assert.h>
34 #include <errno.h>
35 #include <string.h>
36 #include <stdint.h>
37 #include <stdlib.h>
38 #include <stddef.h>
39 #include <locale.h>
40 #include <limits.h>
41 #include <wchar.h>
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_bcs.h"
46 #include "citrus_module.h"
47 #include "citrus_ctype.h"
48 #include "citrus_stdenc.h"
50 #include "citrus_hz.h"
51 #include "citrus_prop.h"
/*
 * wchar_t mapping:
 *
 * CTRL/ASCII	00000000 00000000 00000000 gxxxxxxx
 * GB2312	00000000 00000000 0xxxxxxx gxxxxxxx
 * 94/96*n (~M)	0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
 */
/* Byte that introduces every HZ escape sequence ("~{", "~}", "~~", ...). */
#define ESCAPE_CHAR	'~'

/* Character-set classes; the values index the ranges[] table below. */
typedef enum {
	CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4
} charset_t;

/* Inclusive range of byte values (GR bit stripped) valid for a charset. */
typedef struct {
	int start, end, width;
} range_t;

/* Valid byte range of each charset_t, in enum order. */
static const range_t ranges[] = {
#define RANGE(start, end) { (start), (end), ((end) - (start)) + 1 }
/* CTRL   */ RANGE(0x00, 0x1F),
/* ASCII  */ RANGE(0x20, 0x7F),
/* GB2312 */ RANGE(0x21, 0x7E),
/* CS94   */ RANGE(0x21, 0x7E),
/* CS96   */ RANGE(0x20, 0x7F),
#undef RANGE
};
81 typedef struct escape_t escape_t;
82 typedef struct {
83 charset_t charset;
84 size_t length;
85 #define ROWCOL_MAX 3
86 escape_t *escape;
87 } graphic_t;
89 typedef TAILQ_HEAD(escape_list, escape_t) escape_list;
90 struct escape_t {
91 TAILQ_ENTRY(escape_t) entry;
92 int ch;
93 graphic_t *left, *right;
94 escape_list *set;
97 #define GL(escape) ((escape)->left)
98 #define GR(escape) ((escape)->right)
99 #define SET(escape) ((escape)->set)
100 #define ESC(escape) ((escape)->ch)
101 #define INIT(escape) (TAILQ_FIRST(SET(escape)))
103 static __inline escape_t *
104 find_escape(escape_list *set, int ch)
106 escape_t *escape;
108 _DIAGASSERT(set != NULL);
110 TAILQ_FOREACH(escape, set, entry) {
111 if (ESC(escape) == ch)
112 break;
115 return escape;
118 typedef struct {
119 escape_list e0, e1;
120 graphic_t *ascii, *gb2312;
121 } _HZEncodingInfo;
123 #define E0SET(ei) (&(ei)->e0)
124 #define E1SET(ei) (&(ei)->e1)
125 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei)))
126 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei)))
128 typedef struct {
129 int chlen;
130 char ch[ROWCOL_MAX];
131 escape_t *inuse;
132 } _HZState;
134 typedef struct {
135 _HZEncodingInfo ei;
136 struct {
137 /* for future multi-locale facility */
138 _HZState s_mblen;
139 _HZState s_mbrlen;
140 _HZState s_mbrtowc;
141 _HZState s_mbtowc;
142 _HZState s_mbsrtowcs;
143 _HZState s_wcrtomb;
144 _HZState s_wcsrtombs;
145 _HZState s_wctomb;
146 } states;
147 } _HZCTypeInfo;
149 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
150 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
152 #define _FUNCNAME(m) _citrus_HZ_##m
153 #define _ENCODING_INFO _HZEncodingInfo
154 #define _CTYPE_INFO _HZCTypeInfo
155 #define _ENCODING_STATE _HZState
156 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX
157 #define _ENCODING_IS_STATE_DEPENDENT 1
158 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL)
160 static __inline void
161 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,
162 _HZState * __restrict psenc)
164 _DIAGASSERT(ei != NULL);
165 _DIAGASSERT(psenc != NULL);
167 psenc->chlen = 0;
168 psenc->inuse = INIT0(ei);
171 static __inline void
172 /*ARGSUSED*/
173 _citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei,
174 void *__restrict pspriv, const _HZState * __restrict psenc)
176 /* ei may be unused */
177 _DIAGASSERT(pspriv != NULL);
178 _DIAGASSERT(psenc != NULL);
180 memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
183 static __inline void
184 /*ARGSUSED*/
185 _citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei,
186 _HZState * __restrict psenc, const void * __restrict pspriv)
188 /* ei may be unused */
189 _DIAGASSERT(psenc != NULL);
190 _DIAGASSERT(pspriv != NULL);
192 memcpy((void *)psenc, pspriv, sizeof(*psenc));
195 static int
196 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,
197 wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
198 _HZState * __restrict psenc, size_t * __restrict nresult)
200 const char *s0;
201 wchar_t wc;
202 int bit, head, tail, len, ch;
203 graphic_t *graphic;
204 escape_t *candidate, *init;
205 const range_t *range;
207 _DIAGASSERT(ei != NULL);
208 /* pwc may be null */
209 _DIAGASSERT(s != NULL);
210 _DIAGASSERT(psenc != NULL);
211 _DIAGASSERT(nresult != NULL);
213 if (*s == NULL) {
214 _citrus_HZ_init_state(ei, psenc);
215 *nresult = 1;
216 return 0;
218 s0 = *s;
219 if (psenc->chlen < 0 || psenc->inuse == NULL)
220 return EINVAL;
222 wc = (wchar_t)0;
223 bit = head = tail = 0;
224 graphic = NULL;
225 for (len = 0; len <= MB_LEN_MAX; /**/) {
226 if (psenc->chlen == tail) {
227 if (n-- < 1) {
228 *s = s0;
229 *nresult = (size_t)-2;
230 return 0;
232 psenc->ch[psenc->chlen++] = *s0++;
233 ++len;
235 ch = (unsigned char)psenc->ch[tail++];
236 if (tail == 1) {
237 if ((ch & ~0x80) <= 0x1F) {
238 if (psenc->inuse != INIT0(ei))
239 break;
240 wc = (wchar_t)ch;
241 goto done;
243 if (ch & 0x80) {
244 graphic = GR(psenc->inuse);
245 bit = 0x80;
246 ch &= ~0x80;
247 } else {
248 graphic = GL(psenc->inuse);
249 if (ch == ESCAPE_CHAR)
250 continue;
251 bit = 0x0;
253 if (graphic == NULL)
254 break;
255 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) {
256 if (tail < psenc->chlen)
257 return EINVAL;
258 if (ch == ESCAPE_CHAR) {
259 ++head;
260 } else if (ch == '\n') {
261 if (psenc->inuse != INIT0(ei))
262 break;
263 tail = psenc->chlen = 0;
264 continue;
265 } else {
266 candidate = NULL;
267 init = INIT0(ei);
268 _DIAGASSERT(init != NULL);
269 if (psenc->inuse == init) {
270 init = INIT1(ei);
271 } else if (INIT(psenc->inuse) == init) {
272 if (ESC(init) != ch)
273 break;
274 candidate = init;
276 if (candidate == NULL) {
277 candidate = find_escape(
278 SET(psenc->inuse), ch);
279 if (candidate == NULL) {
280 if (init == NULL ||
281 ESC(init) != ch)
282 break;
283 candidate = init;
286 psenc->inuse = candidate;
287 tail = psenc->chlen = 0;
288 continue;
290 } else if (ch & 0x80) {
291 if (graphic != GR(psenc->inuse))
292 break;
293 ch &= ~0x80;
294 } else {
295 if (graphic != GL(psenc->inuse))
296 break;
298 _DIAGASSERT(graphic != NULL);
299 range = &ranges[(size_t)graphic->charset];
300 if (range->start > ch || range->end < ch)
301 break;
302 wc <<= 8;
303 wc |= ch;
304 if (graphic->length == (tail - head)) {
305 if (graphic->charset > GB2312)
306 bit |= ESC(psenc->inuse) << 24;
307 wc |= bit;
308 goto done;
311 *nresult = (size_t)-1;
312 return EILSEQ;
313 done:
314 if (tail < psenc->chlen)
315 return EINVAL;
316 *s = s0;
317 if (pwc != NULL)
318 *pwc = wc;
319 psenc->chlen = 0;
320 *nresult = (wc == 0) ? 0 : len;
322 return 0;
325 static int
326 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,
327 char * __restrict s, size_t n, wchar_t wc,
328 _HZState * __restrict psenc, size_t * __restrict nresult)
330 int bit, ch;
331 escape_t *candidate, *init;
332 graphic_t *graphic;
333 size_t len;
334 const range_t *range;
336 _DIAGASSERT(ei != NULL);
337 _DIAGASSERT(s != NULL);
338 _DIAGASSERT(psenc != NULL);
339 _DIAGASSERT(nresult != NULL);
341 if (psenc->chlen != 0 || psenc->inuse == NULL)
342 return EINVAL;
343 if (wc & 0x80) {
344 bit = 0x80;
345 wc &= ~0x80;
346 } else {
347 bit = 0x0;
349 if ((uint32_t)wc <= 0x1F) {
350 candidate = INIT0(ei);
351 graphic = (bit == 0)
352 ? candidate->left : candidate->right;
353 if (graphic == NULL)
354 goto ilseq;
355 range = &ranges[(size_t)CTRL];
356 len = 1;
357 } else if ((uint32_t)wc <= 0x7F) {
358 graphic = ei->ascii;
359 if (graphic == NULL)
360 goto ilseq;
361 candidate = graphic->escape;
362 range = &ranges[(size_t)graphic->charset];
363 len = graphic->length;
364 } else if ((uint32_t)wc <= 0x7F7F) {
365 graphic = ei->gb2312;
366 if (graphic == NULL)
367 goto ilseq;
368 candidate = graphic->escape;
369 range = &ranges[(size_t)graphic->charset];
370 len = graphic->length;
371 } else {
372 ch = (wc >> 24) & 0xFF;
373 candidate = find_escape(E0SET(ei), ch);
374 if (candidate == NULL) {
375 candidate = find_escape(E1SET(ei), ch);
376 if (candidate == NULL)
377 goto ilseq;
379 wc &= ~0xFF000000;
380 graphic = (bit == 0)
381 ? candidate->left : candidate->right;
382 if (graphic == NULL)
383 goto ilseq;
384 range = &ranges[(size_t)graphic->charset];
385 len = graphic->length;
387 if (psenc->inuse != candidate) {
388 init = INIT0(ei);
389 if (SET(psenc->inuse) == SET(candidate)) {
390 if (INIT(psenc->inuse) != init ||
391 psenc->inuse == init || candidate == init)
392 init = NULL;
393 } else if (candidate == (init = INIT(candidate))) {
394 init = NULL;
396 if (init != NULL) {
397 if (n < 2)
398 return E2BIG;
399 n -= 2;
400 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
401 psenc->ch[psenc->chlen++] = ESC(init);
403 if (n < 2)
404 return E2BIG;
405 n -= 2;
406 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
407 psenc->ch[psenc->chlen++] = ESC(candidate);
408 psenc->inuse = candidate;
410 if (n < len)
411 return E2BIG;
412 while (len-- > 0) {
413 ch = (wc >> (len * 8)) & 0xFF;
414 if (range->start > ch || range->end < ch)
415 goto ilseq;
416 psenc->ch[psenc->chlen++] = ch | bit;
418 memcpy(s, psenc->ch, psenc->chlen);
419 *nresult = psenc->chlen;
420 psenc->chlen = 0;
422 return 0;
424 ilseq:
425 *nresult = (size_t)-1;
426 return EILSEQ;
429 static __inline int
430 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,
431 char * __restrict s, size_t n, _HZState * __restrict psenc,
432 size_t * __restrict nresult)
434 escape_t *candidate;
436 _DIAGASSERT(ei != NULL);
437 _DIAGASSERT(s != NULL);
438 _DIAGASSERT(psenc != NULL);
439 _DIAGASSERT(nresult != NULL);
441 if (psenc->chlen != 0 || psenc->inuse == NULL)
442 return EINVAL;
443 candidate = INIT0(ei);
444 if (psenc->inuse != candidate) {
445 if (n < 2)
446 return E2BIG;
447 n -= 2;
448 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
449 psenc->ch[psenc->chlen++] = ESC(candidate);
451 if (n < 1)
452 return E2BIG;
453 if (psenc->chlen > 0)
454 memcpy(s, psenc->ch, psenc->chlen);
455 *nresult = psenc->chlen;
456 _citrus_HZ_init_state(ei, psenc);
458 return 0;
461 static __inline int
462 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,
463 _HZState * __restrict psenc, int * __restrict rstate)
465 _DIAGASSERT(ei != NULL);
466 _DIAGASSERT(psenc != NULL);
467 _DIAGASSERT(rstate != NULL);
469 if (psenc->chlen < 0 || psenc->inuse == NULL)
470 return EINVAL;
471 *rstate = (psenc->chlen == 0)
472 ? ((psenc->inuse == INIT0(ei))
473 ? _STDENC_SDGEN_INITIAL
474 : _STDENC_SDGEN_STABLE)
475 : ((psenc->ch[0] == ESCAPE_CHAR)
476 ? _STDENC_SDGEN_INCOMPLETE_SHIFT
477 : _STDENC_SDGEN_INCOMPLETE_CHAR);
479 return 0;
482 static __inline int
483 /*ARGSUSED*/
484 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei,
485 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
487 int bit;
489 _DIAGASSERT(csid != NULL);
490 _DIAGASSERT(idx != NULL);
492 if (wc & 0x80) {
493 bit = 0x80;
494 wc &= ~0x80;
495 } else {
496 bit = 0x0;
498 if ((uint32_t)wc <= 0x7F) {
499 *csid = (_csid_t)bit;
500 *idx = (_index_t)wc;
501 } else if ((uint32_t)wc <= 0x7F7F) {
502 *csid = (_csid_t)(bit | 0x8000);
503 *idx = (_index_t)wc;
504 } else {
505 *csid = (_index_t)(wc & ~0x00FFFF7F);
506 *idx = (_csid_t)(wc & 0x00FFFF7F);
509 return 0;
512 static __inline int
513 /*ARGSUSED*/
514 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei,
515 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
517 _DIAGASSERT(ei != NULL);
518 _DIAGASSERT(wc != NULL);
520 *wc = (wchar_t)idx;
521 switch (csid) {
522 case 0x80:
523 case 0x8080:
524 *wc |= (wchar_t)0x80;
525 /*FALLTHROUGH*/
526 case 0x0:
527 case 0x8000:
528 break;
529 default:
530 *wc |= (wchar_t)csid;
533 return 0;
536 static void
537 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei)
539 escape_t *escape;
541 _DIAGASSERT(ei != NULL);
542 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) {
543 TAILQ_REMOVE(E0SET(ei), escape, entry);
544 free(GL(escape));
545 free(GR(escape));
546 free(escape);
548 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) {
549 TAILQ_REMOVE(E1SET(ei), escape, entry);
550 free(GL(escape));
551 free(GR(escape));
552 free(escape);
556 static int
557 _citrus_HZ_parse_char(void **context, const char *name, const char *s)
559 void **p;
560 escape_t *escape;
562 _DIAGASSERT(context != NULL && *context != NULL);
563 _DIAGASSERT(name != NULL);
564 _DIAGASSERT(s != NULL);
566 p = (void **)*context;
567 escape = (escape_t *)p[0];
568 if (escape->ch != '\0')
569 return EINVAL;
570 escape->ch = *s++;
571 if (escape->ch == ESCAPE_CHAR || *s != '\0')
572 return EINVAL;
574 return 0;
577 static int
578 _citrus_HZ_parse_graphic(void **context, const char *name, const char *s)
580 void **p;
581 _HZEncodingInfo *ei;
582 escape_t *escape;
583 graphic_t *graphic;
585 _DIAGASSERT(context != NULL && *context != NULL);
586 _DIAGASSERT(name != NULL);
587 _DIAGASSERT(s != NULL);
589 p = (void **)*context;
590 escape = (escape_t *)p[0];
591 ei = (_HZEncodingInfo *)p[1];
592 graphic = malloc(sizeof(*graphic));
593 if (graphic == NULL)
594 return ENOMEM;
595 memset(graphic, 0, sizeof(*graphic));
596 if (strcmp("GL", name) == 0) {
597 if (GL(escape) != NULL)
598 goto release;
599 GL(escape) = graphic;
600 } else if (strcmp("GR", name) == 0) {
601 if (GR(escape) != NULL)
602 goto release;
603 GR(escape) = graphic;
604 } else {
605 release:
606 free(graphic);
607 return EINVAL;
609 graphic->escape = escape;
610 if (_bcs_strncasecmp("ASCII", s, 5) == 0) {
611 if (s[5] != '\0')
612 return EINVAL;
613 graphic->charset = ASCII;
614 graphic->length = 1;
615 ei->ascii = graphic;
616 return 0;
617 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) {
618 if (s[6] != '\0')
619 return EINVAL;
620 graphic->charset = GB2312;
621 graphic->length = 2;
622 ei->gb2312 = graphic;
623 return 0;
624 } else if (strncmp("94*", s, 3) == 0) {
625 graphic->charset = CS94;
626 } else if (strncmp("96*", s, 3) == 0) {
627 graphic->charset = CS96;
628 } else {
629 return EINVAL;
631 s += 3;
632 switch(*s) {
633 case '1': case '2': case '3':
634 graphic->length = (size_t)(*s - '0');
635 if (*++s == '\0')
636 break;
637 /*FALLTHROUGH*/
638 default:
639 return EINVAL;
641 return 0;
644 static const _citrus_prop_hint_t escape_hints[] = {
645 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char),
646 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic),
647 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic),
648 _CITRUS_PROP_HINT_END
651 static int
652 _citrus_HZ_parse_escape(void **context, const char *name, const char *s)
654 _HZEncodingInfo *ei;
655 escape_t *escape;
656 void *p[2];
658 _DIAGASSERT(context != NULL);
659 _DIAGASSERT(name != NULL);
660 _DIAGASSERT(s != NULL);
662 ei = (_HZEncodingInfo *)*context;
663 escape = malloc(sizeof(*escape));
664 if (escape == NULL)
665 return EINVAL;
666 memset(escape, 0, sizeof(*escape));
667 if (strcmp("0", name) == 0) {
668 escape->set = E0SET(ei);
669 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry);
670 } else if (strcmp("1", name) == 0) {
671 escape->set = E1SET(ei);
672 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry);
673 } else {
674 free(escape);
675 return EINVAL;
677 p[0] = (void *)escape;
678 p[1] = (void *)ei;
679 return _citrus_prop_parse_variable(
680 escape_hints, (void *)&p[0], s, strlen(s));
683 static const _citrus_prop_hint_t root_hints[] = {
684 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape),
685 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape),
686 _CITRUS_PROP_HINT_END
689 static int
690 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,
691 const void * __restrict var, size_t lenvar)
693 int errnum;
695 _DIAGASSERT(ei != NULL);
697 memset(ei, 0, sizeof(*ei));
698 TAILQ_INIT(E0SET(ei));
699 TAILQ_INIT(E1SET(ei));
700 errnum = _citrus_prop_parse_variable(
701 root_hints, (void *)ei, var, lenvar);
702 if (errnum != 0)
703 _citrus_HZ_encoding_module_uninit(ei);
704 return errnum;
707 /* ----------------------------------------------------------------------
708 * public interface for ctype
711 _CITRUS_CTYPE_DECLS(HZ);
712 _CITRUS_CTYPE_DEF_OPS(HZ);
714 #include "citrus_ctype_template.h"
716 /* ----------------------------------------------------------------------
717 * public interface for stdenc
720 _CITRUS_STDENC_DECLS(HZ);
721 _CITRUS_STDENC_DEF_OPS(HZ);
723 #include "citrus_stdenc_template.h"