1 /* $NetBSD: citrus_iso2022.c,v 1.18 2007/11/21 14:19:32 tnozaki Exp $ */
2 /* $DragonFly: src/lib/libc/citrus/modules/citrus_iso2022.c,v 1.2 2008/04/10 10:21:01 hasso Exp $ */
5 * Copyright (c)1999, 2002 Citrus Project,
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
32 #include <sys/types.h>
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_ctype.h"
47 #include "citrus_stdenc.h"
48 #include "citrus_iso2022.h"
51 /* ----------------------------------------------------------------------
52 * private stuff used by templates
58 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
59 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
60 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
61 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
62 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
63 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
64 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
65 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
66 * 94x94 charset (ESC & V ESC $ ( F)
67 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
68 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
69 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
70 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
71 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
78 #define CS94MULTI (2U)
79 #define CS96MULTI (3U)
86 static const _ISO2022Charset ascii
= { CS94
, 'B', '\0', '\0' };
87 static const _ISO2022Charset iso88591
= { CS96
, 'A', '\0', '\0' };
91 /* need 3 bits to hold -1, 0, ..., 3 */
96 char ch
[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
99 #define _ISO2022STATE_FLAG_INITIALIZED 1
103 _ISO2022Charset
*recommend
[4];
104 size_t recommendsize
[4];
105 _ISO2022Charset initg
[4];
108 #define F_8BIT 0x0001
109 #define F_NOOLD 0x0002
110 #define F_SI 0x0010 /*0F*/
111 #define F_SO 0x0020 /*0E*/
112 #define F_LS0 0x0010 /*0F*/
113 #define F_LS1 0x0020 /*0E*/
114 #define F_LS2 0x0040 /*ESC n*/
115 #define F_LS3 0x0080 /*ESC o*/
116 #define F_LS1R 0x0100 /*ESC ~*/
117 #define F_LS2R 0x0200 /*ESC }*/
118 #define F_LS3R 0x0400 /*ESC |*/
119 #define F_SS2 0x0800 /*ESC N*/
120 #define F_SS3 0x1000 /*ESC O*/
121 #define F_SS2R 0x2000 /*8E*/
122 #define F_SS3R 0x4000 /*8F*/
123 } _ISO2022EncodingInfo
;
125 _ISO2022EncodingInfo ei
;
127 /* for future multi-locale facility */
128 _ISO2022State s_mblen
;
129 _ISO2022State s_mbrlen
;
130 _ISO2022State s_mbrtowc
;
131 _ISO2022State s_mbtowc
;
132 _ISO2022State s_mbsrtowcs
;
133 _ISO2022State s_wcrtomb
;
134 _ISO2022State s_wcsrtombs
;
135 _ISO2022State s_wctomb
;
/*
 * Glue macros for this encoding module.
 * NOTE(review): these are presumably consumed by the citrus_*_template.h
 * headers included at the bottom of the file -- confirm against the templates.
 */
/* Address of the encoding-info member `ei' inside the ctype info pointed to by _cei_. */
139 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
/* Selects the per-function state member states.s_<_func_> by token pasting. */
140 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
/* Builds a module-local symbol name: m becomes _citrus_ISO2022_<m>. */
142 #define _FUNCNAME(m) _citrus_ISO2022_##m
/* Generic names mapped onto the ISO-2022 specific structure types. */
143 #define _ENCODING_INFO _ISO2022EncodingInfo
144 #define _CTYPE_INFO _ISO2022CTypeInfo
145 #define _ENCODING_STATE _ISO2022State
/* The argument is ignored; the expansion is always MB_LEN_MAX. */
146 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX
/* Non-zero: the encoding is declared state dependent. */
147 #define _ENCODING_IS_STATE_DEPENDENT 1
/* True while the _ISO2022STATE_FLAG_INITIALIZED bit is still clear in (_ps_)->flags. */
148 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \
149 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))
/*
 * Sentinel wide character, (wchar_t)-1.  _ISO2022_sgetwchar() returns it
 * and _citrus_ISO2022_mbrtowc_priv() tests results against it.
 */
152 #define _ISO2022INVALID (wchar_t)-1
/*
 * isc0: report whether x lies in the C0 control range 0x00-0x1f.
 * Returns 1 when it does, 0 otherwise.
 */
static __inline int
isc0(__uint8_t x)
{

	/* Only the low five bits may be set, i.e. the value is at most 0x1f. */
	return (x <= 0x1f);
}
/*
 * isc1: report whether x lies in the C1 control range 0x80-0x9f.
 * Returns 1 when it does, 0 otherwise.
 */
static __inline int
isc1(__uint8_t x)
{

	/* 0x80-0x9f is exactly the set of bytes whose top three bits are 100. */
	return ((x & 0xe0) == 0x80);
}
/*
 * iscntl: report whether x is a control byte: a C0 byte (isc0), a C1 byte
 * (isc1) or 0x7f.  Returns 1 when it is, 0 otherwise.
 */
static __inline int
iscntl(__uint8_t x)
{

	if (isc0(x))
		return (1);
	if (isc1(x))
		return (1);
	return (x == 0x7f);
}
/*
 * is94: report whether x lies in 0x21-0x7e, the byte range accepted for
 * characters of a 94-character set.  Returns 1 when it does, 0 otherwise.
 */
static __inline int
is94(__uint8_t x)
{

	if (x < 0x21)
		return (0);
	return (x <= 0x7e);
}
/*
 * is96: report whether x lies in 0x20-0x7f, the byte range accepted for
 * characters of a 96-character set.  Returns 1 when it does, 0 otherwise.
 */
static __inline int
is96(__uint8_t x)
{

	/* Below 0x80 and not one of the bytes 0x00-0x1f. */
	return (x >= 0x20 && x < 0x80);
}
/*
 * isecma: report whether x lies in 0x30-0x7f (both bounds inclusive, so
 * 0x7f is accepted).  Returns 1 when it does, 0 otherwise.
 * NOTE(review): presumably the test for the final byte of an escape
 * sequence -- confirm against seqmatch().
 */
static __inline int
isecma(__uint8_t x)
{

	if (x > 0x7f)
		return (0);
	return (x >= 0x30);
}
/*
 * isinterm: report whether x lies in 0x20-0x2f.
 * Returns 1 when it does, 0 otherwise.
 * NOTE(review): presumably the test for an intermediate byte of an escape
 * sequence -- confirm against seqmatch().
 */
static __inline int
isinterm(__uint8_t x)
{

	/* 0x20-0x2f is exactly the set of bytes whose high nibble is 2. */
	return ((x & 0xf0) == 0x20);
}
/*
 * isthree: report whether x lies in 0x60-0x6f.
 * Returns 1 when it does, 0 otherwise.
 * Callers in this file apply it to a charset's final byte to choose between
 * the three-byte and two-byte forms of a multibyte character.
 */
static __inline int
isthree(__uint8_t x)
{

	/* 0x60-0x6f is exactly the set of bytes whose high nibble is 6. */
	return ((x & 0xf0) == 0x60);
}
164 getcs(const char * __restrict p
, _ISO2022Charset
* __restrict cs
)
167 _DIAGASSERT(p
!= NULL
);
168 _DIAGASSERT(cs
!= NULL
);
170 if (!strncmp(p
, "94$", 3) && p
[3] && !p
[4]) {
171 cs
->final
= (u_char
)(p
[3] & 0xff);
174 cs
->type
= CS94MULTI
;
175 } else if (!strncmp(p
, "96$", 3) && p
[3] && !p
[4]) {
176 cs
->final
= (u_char
)(p
[3] & 0xff);
179 cs
->type
= CS96MULTI
;
180 } else if (!strncmp(p
, "94", 2) && p
[2] && !p
[3]) {
181 cs
->final
= (u_char
)(p
[2] & 0xff);
185 } else if (!strncmp(p
, "96", 2) && p
[2] && !p
[3]) {
186 cs
->final
= (u_char
)(p
[2] & 0xff);
203 get_recommend(_ISO2022EncodingInfo
* __restrict ei
,
204 const char * __restrict token
)
207 _ISO2022Charset cs
, *p
;
209 if (!strchr("0123", token
[0]) || token
[1] != '=')
212 if (getcs(&token
[2], &cs
) == 0)
214 else if (!strcmp(&token
[2], "94")) {
215 cs
.final
= (u_char
)(token
[4]);
219 } else if (!strcmp(&token
[2], "96")) {
220 cs
.final
= (u_char
)(token
[4]);
224 } else if (!strcmp(&token
[2], "94$")) {
225 cs
.final
= (u_char
)(token
[5]);
229 } else if (!strcmp(&token
[2], "96$")) {
230 cs
.final
= (u_char
)(token
[5]);
239 if (!ei
->recommend
[i
]) {
240 ei
->recommend
[i
] = malloc(sizeof(_ISO2022Charset
));
242 p
= realloc(ei
->recommend
[i
],
243 sizeof(_ISO2022Charset
) * (ei
->recommendsize
[i
] + 1));
246 ei
->recommend
[i
] = p
;
248 if (!ei
->recommend
[i
])
250 ei
->recommendsize
[i
]++;
252 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->final
= cs
.final
;
253 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->interm
= cs
.interm
;
254 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->vers
= cs
.vers
;
255 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->type
= cs
.type
;
261 get_initg(_ISO2022EncodingInfo
* __restrict ei
,
262 const char * __restrict token
)
266 if (strncmp("INIT", &token
[0], 4) ||
267 !strchr("0123", token
[4]) ||
271 if (getcs(&token
[6], &cs
) != 0)
274 ei
->initg
[token
[4] - '0'].type
= cs
.type
;
275 ei
->initg
[token
[4] - '0'].final
= cs
.final
;
276 ei
->initg
[token
[4] - '0'].interm
= cs
.interm
;
277 ei
->initg
[token
[4] - '0'].vers
= cs
.vers
;
283 get_max(_ISO2022EncodingInfo
* __restrict ei
,
284 const char * __restrict token
)
286 if (!strcmp(token
, "MAX1")) {
288 } else if (!strcmp(token
, "MAX2")) {
290 } else if (!strcmp(token
, "MAX3")) {
300 get_flags(_ISO2022EncodingInfo
* __restrict ei
,
301 const char * __restrict token
)
310 { "NOOLD", F_NOOLD
},
327 for (i
= 0; tags
[i
].tag
; i
++) {
328 if (!strcmp(token
, tags
[i
].tag
)) {
329 ei
->flags
|= tags
[i
].flag
;
339 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo
* __restrict ei
,
340 const void * __restrict var
, size_t lenvar
)
346 _DIAGASSERT(ei
!= NULL
);
350 * parse VARIABLE section.
356 v
= (const char *) var
;
358 /* initialize structure */
360 for (i
= 0; i
< 4; i
++) {
361 ei
->recommend
[i
] = NULL
;
362 ei
->recommendsize
[i
] = 0;
367 while (*v
== ' ' || *v
== '\t')
372 while (*e
&& *e
!= ' ' && *e
!= '\t')
378 if (len
>=sizeof(buf
))
380 snprintf(buf
, sizeof(buf
), "%.*s", len
, v
);
382 if ((ret
= get_recommend(ei
, buf
)) != _NOTMATCH
)
384 else if ((ret
= get_initg(ei
, buf
)) != _NOTMATCH
)
386 else if ((ret
= get_max(ei
, buf
)) != _NOTMATCH
)
388 else if ((ret
= get_flags(ei
, buf
)) != _NOTMATCH
)
401 free(ei
->recommend
[0]);
402 free(ei
->recommend
[1]);
403 free(ei
->recommend
[2]);
404 free(ei
->recommend
[3]);
411 _citrus_ISO2022_init_state(_ISO2022EncodingInfo
* __restrict ei
,
412 _ISO2022State
* __restrict s
)
416 memset(s
, 0, sizeof(*s
));
418 s
->gr
= (ei
->flags
& F_8BIT
) ? 1 : -1;
420 for (i
= 0; i
< 4; i
++) {
421 if (ei
->initg
[i
].final
) {
422 s
->g
[i
].type
= ei
->initg
[i
].type
;
423 s
->g
[i
].final
= ei
->initg
[i
].final
;
424 s
->g
[i
].interm
= ei
->initg
[i
].interm
;
427 s
->singlegl
= s
->singlegr
= -1;
428 s
->flags
|= _ISO2022STATE_FLAG_INITIALIZED
;
433 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo
* __restrict ei
,
434 void * __restrict pspriv
,
435 const _ISO2022State
* __restrict s
)
437 memcpy(pspriv
, (const void *)s
, sizeof(*s
));
442 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo
* __restrict ei
,
443 _ISO2022State
* __restrict s
,
444 const void * __restrict pspriv
)
446 memcpy((void *)s
, pspriv
, sizeof(*s
));
451 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo
* __restrict ei
,
452 const void * __restrict var
,
456 _DIAGASSERT(ei
!= NULL
);
458 return _citrus_ISO2022_parse_variable(ei
, var
, lenvar
);
463 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo
*ei
)
472 static const struct seqtable
{
481 /* G0 94MULTI special */
482 { CS94MULTI
, -1, 2, -1, -1, 3, { ESC
, '$', OECMA
}, },
483 /* G0 94MULTI special with version identification */
484 { CS94MULTI
, -1, 5, -1, 2, 6, { ESC
, '&', ECMA
, ESC
, '$', OECMA
}, },
486 { CS94
, 1, 2, -1, -1, 3, { ESC
, CS94
, ECMA
, }, },
487 /* G? 94 with 2nd intermediate char */
488 { CS94
, 1, 3, 2, -1, 4, { ESC
, CS94
, INTERM
, ECMA
, }, },
490 { CS96
, 1, 2, -1, -1, 3, { ESC
, CS96
, ECMA
, }, },
491 /* G? 96 with 2nd intermediate char */
492 { CS96
, 1, 3, 2, -1, 4, { ESC
, CS96
, INTERM
, ECMA
, }, },
494 { CS94MULTI
, 2, 3, -1, -1, 4, { ESC
, '$', CS94
, ECMA
, }, },
496 { CS96MULTI
, 2, 3, -1, -1, 4, { ESC
, '$', CS96
, ECMA
, }, },
497 /* G? 94MULTI with version specification */
498 { CS94MULTI
, 5, 6, -1, 2, 7, { ESC
, '&', ECMA
, ESC
, '$', CS94
, ECMA
, }, },
500 { -1, -1, -1, -1, -1, 2, { ESC
, 'n', }, },
501 { -1, -1, -1, -1, -1, 2, { ESC
, 'o', }, },
503 { -1, -1, -1, -1, -1, 2, { ESC
, '~', }, },
504 { -1, -1, -1, -1, -1, 2, { ESC
, /*{*/ '}', }, },
505 { -1, -1, -1, -1, -1, 2, { ESC
, '|', }, },
507 { -1, -1, -1, -1, -1, 2, { ESC
, 'N', }, },
508 { -1, -1, -1, -1, -1, 2, { ESC
, 'O', }, },
514 seqmatch(const char * __restrict s
, size_t n
,
515 const struct seqtable
* __restrict sp
)
519 _DIAGASSERT(s
!= NULL
);
520 _DIAGASSERT(sp
!= NULL
);
523 while (p
- sp
->chars
< n
&& p
- sp
->chars
< sp
->len
) {
530 if (*s
&& strchr("@AB", *s
))
539 if (*s
&& strchr("()*+", *s
))
544 if (*s
&& strchr(",-./", *s
))
559 return p
- sp
->chars
;
563 _ISO2022_sgetwchar(_ISO2022EncodingInfo
* __restrict ei
,
564 const char * __restrict string
, size_t n
,
565 const char ** __restrict result
,
566 _ISO2022State
* __restrict psenc
)
570 const struct seqtable
*sp
;
574 _DIAGASSERT(ei
!= NULL
);
575 _DIAGASSERT(string
!= NULL
);
576 /* result may be NULL */
580 if (1 <= n
&& string
[0] == '\017') {
586 if (1 <= n
&& string
[0] == '\016') {
594 if (1 <= n
&& string
[0] && strchr("\217\216", string
[0])) {
595 psenc
->singlegl
= psenc
->singlegr
=
596 (string
[0] - '\216') + 2;
602 /* eat the letter if this is not ESC */
603 if (1 <= n
&& string
[0] != '\033')
606 /* look for a perfect match from escape sequences */
607 for (sp
= &seqtable
[0]; sp
->len
; sp
++) {
608 nmatch
= seqmatch(string
, n
, sp
);
609 if (sp
->len
== nmatch
&& n
>= sp
->len
)
616 if (sp
->type
!= -1) {
623 i
= string
[sp
->csoff
] - '(';
627 i
= string
[sp
->csoff
] - ',';
630 return (_ISO2022INVALID
);
633 psenc
->g
[i
].type
= sp
->type
;
634 psenc
->g
[i
].final
= '\0';
635 psenc
->g
[i
].interm
= '\0';
636 psenc
->g
[i
].vers
= '\0';
637 /* sp->finaloff must not be -1 */
638 if (sp
->finaloff
!= -1)
639 psenc
->g
[i
].final
= string
[sp
->finaloff
];
640 if (sp
->intermoff
!= -1)
641 psenc
->g
[i
].interm
= string
[sp
->intermoff
];
642 if (sp
->versoff
!= -1)
643 psenc
->g
[i
].vers
= string
[sp
->versoff
];
651 if (2 <= n
&& string
[0] == '\033'
652 && string
[1] && strchr("no", string
[1])) {
653 psenc
->gl
= string
[1] - 'n' + 2;
660 /* XXX: { for vi showmatch */
661 if (2 <= n
&& string
[0] == '\033'
662 && string
[1] && strchr("~}|", string
[1])) {
663 psenc
->gr
= 3 - (string
[1] - '|');
670 if (2 <= n
&& string
[0] == '\033'
671 && string
[1] && strchr("NO", string
[1])) {
672 psenc
->singlegl
= (string
[1] - 'N') + 2;
680 * if we've got an unknown escape sequence, eat the ESC at the
681 * head. otherwise, wait till full escape sequence comes.
683 for (sp
= &seqtable
[0]; sp
->len
; sp
++) {
684 nmatch
= seqmatch(string
, n
, sp
);
689 * if we are in the middle of escape sequence,
690 * we still need to wait for more characters to come
696 return (_ISO2022INVALID
);
699 if (nmatch
== sp
->len
) {
700 /* this case should not happen */
710 /* no letter to eat */
714 return (_ISO2022INVALID
);
717 /* normal chars. always eat C0/C1 as is. */
718 if (iscntl(*string
& 0xff))
720 else if (*string
& 0x80) {
721 cur
= (psenc
->singlegr
== -1)
722 ? psenc
->gr
: psenc
->singlegr
;
724 cur
= (psenc
->singlegl
== -1)
725 ? psenc
->gl
: psenc
->singlegl
;
730 wchar
= *string
++ & 0xff;
733 /* reset single shift state */
734 psenc
->singlegr
= psenc
->singlegl
= -1;
738 /* length error check */
739 switch (psenc
->g
[cur
].type
) {
742 if (!isthree(psenc
->g
[cur
].final
)) {
744 && (string
[0] & 0x80) == (string
[1] & 0x80))
748 && (string
[0] & 0x80) == (string
[1] & 0x80)
749 && (string
[0] & 0x80) == (string
[2] & 0x80))
753 /* we still need to wait for more characters to come */
756 return (_ISO2022INVALID
);
763 /* we still need to wait for more characters to come */
766 return (_ISO2022INVALID
);
770 switch (psenc
->g
[cur
].type
) {
772 if (!(is94(string
[0] & 0x7f)))
775 if (!(is96(string
[0] & 0x7f)))
779 if (!(is94(string
[0] & 0x7f) && is94(string
[1] & 0x7f)))
783 if (!(is96(string
[0] & 0x7f) && is96(string
[1] & 0x7f)))
788 /* extract the character. */
789 switch (psenc
->g
[cur
].type
) {
791 /* special case for ASCII. */
792 if (psenc
->g
[cur
].final
== 'B' && !psenc
->g
[cur
].interm
) {
797 wchar
= psenc
->g
[cur
].final
;
798 wchar
= (wchar
<< 8);
799 wchar
|= (psenc
->g
[cur
].interm
? (0x80 | psenc
->g
[cur
].interm
) : 0);
800 wchar
= (wchar
<< 8);
801 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
804 /* special case for ISO-8859-1. */
805 if (psenc
->g
[cur
].final
== 'A' && !psenc
->g
[cur
].interm
) {
811 wchar
= psenc
->g
[cur
].final
;
812 wchar
= (wchar
<< 8);
813 wchar
|= (psenc
->g
[cur
].interm
? (0x80 | psenc
->g
[cur
].interm
) : 0);
814 wchar
= (wchar
<< 8);
815 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
820 wchar
= psenc
->g
[cur
].final
;
821 wchar
= (wchar
<< 8);
822 if (isthree(psenc
->g
[cur
].final
))
823 wchar
|= (*string
++ & 0x7f);
824 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
825 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
826 if (psenc
->g
[cur
].type
== CS96MULTI
)
833 /* reset single shift state */
834 psenc
->singlegr
= psenc
->singlegl
= -1;
841 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo
* __restrict ei
,
842 wchar_t * __restrict pwc
,
843 const char ** __restrict s
,
844 size_t n
, _ISO2022State
* __restrict psenc
,
845 size_t * __restrict nresult
)
848 const char *s0
, *p
, *result
;
852 _DIAGASSERT(nresult
!= 0);
853 _DIAGASSERT(ei
!= NULL
);
854 _DIAGASSERT(psenc
!= NULL
);
855 _DIAGASSERT(s
!= NULL
);
858 _citrus_ISO2022_init_state(ei
, psenc
);
859 *nresult
= _ENCODING_IS_STATE_DEPENDENT
;
864 chlenbak
= psenc
->chlen
;
867 * if we have something in buffer, use that.
868 * otherwise, skip here
870 if (psenc
->chlen
< 0 || psenc
->chlen
> sizeof(psenc
->ch
)) {
872 _citrus_ISO2022_init_state(ei
, psenc
);
875 if (psenc
->chlen
== 0)
878 /* buffer is not empty */
880 while (psenc
->chlen
< sizeof(psenc
->ch
) && n
>= 0) {
882 psenc
->ch
[psenc
->chlen
++] = *s0
++;
886 wchar
= _ISO2022_sgetwchar(ei
, p
, psenc
->chlen
- (p
-psenc
->ch
),
889 if (wchar
!= _ISO2022INVALID
) {
890 if (psenc
->chlen
> c
)
891 memmove(psenc
->ch
, result
, psenc
->chlen
- c
);
892 if (psenc
->chlen
< c
)
900 if ((result
- p
) == psenc
->chlen
)
901 /* complete shift sequence. */
909 /* escape sequence too long? */
913 wchar
= _ISO2022_sgetwchar(ei
, s0
, n
, &result
, psenc
);
914 if (wchar
!= _ISO2022INVALID
) {
926 /* complete shift sequence. */
930 if (n
< sizeof(psenc
->ch
)) {
931 memcpy(psenc
->ch
, s0
- c
, n
);
937 /* escape sequence too long? */
941 *nresult
= (size_t)-1;
952 *nresult
= c
- chlenbak
;
958 *nresult
= (size_t)-2;
964 recommendation(_ISO2022EncodingInfo
* __restrict ei
,
965 _ISO2022Charset
* __restrict cs
)
968 _ISO2022Charset
*recommend
;
970 _DIAGASSERT(ei
!= NULL
);
971 _DIAGASSERT(cs
!= NULL
);
973 /* first, try an exact match. */
974 for (i
= 0; i
< 4; i
++) {
975 recommend
= ei
->recommend
[i
];
976 for (j
= 0; j
< ei
->recommendsize
[i
]; j
++) {
977 if (cs
->type
!= recommend
[j
].type
)
979 if (cs
->final
!= recommend
[j
].final
)
981 if (cs
->interm
!= recommend
[j
].interm
)
988 /* then, try a wildcard match over final char. */
989 for (i
= 0; i
< 4; i
++) {
990 recommend
= ei
->recommend
[i
];
991 for (j
= 0; j
< ei
->recommendsize
[i
]; j
++) {
992 if (cs
->type
!= recommend
[j
].type
)
994 if (cs
->final
&& (cs
->final
!= recommend
[j
].final
))
996 if (cs
->interm
&& (cs
->interm
!= recommend
[j
].interm
))
1003 /* there's no recommendation. make a guess. */
1004 if (ei
->maxcharset
== 0) {
1020 _ISO2022_sputwchar(_ISO2022EncodingInfo
* __restrict ei
, wchar_t wc
,
1021 char * __restrict string
, size_t n
,
1022 char ** __restrict result
,
1023 _ISO2022State
* __restrict psenc
,
1024 size_t * __restrict nresult
)
1030 char tmp
[MB_LEN_MAX
];
1035 _DIAGASSERT(ei
!= NULL
);
1036 _DIAGASSERT(string
!= NULL
);
1037 /* result may be NULL */
1038 _DIAGASSERT(psenc
!= NULL
);
1039 _DIAGASSERT(nresult
!= NULL
);
1041 if (isc0(wc
& 0xff)) {
1042 /* go back to INIT0 or ASCII on control chars */
1043 cs
= ei
->initg
[0].final
? ei
->initg
[0] : ascii
;
1044 } else if (isc1(wc
& 0xff)) {
1045 /* go back to INIT1 or ISO-8859-1 on control chars */
1046 cs
= ei
->initg
[1].final
? ei
->initg
[1] : iso88591
;
1047 } else if (!(wc
& ~0xff)) {
1049 /* special treatment for ISO-8859-1 */
1052 /* special treatment for ASCII */
1056 cs
.final
= (wc
>> 24) & 0x7f;
1057 if ((wc
>> 16) & 0x80)
1058 cs
.interm
= (wc
>> 16) & 0x7f;
1062 cs
.type
= (wc
& 0x00007f00) ? CS96MULTI
: CS96
;
1064 cs
.type
= (wc
& 0x00007f00) ? CS94MULTI
: CS94
;
1066 target
= recommendation(ei
, &cs
);
1068 bit8
= ei
->flags
& F_8BIT
;
1070 /* designate the charset onto the target plane(G0/1/2/3). */
1071 if (psenc
->g
[target
].type
== cs
.type
1072 && psenc
->g
[target
].final
== cs
.final
1073 && psenc
->g
[target
].interm
== cs
.interm
)
1077 if (cs
.type
== CS94MULTI
|| cs
.type
== CS96MULTI
)
1079 if (target
== 0 && cs
.type
== CS94MULTI
&& strchr("@AB", cs
.final
)
1080 && !cs
.interm
&& !(ei
->flags
& F_NOOLD
))
1082 else if (cs
.type
== CS94
|| cs
.type
== CS94MULTI
)
1083 *p
++ = "()*+"[target
];
1085 *p
++ = ",-./"[target
];
1090 psenc
->g
[target
].type
= cs
.type
;
1091 psenc
->g
[target
].final
= cs
.final
;
1092 psenc
->g
[target
].interm
= cs
.interm
;
1095 /* invoke the plane onto GL or GR. */
1096 if (psenc
->gl
== target
)
1098 if (bit8
&& psenc
->gr
== target
)
1101 if (target
== 0 && (ei
->flags
& F_LS0
)) {
1104 } else if (target
== 1 && (ei
->flags
& F_LS1
)) {
1107 } else if (target
== 2 && (ei
->flags
& F_LS2
)) {
1111 } else if (target
== 3 && (ei
->flags
& F_LS3
)) {
1115 } else if (bit8
&& target
== 1 && (ei
->flags
& F_LS1R
)) {
1119 } else if (bit8
&& target
== 2 && (ei
->flags
& F_LS2R
)) {
1124 } else if (bit8
&& target
== 3 && (ei
->flags
& F_LS3R
)) {
1128 } else if (target
== 2 && (ei
->flags
& F_SS2
)) {
1131 psenc
->singlegl
= 2;
1132 } else if (target
== 3 && (ei
->flags
& F_SS3
)) {
1135 psenc
->singlegl
= 3;
1136 } else if (bit8
&& target
== 2 && (ei
->flags
& F_SS2R
)) {
1139 psenc
->singlegl
= psenc
->singlegr
= 2;
1140 } else if (bit8
&& target
== 3 && (ei
->flags
& F_SS3R
)) {
1143 psenc
->singlegl
= psenc
->singlegr
= 3;
1148 if (psenc
->singlegl
== target
)
1150 else if (psenc
->singlegr
== target
)
1152 else if (psenc
->gl
== target
)
1154 else if ((ei
->flags
& F_8BIT
) && psenc
->gr
== target
)
1166 i
= !iscntl(wc
& 0xff) ?
1167 (isthree(cs
.final
) ? 3 : 2) : 1;
1171 *p
++ = ((wc
>> (i
<< 3)) & 0x7f) | mask
;
1173 /* reset single shift state */
1174 psenc
->singlegl
= psenc
->singlegr
= -1;
1176 len
= (size_t)(p
- tmp
);
1180 *nresult
= (size_t)-1;
1184 *result
= string
+ len
;
1185 memcpy(string
, tmp
, len
);
1191 *nresult
= (size_t)-1;
1196 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo
* __restrict ei
,
1197 char * __restrict s
, size_t n
,
1198 _ISO2022State
* __restrict psenc
,
1199 size_t * __restrict nresult
)
1201 char buf
[MB_LEN_MAX
];
1206 _DIAGASSERT(ei
!= NULL
);
1207 _DIAGASSERT(nresult
!= 0);
1208 _DIAGASSERT(s
!= NULL
);
1210 /* XXX state will be modified after this operation... */
1211 ret
= _ISO2022_sputwchar(ei
, L
'\0', buf
, sizeof(buf
), &result
, psenc
,
1218 if (sizeof(buf
) < len
|| n
< len
-1) {
1219 /* XXX should recover state? */
1220 *nresult
= (size_t)-1;
1224 memcpy(s
, buf
, len
-1);
1230 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo
* __restrict ei
,
1231 char * __restrict s
, size_t n
, wchar_t wc
,
1232 _ISO2022State
* __restrict psenc
,
1233 size_t * __restrict nresult
)
1235 char buf
[MB_LEN_MAX
];
1240 _DIAGASSERT(ei
!= NULL
);
1241 _DIAGASSERT(s
!= NULL
);
1242 _DIAGASSERT(psenc
!= NULL
);
1243 _DIAGASSERT(nresult
!= 0);
1245 /* XXX state will be modified after this operation... */
1246 ret
= _ISO2022_sputwchar(ei
, wc
, buf
, sizeof(buf
), &result
, psenc
,
1253 if (sizeof(buf
) < len
|| n
< len
) {
1254 /* XXX should recover state? */
1255 *nresult
= (size_t)-1;
1259 memcpy(s
, buf
, len
);
1266 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo
* __restrict ei
,
1267 _csid_t
* __restrict csid
,
1268 _index_t
* __restrict idx
, wchar_t wc
)
1272 _DIAGASSERT(csid
!= NULL
&& idx
!= NULL
);
1274 m
= wc
& 0x7FFF8080;
1275 nm
= wc
& 0x007F7F7F;
1276 if (m
& 0x00800000) {
1281 if (nm
& 0x007F0000) {
1284 } else if (nm
& 0x00007F00) {
1289 *idx
= (_index_t
)nm
;
1296 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo
* __restrict ei
,
1297 wchar_t * __restrict wc
,
1298 _csid_t csid
, _index_t idx
)
1301 _DIAGASSERT(ei
!= NULL
&& wc
!= NULL
);
1303 *wc
= (wchar_t)(csid
& 0x7F808080) | (wchar_t)idx
;
1310 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo
* __restrict ei
,
1311 _ISO2022State
* __restrict psenc
,
1312 int * __restrict rstate
)
1315 if (psenc
->chlen
== 0) {
1316 /* XXX: it should distinguish initial and stable. */
1317 *rstate
= _STDENC_SDGEN_STABLE
;
1319 if (psenc
->ch
[0] == '\033')
1320 *rstate
= _STDENC_SDGEN_INCOMPLETE_SHIFT
;
1322 *rstate
= _STDENC_SDGEN_INCOMPLETE_CHAR
;
1328 /* ----------------------------------------------------------------------
1329 * public interface for ctype
1332 _CITRUS_CTYPE_DECLS(ISO2022
);
1333 _CITRUS_CTYPE_DEF_OPS(ISO2022
);
1335 #include "citrus_ctype_template.h"
1337 /* ----------------------------------------------------------------------
1338 * public interface for stdenc
1341 _CITRUS_STDENC_DECLS(ISO2022
);
1342 _CITRUS_STDENC_DEF_OPS(ISO2022
);
1344 #include "citrus_stdenc_template.h"