2 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 2002-2004 Tim J. Robbins
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * PRC National Standard GB 18030-2000 encoding of Chinese text.
32 * See gb18030(5) for details.
36 #include <sys/types.h>
/*
 * Forward declarations of the file-local GB18030 conversion routines.
 * Each of these is stored into a member of struct lc_ctype by
 * _GB18030_init() further down in this file.
 */
45 static size_t _GB18030_mbrtowc(wchar_t *_RESTRICT_KYWD
,
46 const char *_RESTRICT_KYWD
,
47 size_t, mbstate_t *_RESTRICT_KYWD
);
48 static int _GB18030_mbsinit(const mbstate_t *);
49 static size_t _GB18030_wcrtomb(char *_RESTRICT_KYWD
, wchar_t,
50 mbstate_t *_RESTRICT_KYWD
);
51 static size_t _GB18030_mbsnrtowcs(wchar_t *_RESTRICT_KYWD
,
52 const char **_RESTRICT_KYWD
, size_t, size_t,
53 mbstate_t *_RESTRICT_KYWD
);
54 static size_t _GB18030_wcsnrtombs(char *_RESTRICT_KYWD
,
55 const wchar_t **_RESTRICT_KYWD
, size_t, size_t,
56 mbstate_t *_RESTRICT_KYWD
);
/*
 * Install the GB18030 conversion routines into the supplied ctype table.
 *
 * lct: table whose lc_mbrtowc, lc_wcrtomb, lc_mbsinit, lc_mbsnrtowcs and
 *      lc_wcsnrtombs members are pointed at the static routines of this
 *      file, and whose lc_max_mblen is set to 4 (the longest sequence
 *      form listed in this file is four bytes).
 *
 * NOTE(review): the return type and braces of this definition are not
 * visible in this copy of the file - confirm against the full source.
 */
65 _GB18030_init(struct lc_ctype
*lct
)
68 lct
->lc_mbrtowc
= _GB18030_mbrtowc
;
69 lct
->lc_wcrtomb
= _GB18030_wcrtomb
;
70 lct
->lc_mbsinit
= _GB18030_mbsinit
;
71 lct
->lc_mbsnrtowcs
= _GB18030_mbsnrtowcs
;
72 lct
->lc_wcsnrtombs
= _GB18030_wcsnrtombs
;
73 lct
->lc_max_mblen
= 4;
/*
 * Report whether a conversion state is in its initial state.
 *
 * ps: conversion state, or NULL.
 *
 * Returns nonzero when ps is NULL or when the count member of the state
 * (viewed as a _GB18030State) is 0; returns 0 otherwise.
 */
78 _GB18030_mbsinit(const mbstate_t *ps
)
81 return (ps
== NULL
|| ((const _GB18030State
*)ps
)->count
== 0);
/*
 * Decode one GB18030 multibyte character into a wide character.
 *
 * pwc: destination for the decoded wide character.
 * s:   input bytes.
 * n:   number of input bytes available.
 * ps:  conversion state, used here as a _GB18030State (count + bytes).
 *
 * Visible processing:
 *  - gs->count is range-checked against sizeof (gs->bytes).
 *  - Up to MIN(MIN(n, MB_CUR_MAX), free space in gs->bytes) input bytes
 *    are appended to gs->bytes, and decoding then reads from gs->bytes.
 *  - A lead byte in 0x81-0xfe is followed either by a trail byte in
 *    0x40-0x7e / 0x80-0xfe (two-byte form) or by a byte in 0x30-0x39
 *    (four-byte form, whose third byte must be 0x81-0xfe and fourth
 *    0x30-0x39).
 *  - The wide character is assembled by shifting the value built so far
 *    left by 8 bits and OR-ing in each further byte; in the four-byte
 *    form the lead byte is first masked with 0x7f.
 *
 * Returns 0 when the decoded wide character is L'\0', otherwise
 * len - ocount.
 *
 * NOTE(review): this copy of the function is missing lines (the embedded
 * numbering skips).  The statements run when a range check fails, the
 * handling of the first branch of the lead-byte test, and the assignments
 * to len and ocount are not visible here; ocount is presumably the value
 * of gs->count before the copy - confirm against the full source.
 */
85 _GB18030_mbrtowc(wchar_t *_RESTRICT_KYWD pwc
, const char *_RESTRICT_KYWD s
,
86 size_t n
, mbstate_t *_RESTRICT_KYWD ps
)
93 gs
= (_GB18030State
*)ps
;
95 if (gs
->count
< 0 || gs
->count
> sizeof (gs
->bytes
)) {
/* Never copy more than the caller supplied or than the state can hold. */
106 ncopy
= MIN(MIN(n
, MB_CUR_MAX
), sizeof (gs
->bytes
) - gs
->count
);
107 (void) memcpy(gs
->bytes
+ gs
->count
, s
, ncopy
);
/* From here on, decode out of the bytes accumulated in the state. */
110 s
= (char *)gs
->bytes
;
114 /* Incomplete multibyte sequence */
118 * Single byte: [00-7f]
119 * Two byte: [81-fe][40-7e,80-fe]
120 * Four byte: [81-fe][30-39][81-fe][30-39]
122 ch
= (unsigned char)*s
++;
126 } else if (ch
>= 0x81 && ch
<= 0xfe) {
130 ch
= (unsigned char)*s
++;
131 if ((ch
>= 0x40 && ch
<= 0x7e) || (ch
>= 0x80 && ch
<= 0xfe)) {
132 wch
= (wch
<< 8) | ch
;
134 } else if (ch
>= 0x30 && ch
<= 0x39) {
136 * Strip high bit off the wide character we will
137 * eventually output so that it is positive when
138 * cast to wint_t on 32-bit twos-complement machines.
140 wch
= ((wch
& 0x7f) << 8) | ch
;
143 ch
= (unsigned char)*s
++;
144 if (ch
< 0x81 || ch
> 0xfe)
146 wch
= (wch
<< 8) | ch
;
149 ch
= (unsigned char)*s
++;
150 if (ch
< 0x30 || ch
> 0x39)
152 wch
= (wch
<< 8) | ch
;
/* A decoded null wide character is reported as 0. */
162 return (wch
== L
'\0' ? 0 : len
- ocount
);
/*
 * Encode one wide character as a GB18030 multibyte sequence.
 *
 * s:  output buffer.
 * wc: wide character to encode, in the packed form produced by
 *     _GB18030_mbrtowc() (one encoded byte per 8 bits).
 * ps: conversion state, used here as a _GB18030State.
 *
 * Visible checks, in order:
 *  - gs->count != 0 is tested first.
 *  - wc is tested for any bit outside 0x7fffffff.
 *  - If any of bits 24-30 are set, wc is handled as the four-byte form:
 *    the byte taken from bits 24-31 is checked against 0x81-0xfe, the
 *    byte from bits 16-23 against 0x30-0x39, the byte from bits 8-15
 *    against 0x81-0xfe, and one further value against 0x30-0x39.
 *  - Otherwise, if any of bits 16-23 are set, a separate branch is taken.
 *  - Otherwise, if any of bits 8-15 are set, wc is handled as the
 *    two-byte form: the byte from bits 8-15 is checked against 0x81-0xfe
 *    and the following value fails the check when it is below 0x40 or
 *    equal to 0x7f or 0xff.
 *  - Otherwise wc <= 0x7f selects the single-byte form.
 *
 * NOTE(review): this copy of the function is missing lines (the embedded
 * numbering skips).  The statements run when a check fails, the stores
 * into s, the statement that restores the high bit mentioned in the
 * comment below, the assignments of c before the last check of each form,
 * and the return values are not visible here - confirm against the full
 * source.
 */
169 _GB18030_wcrtomb(char *_RESTRICT_KYWD s
, wchar_t wc
,
170 mbstate_t *_RESTRICT_KYWD ps
)
176 gs
= (_GB18030State
*)ps
;
178 if (gs
->count
!= 0) {
184 /* Reset to initial shift state (no-op) */
186 if ((wc
& ~0x7fffffff) != 0)
/* Any of bits 24-30 set: four-byte form. */
188 if (wc
& 0x7f000000) {
189 /* Replace high bit that mbrtowc() removed. */
191 c
= (wc
>> 24) & 0xff;
192 if (c
< 0x81 || c
> 0xfe)
195 c
= (wc
>> 16) & 0xff;
196 if (c
< 0x30 || c
> 0x39)
199 c
= (wc
>> 8) & 0xff;
200 if (c
< 0x81 || c
> 0xfe)
204 if (c
< 0x30 || c
> 0x39)
208 } else if (wc
& 0x00ff0000)
/* Any of bits 8-15 set (and nothing above): two-byte form. */
210 else if (wc
& 0x0000ff00) {
211 c
= (wc
>> 8) & 0xff;
212 if (c
< 0x81 || c
> 0xfe)
216 if (c
< 0x40 || c
== 0x7f || c
== 0xff)
220 } else if (wc
<= 0x7f) {
/*
 * Convert a multibyte string to wide characters.
 *
 * Forwards dst, src, nms, len and ps unchanged to __mbsnrtowcs_std(),
 * passing _GB18030_mbrtowc as the final argument, and returns that call's
 * result.
 */
233 _GB18030_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst
,
234 const char **_RESTRICT_KYWD src
, size_t nms
, size_t len
,
235 mbstate_t *_RESTRICT_KYWD ps
)
237 return (__mbsnrtowcs_std(dst
, src
, nms
, len
, ps
, _GB18030_mbrtowc
));
/*
 * Convert a wide-character string to a multibyte string.
 *
 * Forwards dst, src, nwc, len and ps unchanged to __wcsnrtombs_std(),
 * passing _GB18030_wcrtomb as the final argument, and returns that call's
 * result.
 */
241 _GB18030_wcsnrtombs(char *_RESTRICT_KYWD dst
,
242 const wchar_t **_RESTRICT_KYWD src
, size_t nwc
, size_t len
,
243 mbstate_t *_RESTRICT_KYWD ps
)
245 return (__wcsnrtombs_std(dst
, src
, nwc
, len
, ps
, _GB18030_wcrtomb
));