i386 removal, part 68/x: Remove a number of obsolete Makefiles from gnu/.
[dragonfly.git] / lib / libc / locale / euc.c
blobad623d75c040c6957da35341e6ef0344a9529adf
1 /*
2 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
5 * Copyright (c) 1993
6 * The Regents of the University of California. All rights reserved.
8 * This code is derived from software contributed to Berkeley by
9 * Paul Borman at Krystal Technologies.
11 * Copyright (c) 2011 The FreeBSD Foundation
12 * All rights reserved.
13 * Portions of this software were developed by David Chisnall
14 * under sponsorship from the FreeBSD Foundation.
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
40 * @(#)euc.c 8.1 (Berkeley) 6/4/93
43 #include <sys/param.h>
45 #include <errno.h>
46 #include <limits.h>
47 #include <runetype.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <wchar.h>
51 #include "mblocal.h"
extern int	__mb_sb_limit;

/* Shared single-character converters, parameterised by the CS2/CS3 layout. */
static size_t	_EUC_mbrtowc_impl(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps,
		    uint8_t cs2, uint8_t cs2width,
		    uint8_t cs3, uint8_t cs3width);
static size_t	_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps,
		    uint8_t cs2, uint8_t cs2width,
		    uint8_t cs3, uint8_t cs3width);

/* Per-locale multibyte -> wide, one character at a time. */
static size_t	_EUC_CN_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);

/* Per-locale wide -> multibyte, one character at a time. */
static size_t	_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);

/* Per-locale multibyte -> wide string conversion. */
static size_t	_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);

/* Per-locale wide -> multibyte string conversion. */
static size_t	_EUC_CN_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);

/* Conversion-state predicate shared by every EUC variant. */
static int	_EUC_mbsinit(const mbstate_t *ps);
/*
 * Conversion state kept inside the caller's mbstate_t between calls.
 */
typedef struct {
	wchar_t	ch;	/* bytes of a partially decoded character so far */
	int	set;	/* not referenced by the code visible in this file */
	int	want;	/* bytes still needed to finish the character; 0 = initial */
} _EucState;
112 static int
113 _EUC_mbsinit(const mbstate_t *ps)
116 return (ps == NULL || ((const _EucState *)ps)->want == 0);
120 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
123 _EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
125 l->__mbrtowc = _EUC_CN_mbrtowc;
126 l->__wcrtomb = _EUC_CN_wcrtomb;
127 l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
128 l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
129 l->__mbsinit = _EUC_mbsinit;
131 l->runes = rl;
132 l->__mb_cur_max = 4;
133 l->__mb_sb_limit = 256;
134 return (0);
137 static size_t
138 _EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
139 size_t n, mbstate_t * __restrict ps)
141 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
144 static size_t
145 _EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
146 const char ** __restrict src,
147 size_t nms, size_t len, mbstate_t * __restrict ps)
149 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
152 static size_t
153 _EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
154 mbstate_t * __restrict ps)
156 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
159 static size_t
160 _EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
161 size_t nwc, size_t len, mbstate_t * __restrict ps)
163 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
167 * EUC-KR uses only CS0 and CS1.
170 _EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
172 l->__mbrtowc = _EUC_KR_mbrtowc;
173 l->__wcrtomb = _EUC_KR_wcrtomb;
174 l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
175 l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
176 l->__mbsinit = _EUC_mbsinit;
178 l->runes = rl;
179 l->__mb_cur_max = 2;
180 l->__mb_sb_limit = 128;
181 return (0);
/*
 * EUC-KR multibyte -> wide conversion of a single character.
 * Both the CS2 and CS3 lead bytes and widths are passed as 0.
 */
static size_t
_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
    size_t n, mbstate_t * __restrict ps)
{
	size_t consumed;

	consumed = _EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0);
	return (consumed);
}
191 static size_t
192 _EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
193 const char ** __restrict src,
194 size_t nms, size_t len, mbstate_t * __restrict ps)
196 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
/*
 * EUC-KR wide -> multibyte conversion of a single character.
 * Both the CS2 and CS3 lead bytes and widths are passed as 0.
 */
static size_t
_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
{
	size_t written;

	written = _EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0);
	return (written);
}
206 static size_t
207 _EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
208 size_t nwc, size_t len, mbstate_t * __restrict ps)
210 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
214 * EUC-JP uses CS0, CS1, CS2, and CS3.
217 _EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
219 l->__mbrtowc = _EUC_JP_mbrtowc;
220 l->__wcrtomb = _EUC_JP_wcrtomb;
221 l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
222 l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
223 l->__mbsinit = _EUC_mbsinit;
225 l->runes = rl;
226 l->__mb_cur_max = 3;
227 l->__mb_sb_limit = 196;
228 return (0);
231 static size_t
232 _EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
233 size_t n, mbstate_t * __restrict ps)
235 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
238 static size_t
239 _EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
240 const char ** __restrict src,
241 size_t nms, size_t len, mbstate_t * __restrict ps)
243 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
246 static size_t
247 _EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
248 mbstate_t * __restrict ps)
250 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
253 static size_t
254 _EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
255 size_t nwc, size_t len, mbstate_t * __restrict ps)
257 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
261 * EUC-TW uses CS0, CS1, and CS2.
264 _EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
266 l->__mbrtowc = _EUC_TW_mbrtowc;
267 l->__wcrtomb = _EUC_TW_wcrtomb;
268 l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
269 l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
270 l->__mbsinit = _EUC_mbsinit;
272 l->runes = rl;
273 l->__mb_cur_max = 4;
274 l->__mb_sb_limit = 256;
275 return (0);
278 static size_t
279 _EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
280 size_t n, mbstate_t * __restrict ps)
282 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
285 static size_t
286 _EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
287 const char ** __restrict src,
288 size_t nms, size_t len, mbstate_t * __restrict ps)
290 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
293 static size_t
294 _EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
295 mbstate_t * __restrict ps)
297 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
300 static size_t
301 _EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
302 size_t nwc, size_t len, mbstate_t * __restrict ps)
304 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
308 * Common EUC code.
311 static size_t
312 _EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
313 size_t n, mbstate_t * __restrict ps,
314 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
316 _EucState *es;
317 int i, want;
318 wchar_t wc = 0;
319 unsigned char ch, chs;
321 es = (_EucState *)ps;
323 if (es->want < 0 || es->want > MB_CUR_MAX) {
324 errno = EINVAL;
325 return ((size_t)-1);
328 if (s == NULL) {
329 s = "";
330 n = 1;
331 pwc = NULL;
334 if (n == 0)
335 /* Incomplete multibyte sequence */
336 return ((size_t)-2);
338 if (es->want == 0) {
339 /* Fast path for plain ASCII (CS0) */
340 if (((ch = (unsigned char)*s) & 0x80) == 0) {
341 if (pwc != NULL)
342 *pwc = ch;
343 return (ch != '\0' ? 1 : 0);
346 if (ch >= 0xa1) {
347 /* CS1 */
348 want = 2;
349 } else if (ch == cs2) {
350 want = cs2width;
351 } else if (ch == cs3) {
352 want = cs3width;
353 } else {
354 errno = EILSEQ;
355 return ((size_t)-1);
359 es->want = want;
360 es->ch = 0;
361 } else {
362 want = es->want;
363 wc = es->ch;
366 for (i = 0; i < MIN(want, n); i++) {
367 wc <<= 8;
368 chs = *s;
369 wc |= chs;
370 s++;
372 if (i < want) {
373 /* Incomplete multibyte sequence */
374 es->want = want - i;
375 es->ch = wc;
376 errno = EILSEQ;
377 return ((size_t)-2);
379 if (pwc != NULL)
380 *pwc = wc;
381 es->want = 0;
382 return (wc == L'\0' ? 0 : want);
385 static size_t
386 _EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
387 mbstate_t * __restrict ps,
388 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
390 _EucState *es;
391 int i, len;
392 wchar_t nm;
394 es = (_EucState *)ps;
396 if (es->want != 0) {
397 errno = EINVAL;
398 return ((size_t)-1);
401 if (s == NULL)
402 /* Reset to initial shift state (no-op) */
403 return (1);
405 if ((wc & ~0x7f) == 0) {
406 /* Fast path for plain ASCII (CS0) */
407 *s = (char)wc;
408 return (1);
411 /* Determine the "length" */
412 if ((unsigned)wc > 0xffffff) {
413 len = 4;
414 } else if ((unsigned)wc > 0xffff) {
415 len = 3;
416 } else if ((unsigned)wc > 0xff) {
417 len = 2;
418 } else {
419 len = 1;
422 if (len > MB_CUR_MAX) {
423 errno = EILSEQ;
424 return ((size_t)-1);
427 /* This first check excludes CS1, which is implicitly valid. */
428 if ((wc < 0xa100) || (wc > 0xffff)) {
429 /* Check for valid CS2 or CS3 */
430 nm = (wc >> ((len - 1) * 8));
431 if (nm == cs2) {
432 if (len != cs2width) {
433 errno = EILSEQ;
434 return ((size_t)-1);
436 } else if (nm == cs3) {
437 if (len != cs3width) {
438 errno = EILSEQ;
439 return ((size_t)-1);
441 } else {
442 errno = EILSEQ;
443 return ((size_t)-1);
447 /* Stash the bytes, least significant last */
448 for (i = len - 1; i >= 0; i--) {
449 s[i] = (wc & 0xff);
450 wc >>= 8;
452 return (len);