4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #ifndef _SYS_KICONV_CCK_COMMON_H
27 #define _SYS_KICONV_CCK_COMMON_H
29 #pragma ident "%Z%%M% %I% %E% SMI"
/* First valid value of the leading byte in an EUC encoding. */
#define	KICONV_EUC_START		(0xA1)

/* Non-zero when v lies within the valid EUC byte range 0xA1 - 0xFE. */
#define	KICONV_IS_VALID_EUC_BYTE(v)	\
	((v) >= KICONV_EUC_START && (v) <= 0xFE)

/* Non-zero when c, taken as an unsigned byte, is ASCII (0x00 - 0x7F). */
#define	KICONV_IS_ASCII(c)		(((uchar_t)(c)) < 0x80)
/*
 * UTF-8 replacement character used for non-identical conversions:
 * its three individual bytes, the same bytes packed into one value,
 * and the length of the sequence in bytes.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR		\
	((KICONV_UTF8_REPLACEMENT_CHAR1 << 16) |	\
	(KICONV_UTF8_REPLACEMENT_CHAR2 << 8) |	\
	KICONV_UTF8_REPLACEMENT_CHAR3)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)
/*
 * Whether the 2nd byte of a 3 or 4 byte UTF-8 character is invalid or not.
 * Evaluates to non-zero when `second' lies outside the inclusive range
 * u8_valid_min_2nd_byte[first] .. u8_valid_max_2nd_byte[first].
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)	\
	(!((second) >= u8_valid_min_2nd_byte[(first)] &&	\
	(second) <= u8_valid_max_2nd_byte[(first)]))
/*
 * If we haven't yet checked for the UTF-8 signature (BOM) character at
 * the beginning of the conversion data stream, we check for it and, if
 * we find one, we skip it since we have no use for it.
 */
65 #define KICONV_CHECK_UTF8_BOM(ib, ibtail) \
66 if (((kiconv_state_t)kcd)->bom_processed == 0 && \
67 ((ibtail) - (ib)) >= 3 && *(ib) == 0xef && \
68 *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) { \
71 ((kiconv_state_t)kcd)->bom_processed = 1
74 * Check BOM of UTF-8 without state information.
76 #define KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail) \
77 if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef && \
78 *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) { \
83 * Set errno and break.
85 #define KICONV_SET_ERRNO_AND_BREAK(err) \
87 ret_val = (size_t)-1; \
91 * Handling flag, advance input buffer, set errno and break.
93 #define KICONV_SET_ERRNO_WITH_FLAG(advance, err) \
94 if (flag & KICONV_REPLACE_INVALID) { \
96 goto REPLACE_INVALID; \
98 KICONV_SET_ERRNO_AND_BREAK((err))
100 /* Conversion table for UTF-8 -> CCK encoding. */
106 /* Conversion table for CCK encoding -> utf8. */
110 } kiconv_table_array_t
;
113 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
114 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
116 typedef int8_t (*kiconv_utf8tocck_t
)(uint32_t utf8
, uchar_t
**ib
,
117 uchar_t
*ibtail
, uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
);
/* Open and close functions shared by all UTF-8 to CCK conversions. */
void	*kiconv_open_to_cck(void);
int	kiconv_close_to_cck(void *);
/* Binary search function. */
size_t	kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);
126 /* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
127 size_t kiconv_utf8_to_cck(void *kcd
, char **inbuf
, size_t *inbytesleft
,
128 char **outbuf
, size_t *outbytesleft
, int *errno
,
129 kiconv_utf8tocck_t ptr_utf8tocck
);
132 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
134 size_t kiconvstr_utf8_to_cck(uchar_t
*inarray
, size_t *inlen
,
135 uchar_t
*outarray
, size_t *outlen
, int flag
, int *errno
,
136 kiconv_utf8tocck_t ptr_utf8tocck
);
/*
 * Tables that come from u8_textprep.c.  We use them to check the
 * validity of UTF-8 characters and their bytes.
 */
extern const int8_t	u8_number_of_bytes[];
extern const uint8_t	u8_valid_min_2nd_byte[];
extern const uint8_t	u8_valid_max_2nd_byte[];
152 #endif /* _SYS_KICONV_CCK_COMMON_H */