2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
21 * The author of this file:
25 * The source code included in this file was separated from mbfilter_kr.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
35 #include "mbfilter_uhc.h"
36 #define UNICODE_TABLE_UHC_DEF
37 #include "unicode_table_uhc.h"
39 static int mbfl_filt_ident_uhc(int c
, mbfl_identify_filter
*filter
);
/*
 * Per-byte table with one entry for every unsigned byte value (256 entries).
 * Entries 0x00-0x80 and 0xFF hold 1; entries 0x81-0xFE hold 2.
 * Presumably the length in bytes of a character that starts with the indexed
 * byte (1 = single byte, 2 = double-byte lead) -- confirm against the code
 * that consumes this table.
 */
static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * NULL-terminated list of alias names for this encoding; its address is
 * stored in the mbfl_encoding_uhc descriptor below.
 */
static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL};
/*
 * Exported encoding descriptor for UHC.
 *
 * NOTE(review): the listing numbers embedded in this view jump from 62 to 66
 * and nothing after 66 is shown, so the entries before and after the alias
 * pointer and the closing brace are absent here.  The only visible member
 * is the address of mbfl_encoding_uhc_aliases, cast to a pointer to an
 * array of const char pointers.
 */
62 const mbfl_encoding mbfl_encoding_uhc
= {
66 (const char *(*)[])&mbfl_encoding_uhc_aliases
,
/*
 * Exported identify vtable for UHC.
 *
 * Visible entries: mbfl_filt_ident_common_ctor followed by
 * mbfl_filt_ident_common_dtor.
 * NOTE(review): listing line 72 and everything after 74 (including the
 * closing brace) are absent from this view; presumably the remaining slot
 * is mbfl_filt_ident_uhc, which is declared static above -- confirm.
 */
71 const struct mbfl_identify_vtbl vtbl_identify_uhc
= {
73 mbfl_filt_ident_common_ctor
,
74 mbfl_filt_ident_common_dtor
,
/*
 * Exported conversion vtable whose filter function is
 * mbfl_filt_conv_uhc_wchar.
 *
 * Visible entries, in order: mbfl_no_encoding_wchar, the common
 * constructor and destructor, mbfl_filt_conv_uhc_wchar, and the common
 * flush function.
 * NOTE(review): listing line 79 (the entry before mbfl_no_encoding_wchar)
 * and the closing brace are absent from this view.
 */
78 const struct mbfl_convert_vtbl vtbl_uhc_wchar
= {
80 mbfl_no_encoding_wchar
,
81 mbfl_filt_conv_common_ctor
,
82 mbfl_filt_conv_common_dtor
,
83 mbfl_filt_conv_uhc_wchar
,
84 mbfl_filt_conv_common_flush
/*
 * Exported conversion vtable whose filter function is
 * mbfl_filt_conv_wchar_uhc.
 *
 * Visible entries, in order: mbfl_no_encoding_wchar, the common
 * constructor and destructor, mbfl_filt_conv_wchar_uhc, and the common
 * flush function.
 * NOTE(review): listing line 89 (the entry after mbfl_no_encoding_wchar)
 * and the closing brace are absent from this view.
 */
87 const struct mbfl_convert_vtbl vtbl_wchar_uhc
= {
88 mbfl_no_encoding_wchar
,
90 mbfl_filt_conv_common_ctor
,
91 mbfl_filt_conv_common_dtor
,
92 mbfl_filt_conv_wchar_uhc
,
93 mbfl_filt_conv_common_flush
/*
 * Evaluate `statement` exactly once and, when its value is negative, make
 * the enclosing function return -1.  Wrapped in do { } while (0) so the
 * macro behaves as a single statement after if/else.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * UHC -> wchar conversion step (per the function name), dispatched on
 * filter->status.
 *
 * What the visible statements do:
 *  - c in [0, 0x80) is handed to filter->output_function unchanged.
 *  - c in (0x80, 0xff) other than 0xc9 is classified as a DBCS lead byte.
 *  - the statements that follow mask c with MBFL_WCSGROUP_MASK, OR in
 *    MBFL_WCSGROUP_THROUGH and emit the result (presumably the else branch
 *    for every other value -- confirm).
 *  - under `case 1` the pair (c1, c) is turned into an index into
 *    uhc1_ucs_table, uhc2_ucs_table or uhc3_ucs_table depending on the
 *    range c1 falls in, and the table entry replaces w when the index is
 *    inside the table.
 *  - every output call is wrapped in CK(), so a negative result from the
 *    output function makes this function return -1.
 *
 * NOTE(review): this view is a listing with gaps (the embedded line
 * numbers skip, e.g. 110 -> 114, 116 -> 120, 144 -> 152).  The return type,
 * braces, the first case label, the assignments to c1 and flag, the
 * conditions guarding the tail of case 1 and the return statement are not
 * visible, so they are not described here.
 */
102 mbfl_filt_conv_uhc_wchar(int c
, mbfl_convert_filter
*filter
)
104 int c1
, w
= 0, flag
= 0;
106 switch (filter
->status
) {
108 if (c
>= 0 && c
< 0x80) { /* latin */
109 CK((*filter
->output_function
)(c
, filter
->data
));
110 } else if (c
> 0x80 && c
< 0xff && c
!= 0xc9) { /* dbcs lead byte */
/* NOTE(review): listing lines 111-113 (body of the lead-byte branch) are not visible. */
/* Tag the unrecognised value as a pass-through code and emit it. */
114 w
= c
& MBFL_WCSGROUP_MASK
;
115 w
|= MBFL_WCSGROUP_THROUGH
;
116 CK((*filter
->output_function
)(w
, filter
->data
));
120 case 1: /* dbcs second byte */
/* NOTE(review): listing lines 121-123 are not visible; c1 must be assigned there -- confirm. */
/*
 * Lead bytes 0x81-0xa0: rows of 190 cells, column origin 0x41.
 * NOTE(review): no range check on c is visible before the index is
 * computed; only the overall table bounds are tested, so a c below 0x41
 * or above 0xfe can land in a neighbouring row -- confirm whether the
 * hidden lines validate c.
 */
124 if ( c1
>= 0x81 && c1
<= 0xa0){
125 w
= (c1
- 0x81)*190 + (c
- 0x41);
126 if (w
>= 0 && w
< uhc1_ucs_table_size
) {
128 w
= uhc1_ucs_table
[w
];
/* Lead bytes 0xa1-0xc6: same 190-cell row layout, second table. */
132 } else if ( c1
>= 0xa1 && c1
<= 0xc6){
133 w
= (c1
- 0xa1)*190 + (c
- 0x41);
134 if (w
>= 0 && w
< uhc2_ucs_table_size
) {
136 w
= uhc2_ucs_table
[w
];
/* Lead bytes 0xc7-0xfe: rows of 94 cells, column origin 0xa1, third table. */
140 } else if ( c1
>= 0xc7 && c1
<= 0xfe){
141 w
= (c1
- 0xc7)*94 + (c
- 0xa1);
142 if (w
>= 0 && w
< uhc3_ucs_table_size
) {
144 w
= uhc3_ucs_table
[w
];
/* NOTE(review): listing lines 145-151 are not visible; the guard for the statements below is unknown from here. */
/* Keep only the plane-mask bits of w, mark it with the UHC plane tag and emit it. */
152 w
&= MBFL_WCSPLANE_MASK
;
153 w
|= MBFL_WCSPLANE_UHC
;
155 CK((*filter
->output_function
)(w
, filter
->data
));
/* Values 0x00-0x20 and 0x7f seen as second byte are emitted unchanged. */
157 if ((c
>= 0 && c
< 0x21) || c
== 0x7f) { /* CTLs */
158 CK((*filter
->output_function
)(c
, filter
->data
));
/* NOTE(review): listing lines 159-160 are not visible; w is presumably rebuilt there before masking -- confirm. */
161 w
&= MBFL_WCSGROUP_MASK
;
162 w
|= MBFL_WCSGROUP_THROUGH
;
163 CK((*filter
->output_function
)(w
, filter
->data
));
/*
 * wchar -> UHC conversion step (per the function name).
 *
 * What the visible statements do:
 *  - c is looked up in the first ucs_*_uhc_table whose half-open range
 *    [table_min, table_max) contains it, trying a1, a2, a3, i, s, r1 and r2
 *    in that order; the table entry is stored in s.
 *  - c1 receives the bits of c outside MBFL_WCSPLANE_MASK; when that
 *    equals MBFL_WCSPLANE_UHC, s becomes the masked-in bits of c.
 *  - s below 0x80 is emitted as one value; the next visible statements
 *    emit bits 8-15 of s and then bits 0-7 of s.
 *  - mbfl_filt_conv_illegal_output(c, filter) is called when
 *    filter->illegal_mode is not MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *  - every output call is wrapped in CK(), so a negative result makes this
 *    function return -1.
 *
 * NOTE(review): this view is a listing with gaps (embedded line numbers
 * skip 180 -> 185, 198 -> 201, 203 -> 212, 216 -> 219).  The return type,
 * the declarations and initial values of s and c1, the conditions that
 * select between the branches below the table lookups, the braces and the
 * return statement are not visible, so they are not described here.
 */
180 mbfl_filt_conv_wchar_uhc(int c
, mbfl_convert_filter
*filter
)
/* Table lookups: each table covers [min, max) and is indexed by c - min. */
185 if (c
>= ucs_a1_uhc_table_min
&& c
< ucs_a1_uhc_table_max
) {
186 s
= ucs_a1_uhc_table
[c
- ucs_a1_uhc_table_min
];
187 } else if (c
>= ucs_a2_uhc_table_min
&& c
< ucs_a2_uhc_table_max
) {
188 s
= ucs_a2_uhc_table
[c
- ucs_a2_uhc_table_min
];
189 } else if (c
>= ucs_a3_uhc_table_min
&& c
< ucs_a3_uhc_table_max
) {
190 s
= ucs_a3_uhc_table
[c
- ucs_a3_uhc_table_min
];
191 } else if (c
>= ucs_i_uhc_table_min
&& c
< ucs_i_uhc_table_max
) {
192 s
= ucs_i_uhc_table
[c
- ucs_i_uhc_table_min
];
193 } else if (c
>= ucs_s_uhc_table_min
&& c
< ucs_s_uhc_table_max
) {
194 s
= ucs_s_uhc_table
[c
- ucs_s_uhc_table_min
];
195 } else if (c
>= ucs_r1_uhc_table_min
&& c
< ucs_r1_uhc_table_max
) {
196 s
= ucs_r1_uhc_table
[c
- ucs_r1_uhc_table_min
];
197 } else if (c
>= ucs_r2_uhc_table_min
&& c
< ucs_r2_uhc_table_max
) {
198 s
= ucs_r2_uhc_table
[c
- ucs_r2_uhc_table_min
];
/* NOTE(review): listing lines 199-200 are not visible; the guard for this fallback is unknown from here. */
/* A value carrying the UHC plane tag has the tag removed and the remaining bits used as s. */
201 c1
= c
& ~MBFL_WCSPLANE_MASK
;
202 if (c1
== MBFL_WCSPLANE_UHC
) {
203 s
= c
& MBFL_WCSPLANE_MASK
;
/* NOTE(review): listing lines 204-211 are not visible. */
212 if (s
< 0x80) { /* latin */
213 CK((*filter
->output_function
)(s
, filter
->data
));
/* Two-byte output: bits 8-15 of s first, then bits 0-7. */
215 CK((*filter
->output_function
)((s
>> 8) & 0xff, filter
->data
));
216 CK((*filter
->output_function
)(s
& 0xff, filter
->data
));
/* Report c through the illegal-output helper unless illegal_mode is NONE. */
219 if (filter
->illegal_mode
!= MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE
) {
220 CK(mbfl_filt_conv_illegal_output(c
, filter
));
227 static int mbfl_filt_ident_uhc(int c
, mbfl_identify_filter
*filter
)
229 switch (filter
->status
) {
231 if (c
>= 0 && c
< 0x80) { /* ok */
233 } else if (c
>= 0x81 && c
<= 0xa0) { /* dbcs first char */
235 } else if (c
>= 0xa1 && c
<= 0xc6) { /* dbcs first char */
237 } else if (c
>= 0xc7 && c
<= 0xfe) { /* dbcs first char */
245 if (c
< 0x41 || (c
> 0x5a && c
< 0x61)
246 || (c
> 0x7a && c
< 0x81) || c
> 0xfe) { /* bad */
253 if (c
< 0xa1 || c
> 0xfe) { /* bad */