2 * _codecs_jp.c: Codecs collection for Japanese encodings
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
7 #define USING_BINARY_PAIR_SEARCH
8 #define EMPBASE 0x20000
10 #include "cjkcodecs.h"
11 #include "mappings_jp.h"
12 #include "mappings_jisx0213_pair.h"
13 #include "alg_jisx0201.h"
14 #include "emu_jisx0213_2000.h"
28 WRITE1((unsigned char)c
)
32 else if (c
>= 0xff61 && c
<= 0xff9f) {
37 else if (c
>= 0xf8f0 && c
<= 0xf8f3) {
38 /* Windows compatibility */
43 OUT1(c
- 0xfef1 + 0xfd)
51 TRYMAP_ENC(cp932ext
, code
, c
) {
55 else TRYMAP_ENC(jisxcommon
, code
, c
) {
56 if (code
& 0x8000) /* MSB set: JIS X 0212 */
62 c2
= (((c1
- 0x21) & 1) ? 0x5e : 0) + (c2
- 0x21);
63 c1
= (c1
- 0x21) >> 1;
64 OUT1(c1
< 0x1f ? c1
+ 0x81 : c1
+ 0xc1)
65 OUT2(c2
< 0x3f ? c2
+ 0x40 : c2
+ 0x41)
67 else if (c
>= 0xe000 && c
< 0xe758) {
68 /* User-defined area */
69 c1
= (Py_UNICODE
)(c
- 0xe000) / 188;
70 c2
= (Py_UNICODE
)(c
- 0xe000) % 188;
72 OUT2(c2
< 0x3f ? c2
+ 0x40 : c2
+ 0x41)
86 unsigned char c
= IN1
, c2
;
94 else if (c
>= 0xa0 && c
<= 0xdf) {
96 OUT1(0xf8f0) /* half-width katakana */
102 else if (c
>= 0xfd/* && c <= 0xff*/) {
103 /* Windows compatibility */
104 OUT1(0xf8f1 - 0xfd + c
)
112 TRYMAP_DEC(cp932ext
, **outbuf
, c
, c2
);
113 else if ((c
>= 0x81 && c
<= 0x9f) || (c
>= 0xe0 && c
<= 0xea)){
114 if (c2
< 0x40 || (c2
> 0x7e && c2
< 0x80) || c2
> 0xfc)
117 c
= (c
< 0xe0 ? c
- 0x81 : c
- 0xc1);
118 c2
= (c2
< 0x80 ? c2
- 0x40 : c2
- 0x41);
119 c
= (2 * c
+ (c2
< 0x5e ? 0 : 1) + 0x21);
120 c2
= (c2
< 0x5e ? c2
: c2
- 0x5e) + 0x21;
122 TRYMAP_DEC(jisx0208
, **outbuf
, c
, c2
);
125 else if (c
>= 0xf0 && c
<= 0xf9) {
126 if ((c2
>= 0x40 && c2
<= 0x7e) ||
127 (c2
>= 0x80 && c2
<= 0xfc))
128 OUT1(0xe000 + 188 * (c
- 0xf0) +
129 (c2
< 0x80 ? c2
- 0x40 : c2
- 0x41))
147 ENCODER(euc_jis_2004
)
161 insize
= GET_INSIZE(c
);
164 EMULATE_JISX0213_2000_ENCODE_BMP(code
, c
)
165 else TRYMAP_ENC(jisx0213_bmp
, code
, c
) {
166 if (code
== MULTIC
) {
168 if (flags
& MBENC_FLUSH
) {
169 code
= find_pairencmap(
171 jisx0213_pair_encmap
,
180 code
= find_pairencmap(
181 (ucs2_t
)c
, (*inbuf
)[1],
182 jisx0213_pair_encmap
,
184 if (code
== DBCINV
) {
185 code
= find_pairencmap(
187 jisx0213_pair_encmap
,
196 else TRYMAP_ENC(jisxcommon
, code
, c
);
197 else if (c
>= 0xff61 && c
<= 0xff9f) {
198 /* JIS X 0201 half-width katakana */
199 WRITE2(0x8e, c
- 0xfec0)
203 else if (c
== 0xff3c)
204 /* F/W REVERSE SOLIDUS (see NOTES) */
206 else if (c
== 0xff5e)
207 /* F/W TILDE (see NOTES) */
212 else if (c
>> 16 == EMPBASE
>> 16) {
213 EMULATE_JISX0213_2000_ENCODE_EMP(code
, c
)
214 else TRYMAP_ENC(jisx0213_emp
, code
, c
& 0xffff);
222 WRITE3(0x8f, code
>> 8, (code
& 0xFF) | 0x80)
226 WRITE2((code
>> 8) | 0x80, (code
& 0xFF) | 0x80)
234 DECODER(euc_jis_2004
)
237 unsigned char c
= IN1
;
249 /* JIS X 0201 half-width katakana */
254 if (c2
>= 0xa1 && c2
<= 0xdf) {
261 else if (c
== 0x8f) {
262 unsigned char c2
, c3
;
268 /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
269 EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf
, c2
, c3
)
270 else TRYMAP_DEC(jisx0213_2_bmp
, **outbuf
, c2
, c3
) ;
271 else TRYMAP_DEC(jisx0213_2_emp
, code
, c2
, c3
) {
272 WRITEUCS4(EMPBASE
| code
)
276 else TRYMAP_DEC(jisx0212
, **outbuf
, c2
, c3
) ;
287 /* JIS X 0213 Plane 1 */
288 EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf
, c
, c2
)
289 else if (c
== 0x21 && c2
== 0x40) **outbuf
= 0xff3c;
290 else if (c
== 0x22 && c2
== 0x32) **outbuf
= 0xff5e;
291 else TRYMAP_DEC(jisx0208
, **outbuf
, c
, c2
);
292 else TRYMAP_DEC(jisx0213_1_bmp
, **outbuf
, c
, c2
);
293 else TRYMAP_DEC(jisx0213_1_emp
, code
, c
, c2
) {
294 WRITEUCS4(EMPBASE
| code
)
298 else TRYMAP_DEC(jisx0213_pair
, code
, c
, c2
) {
299 WRITE2(code
>> 16, code
& 0xffff)
323 WRITE1((unsigned char)c
)
330 TRYMAP_ENC(jisxcommon
, code
, c
);
331 else if (c
>= 0xff61 && c
<= 0xff9f) {
332 /* JIS X 0201 half-width katakana */
333 WRITE2(0x8e, c
- 0xfec0)
338 else if (c
== 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
340 else if (c
== 0xa5) { /* YEN SIGN */
344 } else if (c
== 0x203e) { /* OVERLINE */
355 WRITE3(0x8f, code
>> 8, (code
& 0xFF) | 0x80)
359 WRITE2((code
>> 8) | 0x80, (code
& 0xFF) | 0x80)
370 unsigned char c
= IN1
;
381 /* JIS X 0201 half-width katakana */
386 if (c2
>= 0xa1 && c2
<= 0xdf) {
393 else if (c
== 0x8f) {
394 unsigned char c2
, c3
;
400 TRYMAP_DEC(jisx0212
, **outbuf
, c2
^ 0x80, c3
^ 0x80) {
413 if (c
== 0xa1 && c2
== 0xc0)
414 /* FULL-WIDTH REVERSE SOLIDUS */
418 TRYMAP_DEC(jisx0208
, **outbuf
,
419 c
^ 0x80, c2
^ 0x80) ;
438 unsigned char c1
, c2
;
441 JISX0201_R_ENCODE(c
, code
)
443 if (c
< 0x80) code
= c
;
444 else if (c
== 0x00a5) code
= 0x5c; /* YEN SIGN */
445 else if (c
== 0x203e) code
= 0x7e; /* OVERLINE */
447 else JISX0201_K_ENCODE(c
, code
)
451 if (code
< 0x80 || (code
>= 0xa1 && code
<= 0xdf)) {
454 OUT1((unsigned char)code
)
461 if (code
== NOCHAR
) {
462 TRYMAP_ENC(jisxcommon
, code
, c
);
464 else if (c
== 0xff3c)
465 code
= 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
470 if (code
& 0x8000) /* MSB set: JIS X 0212 */
476 c2
= (((c1
- 0x21) & 1) ? 0x5e : 0) + (c2
- 0x21);
477 c1
= (c1
- 0x21) >> 1;
478 OUT1(c1
< 0x1f ? c1
+ 0x81 : c1
+ 0xc1)
479 OUT2(c2
< 0x3f ? c2
+ 0x40 : c2
+ 0x41)
489 unsigned char c
= IN1
;
494 JISX0201_R_DECODE(c
, **outbuf
)
496 if (c
< 0x80) **outbuf
= c
;
498 else JISX0201_K_DECODE(c
, **outbuf
)
499 else if ((c
>= 0x81 && c
<= 0x9f) || (c
>= 0xe0 && c
<= 0xea)){
500 unsigned char c1
, c2
;
504 if (c2
< 0x40 || (c2
> 0x7e && c2
< 0x80) || c2
> 0xfc)
507 c1
= (c
< 0xe0 ? c
- 0x81 : c
- 0xc1);
508 c2
= (c2
< 0x80 ? c2
- 0x40 : c2
- 0x41);
509 c1
= (2 * c1
+ (c2
< 0x5e ? 0 : 1) + 0x21);
510 c2
= (c2
< 0x5e ? c2
: c2
- 0x5e) + 0x21;
513 if (c1
== 0x21 && c2
== 0x40) {
514 /* FULL-WIDTH REVERSE SOLIDUS */
520 TRYMAP_DEC(jisx0208
, **outbuf
, c1
, c2
) {
530 NEXT(1, 1) /* JIS X 0201 */
538 * SHIFT_JIS-2004 codec
541 ENCODER(shift_jis_2004
)
545 DBCHAR code
= NOCHAR
;
549 JISX0201_ENCODE(c
, code
)
550 else DECODE_SURROGATE(c
)
552 if (code
< 0x80 || (code
>= 0xa1 && code
<= 0xdf)) {
553 WRITE1((unsigned char)code
)
559 insize
= GET_INSIZE(c
);
561 if (code
== NOCHAR
) {
563 EMULATE_JISX0213_2000_ENCODE_BMP(code
, c
)
564 else TRYMAP_ENC(jisx0213_bmp
, code
, c
) {
565 if (code
== MULTIC
) {
567 if (flags
& MBENC_FLUSH
) {
568 code
= find_pairencmap
570 jisx0213_pair_encmap
,
579 code
= find_pairencmap(
581 jisx0213_pair_encmap
,
583 if (code
== DBCINV
) {
584 code
= find_pairencmap(
586 jisx0213_pair_encmap
,
596 else TRYMAP_ENC(jisxcommon
, code
, c
) {
597 /* abandon JIS X 0212 codes */
603 else if (c
>> 16 == EMPBASE
>> 16) {
604 EMULATE_JISX0213_2000_ENCODE_EMP(code
, c
)
605 else TRYMAP_ENC(jisx0213_emp
, code
, c
&0xffff);
613 c2
= (code
& 0xff) - 0x21;
615 if (c1
& 0x80) { /* Plane 2 */
616 if (c1
>= 0xee) c1
-= 0x87;
617 else if (c1
>= 0xac || c1
== 0xa8) c1
-= 0x49;
623 if (c1
& 1) c2
+= 0x5e;
625 OUT1(c1
+ (c1
< 0x1f ? 0x81 : 0xc1))
626 OUT2(c2
+ (c2
< 0x3f ? 0x40 : 0x41))
634 DECODER(shift_jis_2004
)
637 unsigned char c
= IN1
;
640 JISX0201_DECODE(c
, **outbuf
)
641 else if ((c
>= 0x81 && c
<= 0x9f) || (c
>= 0xe0 && c
<= 0xfc)){
642 unsigned char c1
, c2
;
647 if (c2
< 0x40 || (c2
> 0x7e && c2
< 0x80) || c2
> 0xfc)
650 c1
= (c
< 0xe0 ? c
- 0x81 : c
- 0xc1);
651 c2
= (c2
< 0x80 ? c2
- 0x40 : c2
- 0x41);
652 c1
= (2 * c1
+ (c2
< 0x5e ? 0 : 1));
653 c2
= (c2
< 0x5e ? c2
: c2
- 0x5e) + 0x21;
655 if (c1
< 0x5e) { /* Plane 1 */
657 EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf
,
659 else TRYMAP_DEC(jisx0208
, **outbuf
, c1
, c2
) {
662 else TRYMAP_DEC(jisx0213_1_bmp
, **outbuf
,
666 else TRYMAP_DEC(jisx0213_1_emp
, code
, c1
, c2
) {
667 WRITEUCS4(EMPBASE
| code
)
669 else TRYMAP_DEC(jisx0213_pair
, code
, c1
, c2
) {
670 WRITE2(code
>> 16, code
& 0xffff)
678 if (c1
>= 0x67) c1
+= 0x07;
679 else if (c1
>= 0x63 || c1
== 0x5f) c1
-= 0x37;
682 EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf
,
684 else TRYMAP_DEC(jisx0213_2_bmp
, **outbuf
,
686 else TRYMAP_DEC(jisx0213_2_emp
, code
, c1
, c2
) {
687 WRITEUCS4(EMPBASE
| code
)
700 NEXT(1, 1) /* JIS X 0201 */
708 MAPPING_DECONLY(jisx0208
)
709 MAPPING_DECONLY(jisx0212
)
710 MAPPING_ENCONLY(jisxcommon
)
711 MAPPING_DECONLY(jisx0213_1_bmp
)
712 MAPPING_DECONLY(jisx0213_2_bmp
)
713 MAPPING_ENCONLY(jisx0213_bmp
)
714 MAPPING_DECONLY(jisx0213_1_emp
)
715 MAPPING_DECONLY(jisx0213_2_emp
)
716 MAPPING_ENCONLY(jisx0213_emp
)
717 MAPPING_ENCDEC(jisx0213_pair
)
718 MAPPING_ENCDEC(cp932ext
)
722 CODEC_STATELESS(shift_jis
)
723 CODEC_STATELESS(cp932
)
724 CODEC_STATELESS(euc_jp
)
725 CODEC_STATELESS(shift_jis_2004
)
726 CODEC_STATELESS(euc_jis_2004
)
727 { "euc_jisx0213", (void *)2000, NULL
, _STATELESS_METHODS(euc_jis_2004
) },
728 { "shift_jisx0213", (void *)2000, NULL
, _STATELESS_METHODS(shift_jis_2004
) },
731 I_AM_A_MODULE_FOR(jp
)