2 Unix SMB/Netbios implementation.
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 Japanese language support added by <fujita@ainix.isac.co.jp> 1994.9.5,
22 coding systems extended to EUC/SJIS/JIS/HEX on 1994.10.11,
23 and all JIS code sequence types added on 1995.8.16.
24 Note: hexadecimal code by <ohki@gssm.otuka.tsukuba.ac.jp>.
25 Support for machine-dependent codes and user-defined codes added
26 by Hiroshi MIURA <miura@samba.gr.jp> 2000.3.19
33 * Function pointers that get overridden when multi-byte code pages
/*
 * String-search vectors. Each one starts out pointing at the matching
 * C library routine (strchr, strrchr, strstr, strtok), cast to the
 * pointer type declared here. initialize_multibyte_vectors() reassigns
 * all four according to the client code page.
 */
37 const char *(*multibyte_strchr
)(const char *, int ) = (const char *(*)(const char *, int )) strchr
;
38 const char *(*multibyte_strrchr
)(const char *, int ) = (const char *(*)(const char *, int )) strrchr
;
39 const char *(*multibyte_strstr
)(const char *, const char *) = (const char *(*)(const char *, const char *)) strstr
;
40 char *(*multibyte_strtok
)(char *, const char *) = (char *(*)(char *, const char *)) strtok
;
43 * Kanji is treated differently here due to historical accident of
44 * it being the first non-English codepage added to Samba.
45 * The define 'KANJI' is being overloaded to mean 'use kanji codepage
46 * by default' and also 'this is the filename-to-disk conversion
47 * method to use'. This really should be removed and all control
48 * over this left in the smb.conf parameters 'client codepage'
49 * and 'coding system'.
55 * Set the default conversion to be the functions in
/*
 * Single-byte defaults.
 * Forward declarations of the two helpers defined near the end of this
 * file, followed by the conversion vectors initialised to the
 * dos2unix_format / unix2dos_format family and the multibyte predicates
 * initialised to the "not multibyte" helpers.
 * NOTE(review): the same six vectors are defined again immediately below
 * with Shift-JIS initialisers; presumably only one of the two groups is
 * compiled (selected by the 'KANJI' define mentioned above) -- confirm.
 */
59 static size_t skip_non_multibyte_char(char);
60 static BOOL
not_multibyte_char_1(char);
62 char *(*_dos_to_unix
)(char *) = dos2unix_format
;
63 char *(*_dos_to_unix_static
)(const char *) = dos2unix_format_static
;
64 char *(*_unix_to_dos
)(char *) = unix2dos_format
;
65 char *(*_unix_to_dos_static
)(const char *) = unix2dos_format_static
;
66 size_t (*_skip_multibyte_char
)(char) = skip_non_multibyte_char
;
67 BOOL (*is_multibyte_char_1
)(char) = not_multibyte_char_1
;
72 * Set the default conversion to be the function
73 * sj_to_sj in this file.
/*
 * Kanji defaults.
 * Forward declarations of the Shift-JIS helpers defined later in this
 * file, followed by the conversion vectors. Both directions start out as
 * the identity conversion sj_to_sj / sj_to_sj_static, and the multibyte
 * predicates start out as the Shift-JIS ones.
 *
 * is_multibyte_char_1 is declared with a BOOL return type so that it
 * matches the function it is initialised with (is_kanji_multibyte_char_1
 * returns BOOL) and the other definition of this vector in this file,
 * instead of relying on BOOL happening to be int.
 */
76 static char *sj_to_sj(char *from
);
77 static char *sj_to_sj_static(const char *from
);
78 static size_t skip_kanji_multibyte_char(char);
79 static BOOL
is_kanji_multibyte_char_1(char);
81 char *(*_dos_to_unix
)(char *) = sj_to_sj
;
82 char *(*_dos_to_unix_static
)(const char *) = sj_to_sj_static
;
83 char *(*_unix_to_dos
)(char *) = sj_to_sj
;
84 char *(*_unix_to_dos_static
)(const char *) = sj_to_sj_static
;
85 size_t (*_skip_multibyte_char
)(char) = skip_kanji_multibyte_char
;
86 BOOL (*is_multibyte_char_1
)(char) = is_kanji_multibyte_char_1
;
/*
 * Flag set by initialize_multibyte_vectors(): True for the Kanji, Hangul,
 * Big5 and Simplified Chinese code pages, False for single-byte code pages.
 */
90 BOOL global_is_multibyte_codepage
= False
;
92 /* jis si/so sequence */
/* NOTE(review): no use of jis_kso / jis_ksi is visible in this listing -- confirm where they are read. */
93 static char jis_kso
= JIS_KSO
;
94 static char jis_ksi
= JIS_KSI
;
/*
 * Tag character compared against the input by hex_to_sj_static() and
 * cap_to_sj_static(); interpret_coding_system() can replace it with the
 * fourth character of a "hex?" coding-system name.
 */
95 static char hex_tag
= HEXTAG
;
97 /*******************************************************************
99 ********************************************************************/
101 /*******************************************************************
102 Search S1 for the next token; tokens are separated by any char of S2.
103 S1 contains SHIFT JIS chars.
104 ********************************************************************/
106 static char *sj_strtok(char *s1
, const char *s2
)
108 static char *s
= NULL
;
116 for (q
= s1
; *s1
; ) {
117 if (is_shift_jis (*s1
)) {
119 } else if (is_kana (*s1
)) {
122 char *p
= strchr (s2
, *s1
);
141 /*******************************************************************
142 Search for string S2 in S1.
143 S1 contains SHIFT JIS chars.
144 ********************************************************************/
146 static const char *sj_strstr(const char *s1
, const char *s2
)
148 size_t len
= strlen (s2
);
150 return (const char *) s1
;
153 if (strncmp (s1
, s2
, len
) == 0)
154 return (const char *) s1
;
156 if (is_shift_jis (*s1
)) {
165 /*******************************************************************
166 Search for char C from the beginning of S.
167 S contains SHIFT JIS chars.
168 ********************************************************************/
170 static const char *sj_strchr (const char *s
, int c
)
174 return (const char *) s
;
175 if (is_shift_jis (*s
)) {
184 /*******************************************************************
185 Search for char C from the end of S.
186 S contains SHIFT JIS chars.
187 ********************************************************************/
189 static const char *sj_strrchr(const char *s
, int c
)
195 q
= (const char *) s
;
197 if (is_shift_jis (*s
)) {
206 /*******************************************************************
207 Kanji multibyte char skip function.
208 *******************************************************************/
210 static size_t skip_kanji_multibyte_char(char c
)
212 if(is_shift_jis(c
)) {
214 } else if (is_kana(c
)) {
220 /*******************************************************************
221 Kanji multibyte char identification.
222 *******************************************************************/
224 static BOOL
is_kanji_multibyte_char_1(char c
)
226 return is_shift_jis(c
);
229 /*******************************************************************
230 The following functions are the only ones needed to do multibyte
231 support for Hangul, Big5 and Simplified Chinese. Most of the
232 real work for these codepages is done in the generic multibyte
233 functions. The only reason these functions are needed at all
234 is that the is_xxx(c) calls are really preprocessor macros.
235 ********************************************************************/
237 /*******************************************************************
238 Hangul (Korean - code page 949) function.
239 ********************************************************************/
241 static BOOL
hangul_is_multibyte_char_1(char c
)
246 /*******************************************************************
247 Big5 Traditional Chinese (code page 950) function.
248 ********************************************************************/
250 static BOOL
big5_is_multibyte_char_1(char c
)
252 return is_big5_c1(c
);
255 /*******************************************************************
256 Simplified Chinese (code page 936) function.
257 ********************************************************************/
259 static BOOL
simpch_is_multibyte_char_1(char c
)
261 return is_simpch_c1(c
);
264 /*******************************************************************
265 Generic multibyte functions - used by Hangul, Big5 and Simplified
267 ********************************************************************/
269 /*******************************************************************
270 Search S1 for the next token; tokens are separated by any char of S2.
271 S1 contains generic multibyte chars.
272 ********************************************************************/
274 static char *generic_multibyte_strtok(char *s1
, const char *s2
)
276 static char *s
= NULL
;
284 for (q
= s1
; *s1
; ) {
285 if ((*is_multibyte_char_1
)(*s1
)) {
288 char *p
= strchr (s2
, *s1
);
307 /*******************************************************************
308 Search for string S2 in S1.
309 S1 contains generic multibyte chars.
310 ********************************************************************/
312 static const char *generic_multibyte_strstr(const char *s1
, const char *s2
)
314 size_t len
= strlen (s2
);
316 return (const char *) s1
;
319 if (strncmp (s1
, s2
, len
) == 0)
320 return (const char *) s1
;
322 if ((*is_multibyte_char_1
)(*s1
)) {
331 /*******************************************************************
332 Search for char C from the beginning of S.
333 S contains generic multibyte chars.
334 ********************************************************************/
336 static const char *generic_multibyte_strchr(const char *s
, int c
)
340 return (const char *) s
;
341 if ((*is_multibyte_char_1
)(*s
)) {
350 /*******************************************************************
351 Search for char C from the end of S.
352 S contains generic multibyte chars.
353 ********************************************************************/
355 static const char *generic_multibyte_strrchr(const char *s
, int c
)
361 q
= (const char *) s
;
363 if ((*is_multibyte_char_1
)(*s
)) {
372 /*******************************************************************
373 Generic multibyte char skip function.
374 *******************************************************************/
376 static size_t skip_generic_multibyte_char(char c
)
378 if( (*is_multibyte_char_1
)(c
)) {
384 /*******************************************************************
386 ********************************************************************/
388 /* conversion buffer: static scratch area written by the *_static converters in this file; sized at twice a pstring */
389 static char cvtbuf
[2*sizeof(pstring
)];
391 /*******************************************************************
393 ********************************************************************/
395 static int euc2sjis (int hi
, int lo
)
398 int maxidx
= SJISREVTBLSIZ
;
403 hi
= hi
/ 2 + (hi
< 0xdf ? 0x31 : 0x71);
404 w
= (hi
<< 8) | (lo
- (lo
>= 0xe0 ? 0x60 : 0x61));
406 hi
= hi
/ 2 + (hi
< 0xdf ? 0x30 : 0x70);
407 w
= (hi
<< 8) | (lo
- 2);
409 if ( (0x87 < hi
) && (hi
< 0xed ) ) {
412 while ( maxidx
>= minidx
) {
413 if ( sjisrev
[i
].start
> w
) {
415 } else if ( w
> sjisrev
[i
].end
) {
418 w
-= sjisrev
[i
].start
;
419 w
+= sjisrev
[i
].rstart
;
422 i
= (int)( minidx
+ (maxidx
- minidx
) % 2 );
427 static int sjis2euc (int hi
, int lo
)
430 int maxidx
= SJISCONVTBLSIZ
-1; /* max index 1 less than number of entries */
431 int i
= ( 0 + SJISCONVTBLSIZ
) % 2;
432 int w
= (int)((hi
<< 8) | lo
);
434 if ( (sjisconv
[0].start
< w
) && (w
< sjisconv
[SJISCONVTBLSIZ
-1].end
) ) {
435 while (maxidx
>= minidx
) {
436 if ( sjisconv
[i
].start
> w
) {
438 } else if (w
> sjisconv
[i
].end
) {
441 w
-= sjisconv
[i
].start
;
442 w
+= sjisconv
[i
].rstart
;
445 i
= (int)( minidx
+ (maxidx
-minidx
)%2 );
447 hi
= (int) ((w
>> 8) & 0xff);
448 lo
= (int) (w
& 0xff);
455 return ((hi
* 2 - (hi
>= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo
+ 2);
457 return ((hi
* 2 - (hi
>= 0xe0 ? 0xe1 : 0x61)) << 8) |
458 (lo
+ (lo
>= 0x7f ? 0x60 : 0x61));
461 /*******************************************************************
462 Convert FROM, which contains SHIFT JIS codes, to EUC codes;
463 return the converted buffer.
464 ********************************************************************/
466 static char *sj_to_euc_static(const char *from
)
470 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-3);) {
471 if (is_shift_jis (*from
)) {
472 int code
= sjis2euc ((int) from
[0] & 0xff, (int) from
[1] & 0xff);
473 *out
++ = (code
>> 8) & 0xff;
474 *out
++ = code
& 0xff;
476 } else if (is_kana (*from
)) {
477 *out
++ = (char)euc_kana
;
/*
 * In-place counterpart of sj_to_euc_static(): passes FROM through
 * sj_to_euc_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _dos_to_unix vector by setup_string_function().
 */
487 static char *sj_to_euc(char *from
)
489 pstrcpy(from
, sj_to_euc_static(from
));
493 /*******************************************************************
494 Convert FROM, which contains EUC codes, to SHIFT JIS codes;
495 return the converted buffer.
496 ********************************************************************/
498 static char *euc_to_sj_static(const char *from
)
502 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-3); ) {
503 if (is_euc (*from
)) {
504 int code
= euc2sjis ((int) from
[0] & 0xff, (int) from
[1] & 0xff);
505 *out
++ = (code
>> 8) & 0xff;
506 *out
++ = code
& 0xff;
508 } else if (is_euc_kana (*from
)) {
/*
 * In-place counterpart of euc_to_sj_static(): passes FROM through
 * euc_to_sj_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _unix_to_dos vector by setup_string_function().
 */
519 static char *euc_to_sj(char *from
)
521 pstrcpy(from
, euc_to_sj_static(from
));
525 /*******************************************************************
527 ********************************************************************/
528 static int sjis3euc (int hi
, int lo
, int *len
)
534 w
= (int)((hi
<< 8) | lo
);
537 if ( ( 0x40 >= lo
) && (lo
>= 0xfc) && (lo
== 0x7f )) {
538 w
= (GETAHI
<< 8) | GETALO
;
540 /* IBM Extended Kanji */
541 } else if (( w
== 0xfa54 )||( w
== 0x81ca )) {
545 } else if (( w
== 0xfa5b )||( w
== 0x81e6)) {
549 } else if (( 0xfa <= hi
) && ( hi
<= 0xfc ) ) {
550 i
= w
- 0xfa40 - ( hi
- 0xfa )*( 0xfb40 - 0xfafc) - ((lo
< 0x7f)? 0 : 1 );
551 if ( i
<= EUC3CONVTBLSIZ
){
556 /* NEC selected IBM Extend Kanji */
557 /* there are 3 codes that do not convert well */
558 } else if (( 0x8754 <= w
) && ( w
<= 0x878a)) {
560 maxidx
= EUC3CONV2TBLSIZ
;
561 i
= minidx
+ (maxidx
- minidx
) % 2;
562 while ( maxidx
>= minidx
) {
563 if ( euc3conv2
[i
].sjis
> w
) {
565 } else if ( w
> euc3conv2
[i
].sjis
) {
569 return (euc3conv2
[i
].euc
);
571 i
= (int)( minidx
+ (maxidx
- minidx
) % 2 );
573 /* else normal EUC */
575 } else if (( w
== 0xeef9 ) || ( w
== 0x81ca )) {
579 } else if (( 0xed <= hi
) && ( hi
<= 0xef )) {
581 maxidx
= SJISREVTBLSIZ
;
583 while ( maxidx
>= minidx
) {
584 if ( sjisrev
[i
].start
> w
) {
586 } else if ( w
> sjisrev
[i
].end
) {
589 w
-= sjisrev
[i
].start
;
590 w
+= sjisrev
[i
].rstart
;
593 i
= (int)( minidx
+ (maxidx
- minidx
) % 2 );
596 i
= w
- 0xfa40 - ( hi
- 0xfa )*( 0xfb40 - 0xfafc) - ((lo
< 0x7f)? 0 : 1 );
597 if ( i
<= EUC3CONVTBLSIZ
){
601 w
= (GETAHI
<< 8) | GETALO
;
604 /* else normal EUC */
607 /* this area maps to the G2 UDC area: 0xf5a1 -- 0xfefe */
608 } else if ((0xf0 <= hi
) && (hi
<= 0xf4)) {
611 return (((hi
* 2 - 0xea) << 8) | (lo
+ 2));
613 return (((hi
* 2 - 0xeb) << 8) | (lo
+ (lo
>=0x7f ? 0x60: 0x61 )));
617 /* this area maps to the G3 UDC area: 0xf8f5a1 -- 0xf8fefe */
618 } else if ((0xf5 <= hi
) && (hi
<= 0xf9)) {
621 return (((hi
*2 - 0xf4) << 8) | (lo
+ 2));
623 return (((hi
*2 - 0xf5) << 8) | (lo
+ (lo
>= 0x7f ? 0x60: 0x61 )));
625 /* ....checked all special case */
628 /* These Normal 2 byte EUC */
630 hi
= (int) ((w
>> 8) & 0xff);
631 lo
= (int) (w
& 0xff);
633 if (hi
>= 0xf0) { /* Check range */
639 return ((hi
* 2 - (hi
>= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo
+ 2);
641 return ((hi
* 2 - (hi
>= 0xe0 ? 0xe1 : 0x61)) << 8) |
642 (lo
+ (lo
>= 0x7f ? 0x60 : 0x61));
645 static int euc3sjis (int hi
, int lo
, BOOL is_3byte
)
649 w
= (int)((hi
<< 8) | lo
);
651 if (( 0xf5 <= hi
) && ( hi
<= 0xfe)) {
653 /* this area maps to the G3 UDC area */
654 /* 0xf8f5a1 -- 0xf8fefe --> 0xf540 -- 0xf9fc */
656 return (((hi
/ 2 + 0x7b) << 8) | (lo
- (lo
>= 0xe0 ? 0x60 : 0x61)));
658 return (((hi
/ 2 + 0x7a) << 8) | (lo
- 2));
661 /* Using map table */
663 int maxidx
= EUC3REVTBLSIZ
;
664 int i
= minidx
+ (maxidx
- minidx
) % 2;
666 while ( maxidx
>= minidx
) {
667 if (euc3rev
[i
].euc
> w
) {
669 } else if (euc3rev
[i
].euc
< w
) {
672 return (euc3rev
[i
].sjis
);
674 i
= (int)( minidx
+ ( maxidx
- minidx
) % 2);
676 return ((GETAHI
<< 8 ) | GETALO
);
678 } else { /* is_2byte */
679 if ((0xf5 <= hi
) && (hi
<= 0xfe)) {
681 /* this area maps to the G2 UDC area */
682 /* 0xf5a1 -- 0xfefe --> 0xf040 -- 0xf4fc */
684 return (((hi
/ 2 + 0x76) << 8) | (lo
- (lo
>= 0xe0 ? 0x60 : 0x61)));
686 return (((hi
/ 2 + 0x75) << 8) | (lo
- 2));
688 } else { /* Normal EUC */
690 hi
= hi
/ 2 + (hi
< 0xdf ? 0x31 : 0x71);
691 return ((hi
<< 8) | (lo
- (lo
>= 0xe0 ? 0x60 : 0x61)));
693 hi
= hi
/ 2 + (hi
< 0xdf ? 0x30 : 0x70);
694 return ((hi
<< 8) | (lo
- 2));
700 /*******************************************************************
701 Convert FROM, which contains SHIFT JIS codes, to EUC codes (with SS2);
702 return the converted buffer.
703 ********************************************************************/
705 static char *sj_to_euc3_static(const char *from
)
710 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-4);) {
711 if (is_shift_jis (*from
)) {
712 int code
= sjis3euc ((int) from
[0] & 0xff, (int) from
[1] & 0xff, &len
);
714 *out
++ = (char)euc_sup
;
716 *out
++ = (code
>> 8) & 0xff;
717 *out
++ = code
& 0xff;
719 } else if (is_kana (*from
)) {
720 *out
++ = (char)euc_kana
;
/*
 * In-place counterpart of sj_to_euc3_static(): passes FROM through
 * sj_to_euc3_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _dos_to_unix vector by setup_string_function().
 */
730 static char *sj_to_euc3(char *from
)
732 pstrcpy(from
, sj_to_euc3_static(from
));
736 /*******************************************************************
737 Convert FROM, which contains EUC codes (with Sup-Kanji), to SHIFT JIS codes;
738 return the converted buffer.
739 ********************************************************************/
741 static char *euc3_to_sj_static(const char *from
)
745 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-3); ) {
746 if (is_euc_sup (*from
)) {
747 int code
= euc3sjis((int) from
[1] & 0xff, (int) from
[2] & 0xff, True
);
748 *out
++ = (code
>> 8) & 0xff;
749 *out
++ = code
& 0xff;
751 } else if (is_euc (*from
)) {
752 int code
= euc3sjis ((int) from
[0] & 0xff, (int) from
[1] & 0xff,False
);
753 *out
++ = (code
>> 8) & 0xff;
754 *out
++ = code
& 0xff;
756 } else if (is_euc_kana (*from
)) {
/*
 * In-place counterpart of euc3_to_sj_static(): passes FROM through
 * euc3_to_sj_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _unix_to_dos vector by setup_string_function().
 */
767 static char *euc3_to_sj(char *from
)
769 pstrcpy(from
, euc3_to_sj_static(from
));
773 /*******************************************************************
774 JIS7,JIS8,JUNET <-> SJIS
775 ********************************************************************/
777 static int sjis2jis(int hi
, int lo
)
780 int maxidx
= SJISCONVTBLSIZ
-1; /* max index 1 less than number of entries */
781 int i
= (0 + SJISCONVTBLSIZ
) % 2;
782 int w
= (int)((hi
<< 8) | lo
);
784 if ((sjisconv
[0].start
< w
) && (w
< sjisconv
[SJISCONVTBLSIZ
-1].end
)) {
785 while (maxidx
>= minidx
) {
786 if (sjisconv
[i
].start
> w
) {
788 } else if (w
> sjisconv
[i
].end
) {
791 w
-= sjisconv
[i
].start
;
792 w
+= sjisconv
[i
].rstart
;
795 i
= (int)( minidx
+ (maxidx
-minidx
) %2 );
797 hi
= (int) ((w
>> 8) & 0xff);
798 lo
= (int) (w
& 0xff);
805 return ((hi
* 2 - (hi
>= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo
- 0x7e);
807 return ((hi
* 2 - (hi
>= 0xe0 ? 0x161 : 0xe1)) << 8) |
808 (lo
- (lo
>= 0x7f ? 0x20 : 0x1f));
811 static int jis2sjis(int hi
, int lo
)
815 int maxidx
= SJISREVTBLSIZ
;
819 hi
= hi
/ 2 + (hi
< 0x5f ? 0x71 : 0xb1);
820 w
= (hi
<< 8) | (lo
+ (lo
>= 0x60 ? 0x20 : 0x1f));
822 hi
= hi
/ 2 + (hi
< 0x5f ? 0x70 : 0xb0);
823 w
= (hi
<< 8) | (lo
+ 0x7e);
826 if (( 0x87 < hi
) && ( hi
< 0xed )) {
829 while (maxidx
>= minidx
) {
830 if (sjisrev
[i
].start
> w
) {
832 } else if (w
> sjisrev
[i
].end
) {
835 w
-= sjisrev
[i
].start
;
836 w
+= sjisrev
[i
].rstart
;
839 i
= (int)( minidx
+ (maxidx
-minidx
) %2 );
844 /*******************************************************************
845 Convert FROM, which contains JIS codes, to SHIFT JIS codes;
846 return the converted buffer.
847 ********************************************************************/
849 static char *jis8_to_sj_static(const char *from
)
855 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-3);) {
856 if (is_esc (*from
)) {
857 if (is_so1 (from
[1]) && is_so2 (from
[2])) {
860 } else if (is_si1 (from
[1]) && is_si2 (from
[2])) {
863 } else { /* sequence error */
877 int code
= jis2sjis ((int) from
[0] & 0xff, (int) from
[1] & 0xff);
878 *out
++ = (code
>> 8) & 0xff;
/*
 * In-place counterpart of jis8_to_sj_static(): passes FROM through
 * jis8_to_sj_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _unix_to_dos vector by setup_string_function().
 */
891 static char *jis8_to_sj(char *from
)
893 pstrcpy(from
, jis8_to_sj_static(from
));
897 /*******************************************************************
898 Convert FROM, which contains SHIFT JIS codes, to JIS codes;
899 return the converted buffer.
900 ********************************************************************/
902 static char *sj_to_jis8_static(const char *from
)
908 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-4); ) {
909 if (is_shift_jis (*from
)) {
912 case _KJ_ROMAN
: /* to KANJI */
919 code
= sjis2jis ((int) from
[0] & 0xff, (int) from
[1] & 0xff);
920 *out
++ = (code
>> 8) & 0xff;
925 case _KJ_KANJI
: /* to ROMAN/KANA */
937 case _KJ_KANJI
: /* to ROMAN/KANA */
/*
 * In-place counterpart of sj_to_jis8_static(): passes FROM through
 * sj_to_jis8_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _dos_to_unix vector by setup_string_function().
 */
948 static char *sj_to_jis8(char *from
)
950 pstrcpy(from
, sj_to_jis8_static(from
));
954 /*******************************************************************
955 Convert FROM, which contains 7-bit JIS codes, to SHIFT JIS codes;
956 return the converted buffer.
957 ********************************************************************/
959 static char *jis7_to_sj_static(const char *from
)
965 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-3);) {
966 if (is_esc (*from
)) {
967 if (is_so1 (from
[1]) && is_so2 (from
[2])) {
970 } else if (is_si1 (from
[1]) && is_si2 (from
[2])) {
973 } else { /* sequence error */
976 } else if (is_so (*from
)) {
977 shifted
= _KJ_KANA
; /* to KANA */
979 } else if (is_si (*from
)) {
980 shifted
= _KJ_ROMAN
; /* to ROMAN */
991 int code
= jis2sjis ((int) from
[0] & 0xff, (int) from
[1] & 0xff);
992 *out
++ = (code
>> 8) & 0xff;
998 *out
++ = ((int) from
[0]) + 0x80;
/*
 * In-place counterpart of jis7_to_sj_static(): passes FROM through
 * jis7_to_sj_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _unix_to_dos vector by setup_string_function().
 */
1007 static char *jis7_to_sj(char *from
)
1009 pstrcpy(from
, jis7_to_sj_static(from
));
1013 /*******************************************************************
1014 Convert FROM, which contains SHIFT JIS codes, to 7-bit JIS codes;
1015 return the converted buffer.
1016 ********************************************************************/
1018 static char *sj_to_jis7_static(const char *from
)
1023 shifted
= _KJ_ROMAN
;
1024 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-4); ) {
1025 if (is_shift_jis (*from
)) {
1029 *out
++ = jis_si
; /* to ROMAN and through down */
1030 case _KJ_ROMAN
: /* to KANJI */
1034 shifted
= _KJ_KANJI
;
1037 code
= sjis2jis ((int) from
[0] & 0xff, (int) from
[1] & 0xff);
1038 *out
++ = (code
>> 8) & 0xff;
1041 } else if (is_kana (from
[0])) {
1043 case _KJ_KANJI
: /* to ROMAN */
1047 case _KJ_ROMAN
: /* to KANA */
1052 *out
++ = ((int) *from
++) - 0x80;
1056 *out
++ = jis_si
; /* to ROMAN */
1057 shifted
= _KJ_ROMAN
;
1059 case _KJ_KANJI
: /* to ROMAN */
1063 shifted
= _KJ_ROMAN
;
1071 *out
++ = jis_si
; /* to ROMAN */
1073 case _KJ_KANJI
: /* to ROMAN */
/*
 * In-place counterpart of sj_to_jis7_static(): passes FROM through
 * sj_to_jis7_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _dos_to_unix vector by setup_string_function().
 */
1083 static char *sj_to_jis7(char *from
)
1085 pstrcpy(from
, sj_to_jis7_static(from
));
1089 /*******************************************************************
1090 Convert FROM, which contains 7-bit JIS (junet) codes, to SHIFT JIS codes;
1091 return the converted buffer.
1092 ********************************************************************/
1094 static char *junet_to_sj_static(const char *from
)
1099 shifted
= _KJ_ROMAN
;
1100 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-3);) {
1101 if (is_esc (*from
)) {
1102 if (is_so1 (from
[1]) && is_so2 (from
[2])) {
1103 shifted
= _KJ_KANJI
;
1105 } else if (is_si1 (from
[1]) && is_si2 (from
[2])) {
1106 shifted
= _KJ_ROMAN
;
1108 } else if (is_juk1(from
[1]) && is_juk2 (from
[2])) {
1111 } else { /* sequence error */
1123 int code
= jis2sjis ((int) from
[0] & 0xff, (int) from
[1] & 0xff);
1124 *out
++ = (code
>> 8) & 0xff;
1130 *out
++ = ((int) from
[0]) + 0x80;
/*
 * In-place counterpart of junet_to_sj_static(): passes FROM through
 * junet_to_sj_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _unix_to_dos vector by setup_string_function().
 */
1139 static char *junet_to_sj(char *from
)
1141 pstrcpy(from
, junet_to_sj_static(from
));
1145 /*******************************************************************
1146 Convert FROM, which contains SHIFT JIS codes, to 7-bit JIS (junet) codes;
1147 return the converted buffer.
1148 ********************************************************************/
1150 static char *sj_to_junet_static(const char *from
)
1155 shifted
= _KJ_ROMAN
;
1156 for (out
= cvtbuf
; *from
&& (out
- cvtbuf
< sizeof(cvtbuf
)-4); ) {
1157 if (is_shift_jis (*from
)) {
1161 case _KJ_ROMAN
: /* to KANJI */
1165 shifted
= _KJ_KANJI
;
1168 code
= sjis2jis ((int) from
[0] & 0xff, (int) from
[1] & 0xff);
1169 *out
++ = (code
>> 8) & 0xff;
1172 } else if (is_kana (from
[0])) {
1174 case _KJ_KANJI
: /* to ROMAN */
1175 case _KJ_ROMAN
: /* to KANA */
1177 *out
++ = junet_kana1
;
1178 *out
++ = junet_kana2
;
1182 *out
++ = ((int) *from
++) - 0x80;
1186 case _KJ_KANJI
: /* to ROMAN */
1190 shifted
= _KJ_ROMAN
;
1198 case _KJ_KANJI
: /* to ROMAN */
/*
 * In-place counterpart of sj_to_junet_static(): passes FROM through
 * sj_to_junet_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _dos_to_unix vector by setup_string_function().
 */
1208 static char *sj_to_junet(char *from
)
1210 pstrcpy(from
, sj_to_junet_static(from
));
1214 /*******************************************************************
1216 ********************************************************************/
1217 /* ":xx" -> a byte */
1219 static char *hex_to_sj_static(const char *from
)
1226 while (*sp
&& (dp
- cvtbuf
< sizeof(cvtbuf
)-3)) {
1227 if (*sp
== hex_tag
&& isxdigit((int)sp
[1]) && isxdigit((int)sp
[2])) {
1228 *dp
++ = (hex2bin (sp
[1])<<4) | (hex2bin (sp
[2]));
/*
 * In-place counterpart of hex_to_sj_static(): passes FROM through
 * hex_to_sj_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _unix_to_dos vector by setup_string_function().
 */
1237 static char *hex_to_sj(char *from
)
1239 pstrcpy(from
, hex_to_sj_static(from
));
1243 /*******************************************************************
1245 ********************************************************************/
1247 static char *sj_to_hex_static(const char *from
)
1249 const unsigned char *sp
;
1252 sp
= (const uchar
*)from
;
1253 dp
= (unsigned char*) cvtbuf
;
1254 while (*sp
&& (((char *)dp
)- cvtbuf
< sizeof(cvtbuf
)-7)) {
1257 *dp
++ = bin2hex (((*sp
)>>4)&0x0f);
1258 *dp
++ = bin2hex ((*sp
)&0x0f);
1260 } else if (is_shift_jis (*sp
) && is_shift_jis2 (sp
[1])) {
1262 *dp
++ = bin2hex (((*sp
)>>4)&0x0f);
1263 *dp
++ = bin2hex ((*sp
)&0x0f);
1266 *dp
++ = bin2hex (((*sp
)>>4)&0x0f);
1267 *dp
++ = bin2hex ((*sp
)&0x0f);
/*
 * In-place counterpart of sj_to_hex_static(): passes FROM through
 * sj_to_hex_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _dos_to_unix vector by setup_string_function().
 */
1276 static char *sj_to_hex(char *from
)
1278 pstrcpy(from
, sj_to_hex_static(from
));
1282 /*******************************************************************
1284 ********************************************************************/
1285 /* ":xx" CAP -> a byte */
1286 static char *cap_to_sj_static(const char *from
)
1291 sp
= (const char *) from
;
1293 while (*sp
&& (dp
- cvtbuf
< sizeof(cvtbuf
)-2)) {
1295 * The only change between this and hex_to_sj is here. sj_to_cap only
1296 * translates characters greater or equal to 0x80 - make sure that here
1297 * we only do the reverse (that's why the strchr is used rather than
1298 * isxdigit. Based on fix from ado@elsie.nci.nih.gov (Arthur David Olson).
1300 if (*sp
== hex_tag
&& (strchr ("89abcdefABCDEF", sp
[1]) != NULL
) && isxdigit((int)sp
[2])) {
1301 *dp
++ = (hex2bin (sp
[1])<<4) | (hex2bin (sp
[2]));
/*
 * In-place counterpart of cap_to_sj_static(): passes FROM through
 * cap_to_sj_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _unix_to_dos vector by setup_string_function().
 */
1310 static char *cap_to_sj(char *from
)
1312 pstrcpy(from
, cap_to_sj_static(from
));
1316 /*******************************************************************
1317 kanji/kana -> ":xx" - CAP format.
1318 ********************************************************************/
1319 static char *sj_to_cap_static(const char *from
)
1321 const unsigned char *sp
;
1324 sp
= (const uchar
*)from
;
1325 dp
= (unsigned char*) cvtbuf
;
1326 while (*sp
&& (((char *)dp
) - cvtbuf
< sizeof(cvtbuf
)-4)) {
1329 *dp
++ = bin2hex (((*sp
)>>4)&0x0f);
1330 *dp
++ = bin2hex ((*sp
)&0x0f);
/*
 * In-place counterpart of sj_to_cap_static(): passes FROM through
 * sj_to_cap_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _dos_to_unix vector by setup_string_function().
 */
1340 static char *sj_to_cap(char *from
)
1342 pstrcpy(from
, sj_to_cap_static(from
));
1346 /*******************************************************************
1348 ********************************************************************/
/*
 * Identity conversion into the shared buffer: copies FROM into cvtbuf
 * with pstrcpy(). Used as the static-buffer vector for both directions
 * when the coding system is plain Shift-JIS.
 */
1350 static char *sj_to_sj_static(const char *from
)
1352 pstrcpy (cvtbuf
, from
);
1356 static char *sj_to_sj(char *from
)
1361 /*******************************************************************
1363 ********************************************************************/
1364 static char *cp_to_utf8_static(const char *from
)
1367 const unsigned char *src
;
1372 src
= (const unsigned char *)from
;
1373 dst
= (unsigned char *)cvtbuf
;
1374 while (*src
&& (((char *)dst
- cvtbuf
) < sizeof(cvtbuf
)-4)) {
1375 len
= _skip_multibyte_char(*src
);
1377 w
= (int)(*src
++ & 0xff);
1378 w
= (int)((w
<< 8)|(*src
++ & 0xff));
1380 w
= (int)(*src
++ & 0xff);
1382 val
= doscp2ucs2(w
);
1384 if ( val
<= 0x7f ) {
1385 *dst
++ = (char)(val
& 0xff);
1386 } else if ( val
<= 0x7ff ){
1387 *dst
++ = (char)( 0xc0 | ((val
>> 6) & 0xff));
1388 *dst
++ = (char)( 0x80 | ( val
& 0x3f ));
1390 *dst
++ = (char)( 0xe0 | ((val
>> 12) & 0x0f));
1391 *dst
++ = (char)( 0x80 | ((val
>> 6) & 0x3f));
1392 *dst
++ = (char)( 0x80 | (val
& 0x3f));
/*
 * In-place counterpart of cp_to_utf8_static(): passes FROM through
 * cp_to_utf8_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _dos_to_unix vector by setup_string_function().
 */
1400 static char *cp_to_utf8(char *from
)
1402 pstrcpy(from
, cp_to_utf8_static(from
));
1406 /*******************************************************************
1408 ********************************************************************/
1409 static char *utf8_to_cp_static(const char *from
)
1411 const unsigned char *src
;
1416 src
= (const unsigned char *)from
;
1417 dst
= (unsigned char *)cvtbuf
;
1419 while (*src
&& ((char *)dst
- cvtbuf
< sizeof(cvtbuf
)-4)) {
1420 val
= (*src
++ & 0xff);
1422 *dst
++ = (char)(val
& 0x7f);
1423 } else if ((0xc0 <= val
) && (val
<= 0xdf)
1424 && (0x80 <= *src
) && (*src
<= 0xbf)) {
1425 w
= ucs2doscp( ((val
& 31) << 6) | ((*src
++) & 63 ));
1426 *dst
++ = (char)((w
>> 8) & 0xff);
1427 *dst
++ = (char)(w
& 0xff);
1429 val
= (val
& 0x0f) << 12;
1430 val
|= ((*src
++ & 0x3f) << 6);
1431 val
|= (*src
++ & 0x3f);
1433 *dst
++ = (char)((w
>> 8) & 0xff);
1434 *dst
++ = (char)(w
& 0xff);
/*
 * In-place counterpart of utf8_to_cp_static(): passes FROM through
 * utf8_to_cp_static() and copies the result back into FROM with pstrcpy().
 * Installed as the _unix_to_dos vector by setup_string_function().
 */
1441 static char *utf8_to_cp(char *from
)
1443 pstrcpy(from
, utf8_to_cp_static(from
));
1447 /************************************************************************
1449 _dos_to_unix _unix_to_dos
1450 ************************************************************************/
1452 static void setup_string_function(int codes
)
1456 _dos_to_unix
= dos2unix_format
;
1457 _dos_to_unix_static
= dos2unix_format_static
;
1458 _unix_to_dos
= unix2dos_format
;
1459 _unix_to_dos_static
= unix2dos_format_static
;
1463 _dos_to_unix
= sj_to_sj
;
1464 _dos_to_unix_static
= sj_to_sj_static
;
1465 _unix_to_dos
= sj_to_sj
;
1466 _unix_to_dos_static
= sj_to_sj_static
;
1470 _dos_to_unix
= sj_to_euc
;
1471 _dos_to_unix_static
= sj_to_euc_static
;
1472 _unix_to_dos
= euc_to_sj
;
1473 _unix_to_dos_static
= euc_to_sj_static
;
1477 _dos_to_unix
= sj_to_jis7
;
1478 _dos_to_unix_static
= sj_to_jis7_static
;
1479 _unix_to_dos
= jis7_to_sj
;
1480 _unix_to_dos_static
= jis7_to_sj_static
;
1484 _dos_to_unix
= sj_to_jis8
;
1485 _dos_to_unix_static
= sj_to_jis8_static
;
1486 _unix_to_dos
= jis8_to_sj
;
1487 _unix_to_dos_static
= jis8_to_sj_static
;
1491 _dos_to_unix
= sj_to_junet
;
1492 _dos_to_unix_static
= sj_to_junet_static
;
1493 _unix_to_dos
= junet_to_sj
;
1494 _unix_to_dos_static
= junet_to_sj_static
;
1498 _dos_to_unix
= sj_to_hex
;
1499 _dos_to_unix_static
= sj_to_hex_static
;
1500 _unix_to_dos
= hex_to_sj
;
1501 _unix_to_dos_static
= hex_to_sj_static
;
1505 _dos_to_unix
= sj_to_cap
;
1506 _dos_to_unix_static
= sj_to_cap_static
;
1507 _unix_to_dos
= cap_to_sj
;
1508 _unix_to_dos_static
= cap_to_sj_static
;
1512 _dos_to_unix
= cp_to_utf8
;
1513 _dos_to_unix_static
= cp_to_utf8_static
;
1514 _unix_to_dos
= utf8_to_cp
;
1515 _unix_to_dos_static
= utf8_to_cp_static
;
1519 _dos_to_unix
= sj_to_euc3
;
1520 _dos_to_unix_static
= sj_to_euc3_static
;
1521 _unix_to_dos
= euc3_to_sj
;
1522 _unix_to_dos_static
= euc3_to_sj_static
;
1527 /************************************************************************
1528 Interpret coding system.
1529 ************************************************************************/
1531 void interpret_coding_system(char *str
)
1533 int codes
= UNKNOWN_CODE
;
1535 if (strequal (str
, "sjis")) {
1537 } else if (strequal (str
, "euc")) {
1539 } else if (strequal (str
, "cap")) {
1542 } else if (strequal (str
, "hex")) {
1545 } else if (!strncasecmp (str
, "hex", 3)) {
1547 hex_tag
= (str
[3] ? str
[3] : HEXTAG
);
1548 } else if (strequal (str
, "j8bb")) {
1552 } else if (strequal (str
, "j8bj") || strequal (str
, "jis8")) {
1556 } else if (strequal (str
, "j8bh")) {
1560 } else if (strequal (str
, "j8@b")) {
1564 } else if (strequal (str
, "j8@j")) {
1568 } else if (strequal (str
, "j8@h")) {
1572 } else if (strequal (str
, "j7bb")) {
1576 } else if (strequal (str
, "j7bj") || strequal (str
, "jis7")) {
1580 } else if (strequal (str
, "j7bh")) {
1584 } else if (strequal (str
, "j7@b")) {
1588 } else if (strequal (str
, "j7@j")) {
1592 } else if (strequal (str
, "j7@h")) {
1596 } else if (strequal (str
, "jubb")) {
1600 } else if (strequal (str
, "jubj") || strequal (str
, "junet")) {
1604 } else if (strequal (str
, "jubh")) {
1608 } else if (strequal (str
, "ju@b")) {
1612 } else if (strequal (str
, "ju@j")) {
1616 } else if (strequal (str
, "ju@h")) {
1620 } else if (strequal (str
, "utf8")) {
1622 } else if (strequal (str
, "euc3")) {
1625 setup_string_function (codes
);
1628 /*******************************************************************
1629 Non multibyte char function.
1630 *******************************************************************/
1632 static size_t skip_non_multibyte_char(char c
)
1637 /*******************************************************************
1638 Function that always says a character isn't multibyte.
1639 *******************************************************************/
1641 static BOOL
not_multibyte_char_1(char c
)
1646 /*******************************************************************
1647 Setup the function pointers for the functions that are replaced
1648 when multi-byte codepages are used.
1650 The dos_to_unix and unix_to_dos function pointers are only
1651 replaced by setup_string_function called by interpret_coding_system
1653 *******************************************************************/
1655 void initialize_multibyte_vectors( int client_codepage
)
1657 switch( client_codepage
)
1659 case KANJI_CODEPAGE
:
1660 multibyte_strchr
= sj_strchr
;
1661 multibyte_strrchr
= sj_strrchr
;
1662 multibyte_strstr
= sj_strstr
;
1663 multibyte_strtok
= sj_strtok
;
1664 _skip_multibyte_char
= skip_kanji_multibyte_char
;
1665 is_multibyte_char_1
= is_kanji_multibyte_char_1
;
1666 global_is_multibyte_codepage
= True
;
1668 case HANGUL_CODEPAGE
:
1669 multibyte_strchr
= generic_multibyte_strchr
;
1670 multibyte_strrchr
= generic_multibyte_strrchr
;
1671 multibyte_strstr
= generic_multibyte_strstr
;
1672 multibyte_strtok
= generic_multibyte_strtok
;
1673 _skip_multibyte_char
= skip_generic_multibyte_char
;
1674 is_multibyte_char_1
= hangul_is_multibyte_char_1
;
1675 global_is_multibyte_codepage
= True
;
1678 multibyte_strchr
= generic_multibyte_strchr
;
1679 multibyte_strrchr
= generic_multibyte_strrchr
;
1680 multibyte_strstr
= generic_multibyte_strstr
;
1681 multibyte_strtok
= generic_multibyte_strtok
;
1682 _skip_multibyte_char
= skip_generic_multibyte_char
;
1683 is_multibyte_char_1
= big5_is_multibyte_char_1
;
1684 global_is_multibyte_codepage
= True
;
1686 case SIMPLIFIED_CHINESE_CODEPAGE
:
1687 multibyte_strchr
= generic_multibyte_strchr
;
1688 multibyte_strrchr
= generic_multibyte_strrchr
;
1689 multibyte_strstr
= generic_multibyte_strstr
;
1690 multibyte_strtok
= generic_multibyte_strtok
;
1691 _skip_multibyte_char
= skip_generic_multibyte_char
;
1692 is_multibyte_char_1
= simpch_is_multibyte_char_1
;
1693 global_is_multibyte_codepage
= True
;
1696 * Single char size code page.
1699 multibyte_strchr
= (const char *(*)(const char *, int )) strchr
;
1700 multibyte_strrchr
= (const char *(*)(const char *, int )) strrchr
;
1701 multibyte_strstr
= (const char *(*)(const char *, const char *)) strstr
;
1702 multibyte_strtok
= (char *(*)(char *, const char *)) strtok
;
1703 _skip_multibyte_char
= skip_non_multibyte_char
;
1704 is_multibyte_char_1
= not_multibyte_char_1
;
1705 global_is_multibyte_codepage
= False
;
1709 /* *******************************************************
1710 Function(s) for "dynamic" encoding of SWAT output.
1711 In this version, only dos_to_dos, dos_to_unix and unix_to_dos
1712 are used, as a bug fix. Conversion to the web encoding
1713 (the catalog file encoding) is not needed because
1714 they use the same character codes.
1715 **************************************************** */
1716 static char *no_conversion_static(const char *str
)
1718 static pstring temp
;
/* DOS-to-DOS vector, initialised to no_conversion_static. */
1722 char *(*_dos_to_dos_static
)(const char *) = no_conversion_static
;