few edits
[Samba.git] / source / lib / kanji.c
blob940457d1681d1fc8e52c886f2a36318ed54a164e
1 /*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4 Kanji Extensions
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 Adding for Japanese language by <fujita@ainix.isac.co.jp> 1994.9.5
22 and extend coding system to EUC/SJIS/JIS/HEX at 1994.10.11
23 and add all jis codes sequence type at 1995.8.16
24 Notes: Hexadecimal code by <ohki@gssm.otuka.tsukuba.ac.jp>
25 Adding features about Machine dependent codes and User Defined Codes
26 by Hiroshi MIURA <miura@samba.gr.jp> 2000.3.19
29 #define _KANJI_C_
30 #include "includes.h"
/*
 * Function pointers that get overridden when multi-byte code pages
 * are loaded.  They start out referring to the plain C library
 * routines; the casts only reconcile the const qualifiers of the
 * library prototypes with the pointer types used here.
 */
const char *(*multibyte_strchr)(const char *, int) =
  (const char *(*)(const char *, int)) strchr;
const char *(*multibyte_strrchr)(const char *, int) =
  (const char *(*)(const char *, int)) strrchr;
const char *(*multibyte_strstr)(const char *, const char *) =
  (const char *(*)(const char *, const char *)) strstr;
char *(*multibyte_strtok)(char *, const char *) =
  (char *(*)(char *, const char *)) strtok;
43 * Kanji is treated differently here due to historical accident of
44 * it being the first non-English codepage added to Samba.
45 * The define 'KANJI' is being overloaded to mean 'use kanji codepage
46 * by default' and also 'this is the filename-to-disk conversion
47 * method to use'. This really should be removed and all control
48 * over this left in the smb.conf parameters 'client codepage'
49 * and 'coding system'.
52 #ifndef KANJI
55 * Set the default conversion to be the functions in
56 * charcnv.c.
59 static size_t skip_non_multibyte_char(char);
60 static BOOL not_multibyte_char_1(char);
62 char *(*_dos_to_unix)(char *) = dos2unix_format;
63 char *(*_dos_to_unix_static)(const char *) = dos2unix_format_static;
64 char *(*_unix_to_dos)(char *) = unix2dos_format;
65 char *(*_unix_to_dos_static)(const char *) = unix2dos_format_static;
66 size_t (*_skip_multibyte_char)(char) = skip_non_multibyte_char;
67 BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1;
69 #else /* KANJI */
72 * Set the default conversion to be the function
73 * sj_to_sj in this file.
76 static char *sj_to_sj(char *from);
77 static char *sj_to_sj_static(const char *from);
78 static size_t skip_kanji_multibyte_char(char);
79 static BOOL is_kanji_multibyte_char_1(char);
81 char *(*_dos_to_unix)(char *) = sj_to_sj;
82 char *(*_dos_to_unix_static)(const char *) = sj_to_sj_static;
83 char *(*_unix_to_dos)(char *) = sj_to_sj;
84 char *(*_unix_to_dos_static)(const char *) = sj_to_sj_static;
85 size_t (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char;
86 int (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1;
88 #endif /* KANJI */
90 BOOL global_is_multibyte_codepage = False;
92 /* jis si/so sequence */
93 static char jis_kso = JIS_KSO;
94 static char jis_ksi = JIS_KSI;
95 static char hex_tag = HEXTAG;
97 /*******************************************************************
98 SHIFT JIS functions
99 ********************************************************************/
/*******************************************************************
  Search token from S1, separated by any char of S2.
  S1 contains SHIFT JIS chars.

  Works like strtok(): pass the string on the first call and NULL on
  later calls to continue from the saved position.  Separator bytes
  are only looked for at character boundaries, so the second byte of
  a two-byte character is never taken for a separator.  The found
  separator is overwritten with '\0'.  Returns the token, or NULL
  when no token is left.  Uses a static resume pointer.
********************************************************************/
static char *sj_strtok(char *s1, const char *s2)
{
  static char *s = NULL;	/* where the next call resumes */
  char *q;			/* start of the token being collected */

  if (!s1) {
    if (!s) {
      return NULL;
    }
    s1 = s;
  }
  for (q = s1; *s1; ) {
    if (is_shift_jis (*s1)) {
      /*
       * A lead byte that is the last byte of the string must not make
       * us step over the terminator.
       */
      s1 += (s1[1] != '\0') ? 2 : 1;
    } else if (is_kana (*s1)) {
      s1++;
    } else {
      if (strchr (s2, *s1) != NULL) {
        if (s1 != q) {
          s = s1 + 1;
          *s1 = '\0';
          return q;
        }
        /* leading separator: the token starts after it */
        q = s1 + 1;
      }
      s1++;
    }
  }
  s = NULL;
  return *q ? q : NULL;
}
/*******************************************************************
  Search string S2 in S1.
  S1 contains SHIFT JIS chars.

  Matches are only attempted at character boundaries of S1.  Returns
  a pointer to the first match, S1 itself when S2 is empty, or NULL.
********************************************************************/
static const char *sj_strstr(const char *s1, const char *s2)
{
  size_t len = strlen (s2);

  if (!*s2)
    return (const char *) s1;
  while (*s1) {
    if ((*s1 == *s2) && (strncmp (s1, s2, len) == 0))
      return (const char *) s1;
    /*
     * Skip a whole two-byte character, but never step over the
     * terminator when the string ends in a lone lead byte.
     */
    if (is_shift_jis (*s1) && s1[1] != '\0') {
      s1 += 2;
    } else {
      s1++;
    }
  }
  return NULL;
}
/*******************************************************************
  Search char C from beginning of S.
  S contains SHIFT JIS chars.

  Only the first byte of each character is compared with C.  Returns
  a pointer to the first match or NULL.
********************************************************************/
static const char *sj_strchr (const char *s, int c)
{
  while (*s) {
    if (*s == c)
      return (const char *) s;
    /* a lone lead byte at the end advances by one, not past the NUL */
    if (is_shift_jis (*s) && s[1] != '\0') {
      s += 2;
    } else {
      s++;
    }
  }
  return NULL;
}
/*******************************************************************
  Search char C from the end of S.
  S contains SHIFT JIS chars.

  The string is walked forwards one character at a time, remembering
  the last position whose first byte equals C.  Returns that position
  or NULL when C does not occur.
********************************************************************/
static const char *sj_strrchr(const char *s, int c)
{
  const char *q = NULL;		/* last match seen so far */

  while (*s) {
    if (*s == c) {
      q = (const char *) s;
    }
    /* a lone lead byte at the end advances by one, not past the NUL */
    if (is_shift_jis (*s) && s[1] != '\0') {
      s += 2;
    } else {
      s++;
    }
  }
  return q;
}
/*******************************************************************
  Kanji multibyte char skip function.
  Returns 2 when C passes is_shift_jis(), 1 when it passes is_kana(),
  and 0 otherwise.
*******************************************************************/
static size_t skip_kanji_multibyte_char(char c)
{
  size_t width = 0;

  if (is_shift_jis(c)) {
    width = 2;
  } else if (is_kana(c)) {
    width = 1;
  }
  return width;
}
220 /*******************************************************************
221 Kanji multibyte char identification.
222 *******************************************************************/
224 static BOOL is_kanji_multibyte_char_1(char c)
226 return is_shift_jis(c);
229 /*******************************************************************
230 The following functions are the only ones needed to do multibyte
231 support for Hangul, Big5 and Simplified Chinese. Most of the
232 real work for these codepages is done in the generic multibyte
233 functions. The only reason these functions are needed at all
234 is that the is_xxx(c) calls are really preprocessor macros.
235 ********************************************************************/
237 /*******************************************************************
238 Hangul (Korean - code page 949) function.
239 ********************************************************************/
241 static BOOL hangul_is_multibyte_char_1(char c)
243 return is_hangul(c);
246 /*******************************************************************
247 Big5 Traditional Chinese (code page 950) function.
248 ********************************************************************/
250 static BOOL big5_is_multibyte_char_1(char c)
252 return is_big5_c1(c);
255 /*******************************************************************
256 Simplified Chinese (code page 936) function.
257 ********************************************************************/
259 static BOOL simpch_is_multibyte_char_1(char c)
261 return is_simpch_c1(c);
264 /*******************************************************************
265 Generic multibyte functions - used by Hangul, Big5 and Simplified
266 Chinese codepages.
267 ********************************************************************/
269 /*******************************************************************
270 search token from S1 separated any char of S2
271 S1 contains generic multibyte chars.
272 ********************************************************************/
274 static char *generic_multibyte_strtok(char *s1, const char *s2)
276 static char *s = NULL;
277 char *q;
278 if (!s1) {
279 if (!s) {
280 return NULL;
282 s1 = s;
284 for (q = s1; *s1; ) {
285 if ((*is_multibyte_char_1)(*s1)) {
286 s1 += 2;
287 } else {
288 char *p = strchr (s2, *s1);
289 if (p) {
290 if (s1 != q) {
291 s = s1 + 1;
292 *s1 = '\0';
293 return q;
295 q = s1 + 1;
297 s1++;
300 s = NULL;
301 if (*q) {
302 return q;
304 return NULL;
307 /*******************************************************************
308 search string S2 from S1
309 S1 contains generic multibyte chars.
310 ********************************************************************/
312 static const char *generic_multibyte_strstr(const char *s1, const char *s2)
314 size_t len = strlen (s2);
315 if (!*s2)
316 return (const char *) s1;
317 for (;*s1;) {
318 if (*s1 == *s2) {
319 if (strncmp (s1, s2, len) == 0)
320 return (const char *) s1;
322 if ((*is_multibyte_char_1)(*s1)) {
323 s1 += 2;
324 } else {
325 s1++;
328 return NULL;
331 /*******************************************************************
332 Search char C from beginning of S.
333 S contains generic multibyte chars.
334 ********************************************************************/
336 static const char *generic_multibyte_strchr(const char *s, int c)
338 for (; *s; ) {
339 if (*s == c)
340 return (const char *) s;
341 if ((*is_multibyte_char_1)(*s)) {
342 s += 2;
343 } else {
344 s++;
347 return NULL;
350 /*******************************************************************
351 Search char C end of S.
352 S contains generic multibyte chars.
353 ********************************************************************/
355 static const char *generic_multibyte_strrchr(const char *s, int c)
357 const char *q;
359 for (q = 0; *s; ) {
360 if (*s == c) {
361 q = (const char *) s;
363 if ((*is_multibyte_char_1)(*s)) {
364 s += 2;
365 } else {
366 s++;
369 return q;
372 /*******************************************************************
373 Generic multibyte char skip function.
374 *******************************************************************/
376 static size_t skip_generic_multibyte_char(char c)
378 if( (*is_multibyte_char_1)(c)) {
379 return 2;
381 return 0;
384 /*******************************************************************
385 Code conversion
386 ********************************************************************/
388 /* convesion buffer */
389 static char cvtbuf[2*sizeof(pstring)];
391 /*******************************************************************
392 EUC <-> SJIS
393 ********************************************************************/
395 static int euc2sjis (int hi, int lo)
397 int w;
398 int maxidx = SJISREVTBLSIZ;
399 int minidx = 0;
400 int i = 2;
402 if (hi & 1) {
403 hi = hi / 2 + (hi < 0xdf ? 0x31 : 0x71);
404 w = (hi << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61));
405 } else {
406 hi = hi / 2 + (hi < 0xdf ? 0x30 : 0x70);
407 w = (hi << 8) | (lo - 2);
409 if ( (0x87 < hi ) && (hi < 0xed ) ) {
410 return w;
412 while ( maxidx >= minidx ) {
413 if ( sjisrev[i].start > w ) {
414 maxidx = i-1;
415 } else if ( w > sjisrev[i].end ) {
416 minidx = i+1;
417 } else {
418 w -= sjisrev[i].start;
419 w += sjisrev[i].rstart;
420 break;
422 i = (int)( minidx + (maxidx - minidx) % 2 );
424 return w;
427 static int sjis2euc (int hi, int lo)
429 int minidx = 0;
430 int maxidx = SJISCONVTBLSIZ -1; /* max index 1 less than number of entries */
431 int i = ( 0 + SJISCONVTBLSIZ ) % 2;
432 int w = (int)((hi << 8) | lo);
434 if ( (sjisconv[0].start < w) && (w < sjisconv[SJISCONVTBLSIZ-1].end) ) {
435 while (maxidx >= minidx) {
436 if ( sjisconv[i].start > w ) {
437 maxidx = i-1;
438 } else if (w > sjisconv[i].end) {
439 minidx = i+1;
440 } else {
441 w -= sjisconv[i].start;
442 w += sjisconv[i].rstart;
443 break;
445 i = (int)( minidx + (maxidx-minidx)%2 );
447 hi = (int) ((w >> 8) & 0xff);
448 lo = (int) (w & 0xff);
450 if (hi >= 0xf0) {
451 hi = GETAHI;
452 lo = GETALO;
454 if (lo >= 0x9f)
455 return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
456 else
457 return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
458 (lo + (lo >= 0x7f ? 0x60 : 0x61));
461 /*******************************************************************
462 Convert FROM contain SHIFT JIS codes to EUC codes
463 return converted buffer
464 ********************************************************************/
466 static char *sj_to_euc_static(const char *from)
468 char *out;
470 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) {
471 if (is_shift_jis (*from)) {
472 int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
473 *out++ = (code >> 8) & 0xff;
474 *out++ = code & 0xff;
475 from += 2;
476 } else if (is_kana (*from)) {
477 *out++ = (char)euc_kana;
478 *out++ = *from++;
479 } else {
480 *out++ = *from++;
483 *out = 0;
484 return cvtbuf;
/*
 * In-place variant of sj_to_euc_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 * NOTE(review): presumably FROM must be a pstring-sized buffer - confirm
 * against the definition of pstrcpy.
 */
static char *sj_to_euc(char *from)
{
  char *converted = sj_to_euc_static(from);

  pstrcpy(from, converted);
  return from;
}
493 /*******************************************************************
494 Convert FROM contain EUC codes to SHIFT JIS codes
495 return converted buffer
496 ********************************************************************/
498 static char *euc_to_sj_static(const char *from)
500 char *out;
502 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3); ) {
503 if (is_euc (*from)) {
504 int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
505 *out++ = (code >> 8) & 0xff;
506 *out++ = code & 0xff;
507 from += 2;
508 } else if (is_euc_kana (*from)) {
509 *out++ = from[1];
510 from += 2;
511 } else {
512 *out++ = *from++;
515 *out = 0;
516 return cvtbuf;
/*
 * In-place variant of euc_to_sj_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *euc_to_sj(char *from)
{
  char *converted = euc_to_sj_static(from);

  pstrcpy(from, converted);
  return from;
}
525 /*******************************************************************
526 EUC3 <-> SJIS
527 ********************************************************************/
528 static int sjis3euc (int hi, int lo, int *len)
530 int i,w;
531 int minidx;
532 int maxidx;
534 w = (int)((hi << 8) | lo);
536 /* no sjis */
537 if ( ( 0x40 >= lo ) && (lo >= 0xfc) && (lo == 0x7f )) {
538 w = (GETAHI << 8) | GETALO;
540 /* IBM Extended Kanji */
541 } else if (( w == 0xfa54 )||( w == 0x81ca )) {
542 *len = 2;
543 return (0xa2cc);
545 } else if (( w == 0xfa5b )||( w == 0x81e6)) {
546 *len = 2;
547 return (0xa2e8);
549 } else if (( 0xfa <= hi ) && ( hi <= 0xfc ) ) {
550 i = w - 0xfa40 - ( hi - 0xfa )*( 0xfb40 - 0xfafc) - ((lo < 0x7f)? 0 : 1 );
551 if ( i <= EUC3CONVTBLSIZ ){
552 *len = 3;
553 return euc3conv[i];
556 /* NEC selected IBM Extend Kanji */
557 /* there are 3 code that is not good for conv */
558 } else if (( 0x8754 <= w ) && ( w <= 0x878a)) {
559 minidx = 0;
560 maxidx = EUC3CONV2TBLSIZ;
561 i = minidx + (maxidx - minidx) % 2;
562 while ( maxidx >= minidx ) {
563 if ( euc3conv2[i].sjis > w ) {
564 maxidx = i-1;
565 } else if ( w > euc3conv2[i].sjis ) {
566 minidx = i+1;
567 } else {
568 *len = 3;
569 return (euc3conv2[i].euc);
571 i = (int)( minidx + (maxidx - minidx) % 2 );
573 /* else normal EUC */
575 } else if (( w == 0xeef9 ) || ( w == 0x81ca )) {
576 *len = 2;
577 return (0xa2cc);
579 } else if (( 0xed <= hi ) && ( hi <= 0xef )) {
580 minidx = 0;
581 maxidx = SJISREVTBLSIZ;
582 i = 10;
583 while ( maxidx >= minidx ) {
584 if ( sjisrev[i].start > w ) {
585 maxidx = i-1;
586 } else if ( w > sjisrev[i].end ) {
587 minidx = i+1;
588 } else {
589 w -= sjisrev[i].start;
590 w += sjisrev[i].rstart;
591 break;
593 i = (int)( minidx + (maxidx - minidx) % 2 );
595 if ( w >= 0xfa40 ) {
596 i = w - 0xfa40 - ( hi - 0xfa )*( 0xfb40 - 0xfafc) - ((lo < 0x7f)? 0 : 1 );
597 if ( i <= EUC3CONVTBLSIZ ){
598 *len = 3;
599 return euc3conv[i];
600 } else {
601 w = (GETAHI << 8) | GETALO;
604 /* else normal EUC */
606 /* UDC half low*/
607 /* this area maps to the G2 UDC area: 0xf5a1 -- 0xfefe */
608 } else if ((0xf0 <= hi) && (hi <= 0xf4)) {
609 *len = 2;
610 if (lo >= 0x9f) {
611 return (((hi * 2 - 0xea) << 8) | (lo + 2));
612 } else {
613 return (((hi * 2 - 0xeb) << 8) | (lo + (lo >=0x7f ? 0x60: 0x61 )));
616 /* UDC half high*/
617 /* this area maps to the G3 UDC area: 0xf8f5a1 -- 0xf8fefe */
618 } else if ((0xf5 <= hi) && (hi <= 0xf9)) {
619 *len = 3;
620 if (lo >= 0x9f) {
621 return (((hi*2 - 0xf4) << 8) | (lo + 2));
622 } else {
623 return (((hi*2 - 0xf5) << 8) | (lo + (lo >= 0x7f ? 0x60: 0x61 )));
625 /* ....checked all special case */
628 /* These Normal 2 byte EUC */
629 *len = 2;
630 hi = (int) ((w >> 8) & 0xff);
631 lo = (int) (w & 0xff);
633 if (hi >= 0xf0) { /* Check range */
634 hi = GETAHI;
635 lo = GETALO;
638 if (lo >= 0x9f)
639 return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
640 else
641 return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
642 (lo + (lo >= 0x7f ? 0x60 : 0x61));
645 static int euc3sjis (int hi, int lo, BOOL is_3byte)
647 int w;
649 w = (int)((hi << 8) | lo);
650 if (is_3byte) {
651 if (( 0xf5 <= hi) && ( hi <= 0xfe)) {
652 /* UDC half high*/
653 /* this area maps to the G3 UDC area */
654 /* 0xf8f5a1 -- 0xf8fefe --> 0xf540 -- 0xf9fc */
655 if (hi & 1) {
656 return (((hi / 2 + 0x7b) << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61)));
657 } else {
658 return (((hi / 2 + 0x7a) << 8) | (lo - 2));
660 } else {
661 /* Using map table */
662 int minidx = 0;
663 int maxidx = EUC3REVTBLSIZ;
664 int i = minidx + (maxidx - minidx) % 2;
666 while ( maxidx >= minidx ) {
667 if (euc3rev[i].euc > w) {
668 maxidx = i-1;
669 } else if (euc3rev[i].euc < w) {
670 minidx = i+1;
671 } else {
672 return (euc3rev[i].sjis);
674 i = (int)( minidx + ( maxidx - minidx ) % 2);
676 return ((GETAHI << 8 ) | GETALO);
678 } else { /* is_2byte */
679 if ((0xf5 <= hi) && (hi <= 0xfe)) {
680 /* UDC half low*/
681 /* this area maps to the G2 UDC area */
682 /* 0xf5a1 -- 0xfefe --> 0xf040 -- 0xf4fc */
683 if (hi & 1) {
684 return (((hi / 2 + 0x76) << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61)));
685 } else {
686 return (((hi / 2 + 0x75) << 8) | (lo - 2));
688 } else { /* Normal EUC */
689 if (hi & 1) {
690 hi = hi / 2 + (hi < 0xdf ? 0x31 : 0x71);
691 return ((hi << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61)));
692 } else {
693 hi = hi / 2 + (hi < 0xdf ? 0x30 : 0x70);
694 return ((hi << 8) | (lo - 2));
700 /*******************************************************************
701 Convert FROM contain SHIFT JIS codes to EUC codes (with SS2)
702 return converted buffer
703 ********************************************************************/
705 static char *sj_to_euc3_static(const char *from)
707 char *out;
708 int len;
710 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4);) {
711 if (is_shift_jis (*from)) {
712 int code = sjis3euc ((int) from[0] & 0xff, (int) from[1] & 0xff, &len);
713 if (len == 3) {
714 *out++ = (char)euc_sup;
716 *out++ = (code >> 8) & 0xff;
717 *out++ = code & 0xff;
718 from += 2;
719 } else if (is_kana (*from)) {
720 *out++ = (char)euc_kana;
721 *out++ = *from++;
722 } else {
723 *out++ = *from++;
726 *out = 0;
727 return cvtbuf;
/*
 * In-place variant of sj_to_euc3_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *sj_to_euc3(char *from)
{
  char *converted = sj_to_euc3_static(from);

  pstrcpy(from, converted);
  return from;
}
736 /*******************************************************************
737 Convert FROM contain EUC codes (with Sup-Kanji) to SHIFT JIS codes
738 return converted buffer
739 ********************************************************************/
741 static char *euc3_to_sj_static(const char *from)
743 char *out;
745 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3); ) {
746 if (is_euc_sup (*from)) {
747 int code = euc3sjis((int) from[1] & 0xff, (int) from[2] & 0xff, True);
748 *out++ = (code >> 8) & 0xff;
749 *out++ = code & 0xff;
750 from += 3;
751 } else if (is_euc (*from)) {
752 int code = euc3sjis ((int) from[0] & 0xff, (int) from[1] & 0xff,False);
753 *out++ = (code >> 8) & 0xff;
754 *out++ = code & 0xff;
755 from += 2;
756 } else if (is_euc_kana (*from)) {
757 *out++ = from[1];
758 from += 2;
759 } else {
760 *out++ = *from++;
763 *out = 0;
764 return cvtbuf;
/*
 * In-place variant of euc3_to_sj_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *euc3_to_sj(char *from)
{
  char *converted = euc3_to_sj_static(from);

  pstrcpy(from, converted);
  return from;
}
773 /*******************************************************************
774 JIS7,JIS8,JUNET <-> SJIS
775 ********************************************************************/
777 static int sjis2jis(int hi, int lo)
779 int minidx = 0;
780 int maxidx = SJISCONVTBLSIZ -1; /* max index 1 less than number of entries */
781 int i = (0 + SJISCONVTBLSIZ) % 2;
782 int w = (int)((hi << 8) | lo);
784 if ((sjisconv[0].start < w) && (w < sjisconv[SJISCONVTBLSIZ-1].end)) {
785 while (maxidx >= minidx) {
786 if (sjisconv[i].start > w) {
787 maxidx = i-1;
788 } else if (w > sjisconv[i].end) {
789 minidx = i+1;
790 } else {
791 w -= sjisconv[i].start;
792 w += sjisconv[i].rstart;
793 break;
795 i = (int)( minidx + (maxidx-minidx) %2 );
797 hi = (int) ((w >> 8) & 0xff);
798 lo = (int) (w & 0xff);
800 if (hi >= 0xf0) {
801 hi = GETAHI;
802 lo = GETALO;
804 if (lo >= 0x9f)
805 return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e);
806 else
807 return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) |
808 (lo - (lo >= 0x7f ? 0x20 : 0x1f));
811 static int jis2sjis(int hi, int lo)
813 int w;
814 int minidx = 0;
815 int maxidx = SJISREVTBLSIZ;
816 int i = 2;
818 if (hi & 1) {
819 hi = hi / 2 + (hi < 0x5f ? 0x71 : 0xb1);
820 w = (hi << 8) | (lo + (lo >= 0x60 ? 0x20 : 0x1f));
821 } else {
822 hi = hi / 2 + (hi < 0x5f ? 0x70 : 0xb0);
823 w = (hi << 8) | (lo + 0x7e);
826 if (( 0x87 < hi ) && ( hi < 0xed )) {
827 return w;
829 while (maxidx >= minidx) {
830 if (sjisrev[i].start > w) {
831 maxidx = i-1;
832 } else if (w > sjisrev[i].end) {
833 minidx = i+1;
834 } else {
835 w -= sjisrev[i].start;
836 w += sjisrev[i].rstart;
837 break;
839 i = (int)( minidx + (maxidx-minidx) %2 );
841 return w;
844 /*******************************************************************
845 Convert FROM contain JIS codes to SHIFT JIS codes
846 return converted buffer
847 ********************************************************************/
849 static char *jis8_to_sj_static(const char *from)
851 char *out;
852 int shifted;
854 shifted = _KJ_ROMAN;
855 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) {
856 if (is_esc (*from)) {
857 if (is_so1 (from[1]) && is_so2 (from[2])) {
858 shifted = _KJ_KANJI;
859 from += 3;
860 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
861 shifted = _KJ_ROMAN;
862 from += 3;
863 } else { /* sequence error */
864 goto normal;
866 } else {
868 normal:
870 switch (shifted) {
871 default:
872 case _KJ_ROMAN:
873 *out++ = *from++;
874 break;
875 case _KJ_KANJI:
877 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
878 *out++ = (code >> 8) & 0xff;
879 *out++ = code;
880 from += 2;
881 break;
887 *out = 0;
888 return cvtbuf;
/*
 * In-place variant of jis8_to_sj_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *jis8_to_sj(char *from)
{
  char *converted = jis8_to_sj_static(from);

  pstrcpy(from, converted);
  return from;
}
897 /*******************************************************************
898 Convert FROM contain SHIFT JIS codes to JIS codes
899 return converted buffer
900 ********************************************************************/
902 static char *sj_to_jis8_static(const char *from)
904 char *out;
905 int shifted;
907 shifted = _KJ_ROMAN;
908 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) {
909 if (is_shift_jis (*from)) {
910 int code;
911 switch (shifted) {
912 case _KJ_ROMAN: /* to KANJI */
913 *out++ = jis_esc;
914 *out++ = jis_so1;
915 *out++ = jis_kso;
916 shifted = _KJ_KANJI;
917 break;
919 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
920 *out++ = (code >> 8) & 0xff;
921 *out++ = code;
922 from += 2;
923 } else {
924 switch (shifted) {
925 case _KJ_KANJI: /* to ROMAN/KANA */
926 *out++ = jis_esc;
927 *out++ = jis_si1;
928 *out++ = jis_ksi;
929 shifted = _KJ_ROMAN;
930 break;
932 *out++ = *from++;
936 switch (shifted) {
937 case _KJ_KANJI: /* to ROMAN/KANA */
938 *out++ = jis_esc;
939 *out++ = jis_si1;
940 *out++ = jis_ksi;
941 shifted = _KJ_ROMAN;
942 break;
944 *out = 0;
945 return cvtbuf;
/*
 * In-place variant of sj_to_jis8_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *sj_to_jis8(char *from)
{
  char *converted = sj_to_jis8_static(from);

  pstrcpy(from, converted);
  return from;
}
954 /*******************************************************************
955 Convert FROM contain 7 bits JIS codes to SHIFT JIS codes
956 return converted buffer
957 ********************************************************************/
959 static char *jis7_to_sj_static(const char *from)
961 char *out;
962 int shifted;
964 shifted = _KJ_ROMAN;
965 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) {
966 if (is_esc (*from)) {
967 if (is_so1 (from[1]) && is_so2 (from[2])) {
968 shifted = _KJ_KANJI;
969 from += 3;
970 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
971 shifted = _KJ_ROMAN;
972 from += 3;
973 } else { /* sequence error */
974 goto normal;
976 } else if (is_so (*from)) {
977 shifted = _KJ_KANA; /* to KANA */
978 from++;
979 } else if (is_si (*from)) {
980 shifted = _KJ_ROMAN; /* to ROMAN */
981 from++;
982 } else {
983 normal:
984 switch (shifted) {
985 default:
986 case _KJ_ROMAN:
987 *out++ = *from++;
988 break;
989 case _KJ_KANJI:
991 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
992 *out++ = (code >> 8) & 0xff;
993 *out++ = code;
994 from += 2;
996 break;
997 case _KJ_KANA:
998 *out++ = ((int) from[0]) + 0x80;
999 break;
1003 *out = 0;
1004 return cvtbuf;
/*
 * In-place variant of jis7_to_sj_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *jis7_to_sj(char *from)
{
  char *converted = jis7_to_sj_static(from);

  pstrcpy(from, converted);
  return from;
}
1013 /*******************************************************************
1014 Convert FROM contain SHIFT JIS codes to 7 bits JIS codes
1015 return converted buffer
1016 ********************************************************************/
1018 static char *sj_to_jis7_static(const char *from)
1020 char *out;
1021 int shifted;
1023 shifted = _KJ_ROMAN;
1024 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) {
1025 if (is_shift_jis (*from)) {
1026 int code;
1027 switch (shifted) {
1028 case _KJ_KANA:
1029 *out++ = jis_si; /* to ROMAN and through down */
1030 case _KJ_ROMAN: /* to KANJI */
1031 *out++ = jis_esc;
1032 *out++ = jis_so1;
1033 *out++ = jis_kso;
1034 shifted = _KJ_KANJI;
1035 break;
1037 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
1038 *out++ = (code >> 8) & 0xff;
1039 *out++ = code;
1040 from += 2;
1041 } else if (is_kana (from[0])) {
1042 switch (shifted) {
1043 case _KJ_KANJI: /* to ROMAN */
1044 *out++ = jis_esc;
1045 *out++ = jis_si1;
1046 *out++ = jis_ksi;
1047 case _KJ_ROMAN: /* to KANA */
1048 *out++ = jis_so;
1049 shifted = _KJ_KANA;
1050 break;
1052 *out++ = ((int) *from++) - 0x80;
1053 } else {
1054 switch (shifted) {
1055 case _KJ_KANA:
1056 *out++ = jis_si; /* to ROMAN */
1057 shifted = _KJ_ROMAN;
1058 break;
1059 case _KJ_KANJI: /* to ROMAN */
1060 *out++ = jis_esc;
1061 *out++ = jis_si1;
1062 *out++ = jis_ksi;
1063 shifted = _KJ_ROMAN;
1064 break;
1066 *out++ = *from++;
1069 switch (shifted) {
1070 case _KJ_KANA:
1071 *out++ = jis_si; /* to ROMAN */
1072 break;
1073 case _KJ_KANJI: /* to ROMAN */
1074 *out++ = jis_esc;
1075 *out++ = jis_si1;
1076 *out++ = jis_ksi;
1077 break;
1079 *out = 0;
1080 return cvtbuf;
/*
 * In-place variant of sj_to_jis7_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *sj_to_jis7(char *from)
{
  char *converted = sj_to_jis7_static(from);

  pstrcpy(from, converted);
  return from;
}
1089 /*******************************************************************
1090 Convert FROM contain 7 bits JIS(junet) codes to SHIFT JIS codes
1091 return converted buffer
1092 ********************************************************************/
1094 static char *junet_to_sj_static(const char *from)
1096 char *out;
1097 int shifted;
1099 shifted = _KJ_ROMAN;
1100 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) {
1101 if (is_esc (*from)) {
1102 if (is_so1 (from[1]) && is_so2 (from[2])) {
1103 shifted = _KJ_KANJI;
1104 from += 3;
1105 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
1106 shifted = _KJ_ROMAN;
1107 from += 3;
1108 } else if (is_juk1(from[1]) && is_juk2 (from[2])) {
1109 shifted = _KJ_KANA;
1110 from += 3;
1111 } else { /* sequence error */
1112 goto normal;
1114 } else {
1115 normal:
1116 switch (shifted) {
1117 default:
1118 case _KJ_ROMAN:
1119 *out++ = *from++;
1120 break;
1121 case _KJ_KANJI:
1123 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
1124 *out++ = (code >> 8) & 0xff;
1125 *out++ = code;
1126 from += 2;
1128 break;
1129 case _KJ_KANA:
1130 *out++ = ((int) from[0]) + 0x80;
1131 break;
1135 *out = 0;
1136 return cvtbuf;
/*
 * In-place variant of junet_to_sj_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *junet_to_sj(char *from)
{
  char *converted = junet_to_sj_static(from);

  pstrcpy(from, converted);
  return from;
}
1145 /*******************************************************************
1146 Convert FROM contain SHIFT JIS codes to 7 bits JIS(junet) codes
1147 return converted buffer
1148 ********************************************************************/
1150 static char *sj_to_junet_static(const char *from)
1152 char *out;
1153 int shifted;
1155 shifted = _KJ_ROMAN;
1156 for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) {
1157 if (is_shift_jis (*from)) {
1158 int code;
1159 switch (shifted) {
1160 case _KJ_KANA:
1161 case _KJ_ROMAN: /* to KANJI */
1162 *out++ = jis_esc;
1163 *out++ = jis_so1;
1164 *out++ = jis_so2;
1165 shifted = _KJ_KANJI;
1166 break;
1168 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
1169 *out++ = (code >> 8) & 0xff;
1170 *out++ = code;
1171 from += 2;
1172 } else if (is_kana (from[0])) {
1173 switch (shifted) {
1174 case _KJ_KANJI: /* to ROMAN */
1175 case _KJ_ROMAN: /* to KANA */
1176 *out++ = jis_esc;
1177 *out++ = junet_kana1;
1178 *out++ = junet_kana2;
1179 shifted = _KJ_KANA;
1180 break;
1182 *out++ = ((int) *from++) - 0x80;
1183 } else {
1184 switch (shifted) {
1185 case _KJ_KANA:
1186 case _KJ_KANJI: /* to ROMAN */
1187 *out++ = jis_esc;
1188 *out++ = jis_si1;
1189 *out++ = jis_si2;
1190 shifted = _KJ_ROMAN;
1191 break;
1193 *out++ = *from++;
1196 switch (shifted) {
1197 case _KJ_KANA:
1198 case _KJ_KANJI: /* to ROMAN */
1199 *out++ = jis_esc;
1200 *out++ = jis_si1;
1201 *out++ = jis_si2;
1202 break;
1204 *out = 0;
1205 return cvtbuf;
/*
 * In-place variant of sj_to_junet_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *sj_to_junet(char *from)
{
  char *converted = sj_to_junet_static(from);

  pstrcpy(from, converted);
  return from;
}
1214 /*******************************************************************
1215 HEX <-> SJIS
1216 ********************************************************************/
1217 /* ":xx" -> a byte */
1219 static char *hex_to_sj_static(const char *from)
1221 const char *sp;
1222 char *dp;
1224 sp = from;
1225 dp = cvtbuf;
1226 while (*sp && (dp - cvtbuf < sizeof(cvtbuf)-3)) {
1227 if (*sp == hex_tag && isxdigit((int)sp[1]) && isxdigit((int)sp[2])) {
1228 *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
1229 sp += 3;
1230 } else
1231 *dp++ = *sp++;
1233 *dp = '\0';
1234 return cvtbuf;
/*
 * In-place variant of hex_to_sj_static(): the converted text is copied
 * back over FROM with pstrcpy() and FROM is returned.
 */
static char *hex_to_sj(char *from)
{
  char *converted = hex_to_sj_static(from);

  pstrcpy(from, converted);
  return from;
}
1243 /*******************************************************************
1244 kanji/kana -> ":xx"
1245 ********************************************************************/
1247 static char *sj_to_hex_static(const char *from)
1249 const unsigned char *sp;
1250 unsigned char *dp;
1252 sp = (const uchar *)from;
1253 dp = (unsigned char*) cvtbuf;
1254 while (*sp && (((char *)dp)- cvtbuf < sizeof(cvtbuf)-7)) {
1255 if (is_kana(*sp)) {
1256 *dp++ = hex_tag;
1257 *dp++ = bin2hex (((*sp)>>4)&0x0f);
1258 *dp++ = bin2hex ((*sp)&0x0f);
1259 sp++;
1260 } else if (is_shift_jis (*sp) && is_shift_jis2 (sp[1])) {
1261 *dp++ = hex_tag;
1262 *dp++ = bin2hex (((*sp)>>4)&0x0f);
1263 *dp++ = bin2hex ((*sp)&0x0f);
1264 sp++;
1265 *dp++ = hex_tag;
1266 *dp++ = bin2hex (((*sp)>>4)&0x0f);
1267 *dp++ = bin2hex ((*sp)&0x0f);
1268 sp++;
1269 } else
1270 *dp++ = *sp++;
1272 *dp = '\0';
1273 return cvtbuf;
/*
 * In-place variant of sj_to_hex_static(): encodes `from`, then copies
 * the encoded text back over `from` with pstrcpy().  Returns `from`.
 */
static char *sj_to_hex(char *from)
{
	const char *encoded = sj_to_hex_static(from);

	pstrcpy(from, encoded);
	return from;
}
1282 /*******************************************************************
1283 CAP <-> SJIS
1284 ********************************************************************/
1285 /* ":xx" CAP -> a byte */
1286 static char *cap_to_sj_static(const char *from)
1288 const char *sp;
1289 char *dp;
1291 sp = (const char *) from;
1292 dp = cvtbuf;
1293 while (*sp && (dp- cvtbuf < sizeof(cvtbuf)-2)) {
1295 * The only change between this and hex_to_sj is here. sj_to_cap only
1296 * translates characters greater or equal to 0x80 - make sure that here
1297 * we only do the reverse (that's why the strchr is used rather than
1298 * isxdigit. Based on fix from ado@elsie.nci.nih.gov (Arthur David Olson).
1300 if (*sp == hex_tag && (strchr ("89abcdefABCDEF", sp[1]) != NULL) && isxdigit((int)sp[2])) {
1301 *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
1302 sp += 3;
1303 } else
1304 *dp++ = *sp++;
1306 *dp = '\0';
1307 return cvtbuf;
/*
 * In-place variant of cap_to_sj_static(): decodes `from`, then copies
 * the decoded text back over `from` with pstrcpy().  Returns `from`.
 */
static char *cap_to_sj(char *from)
{
	const char *decoded = cap_to_sj_static(from);

	pstrcpy(from, decoded);
	return from;
}
1316 /*******************************************************************
1317 kanji/kana -> ":xx" - CAP format.
1318 ********************************************************************/
1319 static char *sj_to_cap_static(const char *from)
1321 const unsigned char *sp;
1322 unsigned char *dp;
1324 sp = (const uchar *)from;
1325 dp = (unsigned char*) cvtbuf;
1326 while (*sp && (((char *)dp) - cvtbuf < sizeof(cvtbuf)-4)) {
1327 if (*sp >= 0x80) {
1328 *dp++ = hex_tag;
1329 *dp++ = bin2hex (((*sp)>>4)&0x0f);
1330 *dp++ = bin2hex ((*sp)&0x0f);
1331 sp++;
1332 } else {
1333 *dp++ = *sp++;
1336 *dp = '\0';
1337 return cvtbuf;
/*
 * In-place variant of sj_to_cap_static(): encodes `from`, then copies
 * the encoded text back over `from` with pstrcpy().  Returns `from`.
 */
static char *sj_to_cap(char *from)
{
	const char *encoded = sj_to_cap_static(from);

	pstrcpy(from, encoded);
	return from;
}
1346 /*******************************************************************
1347 sj to sj
1348 ********************************************************************/
1350 static char *sj_to_sj_static(const char *from)
1352 pstrcpy (cvtbuf, from);
1353 return cvtbuf;
/*
 * Identity conversion, in-place flavour: the string needs no change,
 * so the argument is handed straight back to the caller.
 */
static char *sj_to_sj(char *from)
{
	char *unchanged = from;

	return unchanged;
}
1361 /*******************************************************************
1362 cp to utf8
1363 ********************************************************************/
1364 static char *cp_to_utf8_static(const char *from)
1366 unsigned char *dst;
1367 const unsigned char *src;
1368 smb_ucs2_t val;
1369 int w;
1370 size_t len;
1372 src = (const unsigned char *)from;
1373 dst = (unsigned char *)cvtbuf;
1374 while (*src && (((char *)dst - cvtbuf) < sizeof(cvtbuf)-4)) {
1375 len = _skip_multibyte_char(*src);
1376 if ( len == 2 ) {
1377 w = (int)(*src++ & 0xff);
1378 w = (int)((w << 8)|(*src++ & 0xff));
1379 } else {
1380 w = (int)(*src++ & 0xff);
1382 val = doscp2ucs2(w);
1384 if ( val <= 0x7f ) {
1385 *dst++ = (char)(val & 0xff);
1386 } else if ( val <= 0x7ff ){
1387 *dst++ = (char)( 0xc0 | ((val >> 6) & 0xff));
1388 *dst++ = (char)( 0x80 | ( val & 0x3f ));
1389 } else {
1390 *dst++ = (char)( 0xe0 | ((val >> 12) & 0x0f));
1391 *dst++ = (char)( 0x80 | ((val >> 6) & 0x3f));
1392 *dst++ = (char)( 0x80 | (val & 0x3f));
1396 *dst++='\0';
1397 return cvtbuf;
/*
 * In-place variant of cp_to_utf8_static(): converts `from`, then copies
 * the converted text back over `from` with pstrcpy().  Returns `from`.
 */
static char *cp_to_utf8(char *from)
{
	const char *converted = cp_to_utf8_static(from);

	pstrcpy(from, converted);
	return from;
}
1406 /*******************************************************************
1407 utf8 to cp
1408 ********************************************************************/
1409 static char *utf8_to_cp_static(const char *from)
1411 const unsigned char *src;
1412 unsigned char *dst;
1413 smb_ucs2_t val;
1414 int w;
1416 src = (const unsigned char *)from;
1417 dst = (unsigned char *)cvtbuf;
1419 while (*src && ((char *)dst - cvtbuf < sizeof(cvtbuf)-4)) {
1420 val = (*src++ & 0xff);
1421 if (val < 0x80) {
1422 *dst++ = (char)(val & 0x7f);
1423 } else if ((0xc0 <= val) && (val <= 0xdf)
1424 && (0x80 <= *src) && (*src <= 0xbf)) {
1425 w = ucs2doscp( ((val & 31) << 6) | ((*src++) & 63 ));
1426 *dst++ = (char)((w >> 8) & 0xff);
1427 *dst++ = (char)(w & 0xff);
1428 } else {
1429 val = (val & 0x0f) << 12;
1430 val |= ((*src++ & 0x3f) << 6);
1431 val |= (*src++ & 0x3f);
1432 w = ucs2doscp(val);
1433 *dst++ = (char)((w >> 8) & 0xff);
1434 *dst++ = (char)(w & 0xff);
1437 *dst++='\0';
1438 return cvtbuf;
/*
 * In-place variant of utf8_to_cp_static(): converts `from`, then copies
 * the converted text back over `from` with pstrcpy().  Returns `from`.
 */
static char *utf8_to_cp(char *from)
{
	const char *converted = utf8_to_cp_static(from);

	pstrcpy(from, converted);
	return from;
}
1447 /************************************************************************
1448 conversion:
1449 _dos_to_unix _unix_to_dos
1450 ************************************************************************/
1452 static void setup_string_function(int codes)
1454 switch (codes) {
1455 default:
1456 _dos_to_unix = dos2unix_format;
1457 _dos_to_unix_static = dos2unix_format_static;
1458 _unix_to_dos = unix2dos_format;
1459 _unix_to_dos_static = unix2dos_format_static;
1460 break;
1462 case SJIS_CODE:
1463 _dos_to_unix = sj_to_sj;
1464 _dos_to_unix_static = sj_to_sj_static;
1465 _unix_to_dos = sj_to_sj;
1466 _unix_to_dos_static = sj_to_sj_static;
1467 break;
1469 case EUC_CODE:
1470 _dos_to_unix = sj_to_euc;
1471 _dos_to_unix_static = sj_to_euc_static;
1472 _unix_to_dos = euc_to_sj;
1473 _unix_to_dos_static = euc_to_sj_static;
1474 break;
1476 case JIS7_CODE:
1477 _dos_to_unix = sj_to_jis7;
1478 _dos_to_unix_static = sj_to_jis7_static;
1479 _unix_to_dos = jis7_to_sj;
1480 _unix_to_dos_static = jis7_to_sj_static;
1481 break;
1483 case JIS8_CODE:
1484 _dos_to_unix = sj_to_jis8;
1485 _dos_to_unix_static = sj_to_jis8_static;
1486 _unix_to_dos = jis8_to_sj;
1487 _unix_to_dos_static = jis8_to_sj_static;
1488 break;
1490 case JUNET_CODE:
1491 _dos_to_unix = sj_to_junet;
1492 _dos_to_unix_static = sj_to_junet_static;
1493 _unix_to_dos = junet_to_sj;
1494 _unix_to_dos_static = junet_to_sj_static;
1495 break;
1497 case HEX_CODE:
1498 _dos_to_unix = sj_to_hex;
1499 _dos_to_unix_static = sj_to_hex_static;
1500 _unix_to_dos = hex_to_sj;
1501 _unix_to_dos_static = hex_to_sj_static;
1502 break;
1504 case CAP_CODE:
1505 _dos_to_unix = sj_to_cap;
1506 _dos_to_unix_static = sj_to_cap_static;
1507 _unix_to_dos = cap_to_sj;
1508 _unix_to_dos_static = cap_to_sj_static;
1509 break;
1511 case UTF8_CODE:
1512 _dos_to_unix = cp_to_utf8;
1513 _dos_to_unix_static = cp_to_utf8_static;
1514 _unix_to_dos = utf8_to_cp;
1515 _unix_to_dos_static = utf8_to_cp_static;
1516 break;
1518 case EUC3_CODE:
1519 _dos_to_unix = sj_to_euc3;
1520 _dos_to_unix_static = sj_to_euc3_static;
1521 _unix_to_dos = euc3_to_sj;
1522 _unix_to_dos_static = euc3_to_sj_static;
1523 break;
1527 /************************************************************************
1528 Interpret coding system.
1529 ************************************************************************/
1531 void interpret_coding_system(char *str)
1533 int codes = UNKNOWN_CODE;
1535 if (strequal (str, "sjis")) {
1536 codes = SJIS_CODE;
1537 } else if (strequal (str, "euc")) {
1538 codes = EUC_CODE;
1539 } else if (strequal (str, "cap")) {
1540 codes = CAP_CODE;
1541 hex_tag = HEXTAG;
1542 } else if (strequal (str, "hex")) {
1543 codes = HEX_CODE;
1544 hex_tag = HEXTAG;
1545 } else if (!strncasecmp (str, "hex", 3)) {
1546 codes = HEX_CODE;
1547 hex_tag = (str[3] ? str[3] : HEXTAG);
1548 } else if (strequal (str, "j8bb")) {
1549 codes = JIS8_CODE;
1550 jis_kso = 'B';
1551 jis_ksi = 'B';
1552 } else if (strequal (str, "j8bj") || strequal (str, "jis8")) {
1553 codes = JIS8_CODE;
1554 jis_kso = 'B';
1555 jis_ksi = 'J';
1556 } else if (strequal (str, "j8bh")) {
1557 codes = JIS8_CODE;
1558 jis_kso = 'B';
1559 jis_ksi = 'H';
1560 } else if (strequal (str, "j8@b")) {
1561 codes = JIS8_CODE;
1562 jis_kso = '@';
1563 jis_ksi = 'B';
1564 } else if (strequal (str, "j8@j")) {
1565 codes = JIS8_CODE;
1566 jis_kso = '@';
1567 jis_ksi = 'J';
1568 } else if (strequal (str, "j8@h")) {
1569 codes = JIS8_CODE;
1570 jis_kso = '@';
1571 jis_ksi = 'H';
1572 } else if (strequal (str, "j7bb")) {
1573 codes = JIS7_CODE;
1574 jis_kso = 'B';
1575 jis_ksi = 'B';
1576 } else if (strequal (str, "j7bj") || strequal (str, "jis7")) {
1577 codes = JIS7_CODE;
1578 jis_kso = 'B';
1579 jis_ksi = 'J';
1580 } else if (strequal (str, "j7bh")) {
1581 codes = JIS7_CODE;
1582 jis_kso = 'B';
1583 jis_ksi = 'H';
1584 } else if (strequal (str, "j7@b")) {
1585 codes = JIS7_CODE;
1586 jis_kso = '@';
1587 jis_ksi = 'B';
1588 } else if (strequal (str, "j7@j")) {
1589 codes = JIS7_CODE;
1590 jis_kso = '@';
1591 jis_ksi = 'J';
1592 } else if (strequal (str, "j7@h")) {
1593 codes = JIS7_CODE;
1594 jis_kso = '@';
1595 jis_ksi = 'H';
1596 } else if (strequal (str, "jubb")) {
1597 codes = JUNET_CODE;
1598 jis_kso = 'B';
1599 jis_ksi = 'B';
1600 } else if (strequal (str, "jubj") || strequal (str, "junet")) {
1601 codes = JUNET_CODE;
1602 jis_kso = 'B';
1603 jis_ksi = 'J';
1604 } else if (strequal (str, "jubh")) {
1605 codes = JUNET_CODE;
1606 jis_kso = 'B';
1607 jis_ksi = 'H';
1608 } else if (strequal (str, "ju@b")) {
1609 codes = JUNET_CODE;
1610 jis_kso = '@';
1611 jis_ksi = 'B';
1612 } else if (strequal (str, "ju@j")) {
1613 codes = JUNET_CODE;
1614 jis_kso = '@';
1615 jis_ksi = 'J';
1616 } else if (strequal (str, "ju@h")) {
1617 codes = JUNET_CODE;
1618 jis_kso = '@';
1619 jis_ksi = 'H';
1620 } else if (strequal (str, "utf8")) {
1621 codes = UTF8_CODE;
1622 } else if (strequal (str, "euc3")) {
1623 codes = EUC3_CODE;
1625 setup_string_function (codes);
1628 /*******************************************************************
1629 Non multibyte char function.
1630 *******************************************************************/
/*
 * Skip-length callback for single-byte code pages: always returns 0,
 * whatever character is passed in.
 */
static size_t skip_non_multibyte_char(char c)
{
	(void)c;	/* the answer does not depend on the character */
	return 0;
}
1637 /*******************************************************************
1638 Function that always says a character isn't multibyte.
1639 *******************************************************************/
1641 static BOOL not_multibyte_char_1(char c)
1643 return False;
1646 /*******************************************************************
1647 Setup the function pointers for the functions that are replaced
1648 when multi-byte codepages are used.
1650 The dos_to_unix and unix_to_dos function pointers are only
1651 replaced by setup_string_function called by interpret_coding_system
1652 above.
1653 *******************************************************************/
1655 void initialize_multibyte_vectors( int client_codepage)
1657 switch( client_codepage )
1659 case KANJI_CODEPAGE:
1660 multibyte_strchr = sj_strchr;
1661 multibyte_strrchr = sj_strrchr;
1662 multibyte_strstr = sj_strstr;
1663 multibyte_strtok = sj_strtok;
1664 _skip_multibyte_char = skip_kanji_multibyte_char;
1665 is_multibyte_char_1 = is_kanji_multibyte_char_1;
1666 global_is_multibyte_codepage = True;
1667 break;
1668 case HANGUL_CODEPAGE:
1669 multibyte_strchr = generic_multibyte_strchr;
1670 multibyte_strrchr = generic_multibyte_strrchr;
1671 multibyte_strstr = generic_multibyte_strstr;
1672 multibyte_strtok = generic_multibyte_strtok;
1673 _skip_multibyte_char = skip_generic_multibyte_char;
1674 is_multibyte_char_1 = hangul_is_multibyte_char_1;
1675 global_is_multibyte_codepage = True;
1676 break;
1677 case BIG5_CODEPAGE:
1678 multibyte_strchr = generic_multibyte_strchr;
1679 multibyte_strrchr = generic_multibyte_strrchr;
1680 multibyte_strstr = generic_multibyte_strstr;
1681 multibyte_strtok = generic_multibyte_strtok;
1682 _skip_multibyte_char = skip_generic_multibyte_char;
1683 is_multibyte_char_1 = big5_is_multibyte_char_1;
1684 global_is_multibyte_codepage = True;
1685 break;
1686 case SIMPLIFIED_CHINESE_CODEPAGE:
1687 multibyte_strchr = generic_multibyte_strchr;
1688 multibyte_strrchr = generic_multibyte_strrchr;
1689 multibyte_strstr = generic_multibyte_strstr;
1690 multibyte_strtok = generic_multibyte_strtok;
1691 _skip_multibyte_char = skip_generic_multibyte_char;
1692 is_multibyte_char_1 = simpch_is_multibyte_char_1;
1693 global_is_multibyte_codepage = True;
1694 break;
1696 * Single char size code page.
1698 default:
1699 multibyte_strchr = (const char *(*)(const char *, int )) strchr;
1700 multibyte_strrchr = (const char *(*)(const char *, int )) strrchr;
1701 multibyte_strstr = (const char *(*)(const char *, const char *)) strstr;
1702 multibyte_strtok = (char *(*)(char *, const char *)) strtok;
1703 _skip_multibyte_char = skip_non_multibyte_char;
1704 is_multibyte_char_1 = not_multibyte_char_1;
1705 global_is_multibyte_codepage = False;
1706 break;
/* *******************************************************
   Function(s) for "dynamic" encoding of SWAT output.
   In this version only dos_to_dos, dos_to_unix and unix_to_dos
   are used, for a bug fix. Conversion to the web encoding
   (i.e. to the catalog file encoding) is not needed because
   both use the same character codes.
   **************************************************** */
1716 static char *no_conversion_static(const char *str)
1718 static pstring temp;
1719 pstrcpy(temp, str);
1720 return temp;
/* DOS-to-DOS conversion hook; initially set to no_conversion_static. */
char *(*_dos_to_dos_static)(const char *) = no_conversion_static;