source/lib/kanji.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 1.9.
   4    Kanji Extensions
   5    Copyright (C) Andrew Tridgell 1992-1998
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20
  21    Adding for Japanese language by <fujita@ainix.isac.co.jp> 1994.9.5
  22      and extend coding system to EUC/SJIS/JIS/HEX at 1994.10.11
  23      and add all jis codes sequence type at 1995.8.16
  24      Notes: Hexadecimal code by <ohki@gssm.otuka.tsukuba.ac.jp>
  25    Adding features about Machine dependent codes and User Defined Codes
  26      by Hiroshi MIURA <miura@samba.gr.jp> 2000.3.19
  27 */
  28
  29 #define _KANJI_C_
  30 #include "includes.h"
  31
  32 /*
  33  * Function pointers that get overridden when multi-byte code pages
  34  * are loaded.
  35  */
  36
  37 const char *(*multibyte_strchr)(const char *, int ) = (const char *(*)(const char *, int )) strchr;
  38 const char *(*multibyte_strrchr)(const char *, int ) = (const char *(*)(const char *, int )) strrchr;
  39 const char *(*multibyte_strstr)(const char *, const char *) = (const char *(*)(const char *, const char *)) strstr;
  40 char *(*multibyte_strtok)(char *, const char *) = (char *(*)(char *, const char *)) strtok;
  41
  42 /*
  43  * Kanji is treated differently here due to historical accident of
  44  * it being the first non-English codepage added to Samba.
  45  * The define 'KANJI' is being overloaded to mean 'use kanji codepage
  46  * by default' and also 'this is the filename-to-disk conversion
  47  * method to use'. This really should be removed and all control
  48  * over this left in the smb.conf parameters 'client codepage'
  49  * and 'coding system'.
  50  */
  51
  52 #ifndef KANJI
  53
  54 /*
  55  * Set the default conversion to be the functions in
  56  * charcnv.c.
  57  */
  58
  59 static size_t skip_non_multibyte_char(char);
  60 static BOOL not_multibyte_char_1(char);
  61
  62 char *(*_dos_to_unix)(char *) = dos2unix_format;
  63 char *(*_dos_to_unix_static)(const char *) = dos2unix_format_static;
  64 char *(*_unix_to_dos)(char *) = unix2dos_format;
  65 char *(*_unix_to_dos_static)(const char *) = unix2dos_format_static;
  66 size_t (*_skip_multibyte_char)(char) = skip_non_multibyte_char;
  67 BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1;
  68
  69 #else /* KANJI */
  70
  71 /*
  72  * Set the default conversion to be the function
  73  * sj_to_sj in this file.
  74  */
  75
  76 static char *sj_to_sj(char *from);
  77 static char *sj_to_sj_static(const char *from);
  78 static size_t skip_kanji_multibyte_char(char);
  79 static BOOL is_kanji_multibyte_char_1(char);
  80
  81 char *(*_dos_to_unix)(char *) = sj_to_sj;
  82 char *(*_dos_to_unix_static)(const char *) = sj_to_sj_static;
  83 char *(*_unix_to_dos)(char *) = sj_to_sj;
  84 char *(*_unix_to_dos_static)(const char *) = sj_to_sj_static;
  85 size_t (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char;
  86 int (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1;
  87
  88 #endif /* KANJI */
  89
  90 BOOL global_is_multibyte_codepage = False;
  91
  92 /* jis si/so sequence */
  93 static char jis_kso = JIS_KSO;
  94 static char jis_ksi = JIS_KSI;
  95 static char hex_tag = HEXTAG;
  96
  97 /*******************************************************************
  98   SHIFT JIS functions
  99 ********************************************************************/
 100
 101 /*******************************************************************
  102  Return the next token of S1, using any char of S2 as a separator.
 103  S1 contains SHIFT JIS chars.
 104 ********************************************************************/
 105
 106 static char *sj_strtok(char *s1, const char *s2)
 107 {
 108   static char *s = NULL;
 109   char *q;
 110   if (!s1) {
 111     if (!s) {
 112       return NULL;
 113     }
 114     s1 = s;
 115   }
 116   for (q = s1; *s1; ) {
 117     if (is_shift_jis (*s1)) {
 118       s1 += 2;
 119     } else if (is_kana (*s1)) {
 120       s1++;
 121     } else {
 122       char *p = strchr (s2, *s1);
 123       if (p) {
 124         if (s1 != q) {
 125           s = s1 + 1;
 126           *s1 = '\0';
 127           return q;
 128         }
 129         q = s1 + 1;
 130       }
 131       s1++;
 132     }
 133   }
 134   s = NULL;
 135   if (*q) {
 136     return q;
 137   }
 138   return NULL;
 139 }
 140
 141 /*******************************************************************
  142  Search for the string S2 in S1.
 143  S1 contains SHIFT JIS chars.
 144 ********************************************************************/
 145
 146 static const char *sj_strstr(const char *s1, const char *s2)
 147 {
 148   size_t len = strlen (s2);
 149   if (!*s2)
 150     return (const char *) s1;
 151   for (;*s1;) {
 152     if (*s1 == *s2) {
 153       if (strncmp (s1, s2, len) == 0)
 154         return (const char *) s1;
 155     }
 156     if (is_shift_jis (*s1)) {
 157       s1 += 2;
 158     } else {
 159       s1++;
 160     }
 161   }
 162   return NULL;
 163 }
 164
 165 /*******************************************************************
 166  Search char C from beginning of S.
 167  S contains SHIFT JIS chars.
 168 ********************************************************************/
 169
 170 static const char *sj_strchr (const char *s, int c)
 171 {
 172   for (; *s; ) {
 173     if (*s == c)
 174       return (const char *) s;
 175     if (is_shift_jis (*s)) {
 176       s += 2;
 177     } else {
 178       s++;
 179     }
 180   }
 181   return NULL;
 182 }
 183
 184 /*******************************************************************
  185  Search for the last occurrence of char C in S.
 186  S contains SHIFT JIS chars.
 187 ********************************************************************/
 188
 189 static const char *sj_strrchr(const char *s, int c)
 190 {
 191   const char *q;
 192
 193   for (q = 0; *s; ) {
 194     if (*s == c) {
 195       q = (const char *) s;
 196     }
 197     if (is_shift_jis (*s)) {
 198       s += 2;
 199     } else {
 200       s++;
 201     }
 202   }
 203   return q;
 204 }
 205
 206 /*******************************************************************
 207  Kanji multibyte char skip function.
 208 *******************************************************************/
 209
 210 static size_t skip_kanji_multibyte_char(char c)
 211 {
 212   if(is_shift_jis(c)) {
 213     return 2;
 214   } else if (is_kana(c)) {
 215     return 1;
 216   }
 217   return 0;
 218 }
 219
 220 /*******************************************************************
 221  Kanji multibyte char identification.
 222 *******************************************************************/
 223
 224 static BOOL is_kanji_multibyte_char_1(char c)
 225 {
 226   return is_shift_jis(c);
 227 }
 228
 229 /*******************************************************************
 230  The following functions are the only ones needed to do multibyte
 231  support for Hangul, Big5 and Simplified Chinese. Most of the
 232  real work for these codepages is done in the generic multibyte
 233  functions. The only reason these functions are needed at all
 234  is that the is_xxx(c) calls are really preprocessor macros.
 235 ********************************************************************/
 236
 237 /*******************************************************************
 238   Hangul (Korean - code page 949) function.
 239 ********************************************************************/
 240
 241 static BOOL hangul_is_multibyte_char_1(char c)
 242 {
 243   return is_hangul(c);
 244 }
 245
 246 /*******************************************************************
 247   Big5 Traditional Chinese (code page 950) function.
 248 ********************************************************************/
 249
 250 static BOOL big5_is_multibyte_char_1(char c)
 251 {
 252   return is_big5_c1(c);
 253 }
 254
 255 /*******************************************************************
 256   Simplified Chinese (code page 936) function.
 257 ********************************************************************/
 258
 259 static BOOL simpch_is_multibyte_char_1(char c)
 260 {
 261   return is_simpch_c1(c);
 262 }
 263
 264 /*******************************************************************
 265   Generic multibyte functions - used by Hangul, Big5 and Simplified
 266   Chinese codepages.
 267 ********************************************************************/
 268
 269 /*******************************************************************
 270  search token from S1 separated any char of S2
 271  S1 contains generic multibyte chars.
 272 ********************************************************************/
 273
 274 static char *generic_multibyte_strtok(char *s1, const char *s2)
 275 {
 276   static char *s = NULL;
 277   char *q;
 278   if (!s1) {
 279     if (!s) {
 280       return NULL;
 281     }
 282     s1 = s;
 283   }
 284   for (q = s1; *s1; ) {
 285     if ((*is_multibyte_char_1)(*s1)) {
 286         s1 += 2;
 287     } else {
 288       char *p = strchr (s2, *s1);
 289       if (p) {
 290         if (s1 != q) {
 291           s = s1 + 1;
 292           *s1 = '\0';
 293           return q;
 294         }
 295         q = s1 + 1;
 296       }
 297     s1++;
 298     }
 299   }
 300   s = NULL;
 301   if (*q) {
 302     return q;
 303   }
 304   return NULL;
 305 }
 306
 307 /*******************************************************************
 308  search string S2 from S1
 309  S1 contains generic multibyte chars.
 310 ********************************************************************/
 311
 312 static const char *generic_multibyte_strstr(const char *s1, const char *s2)
 313 {
 314   size_t len = strlen (s2);
 315   if (!*s2)
 316     return (const char *) s1;
 317   for (;*s1;) {
 318     if (*s1 == *s2) {
 319       if (strncmp (s1, s2, len) == 0)
 320         return (const char *) s1;
 321     }
 322     if ((*is_multibyte_char_1)(*s1)) {
 323       s1 += 2;
 324     } else {
 325       s1++;
 326     }
 327   }
 328   return NULL;
 329 }
 330
 331 /*******************************************************************
 332  Search char C from beginning of S.
 333  S contains generic multibyte chars.
 334 ********************************************************************/
 335
 336 static const char *generic_multibyte_strchr(const char *s, int c)
 337 {
 338   for (; *s; ) {
 339     if (*s == c)
 340       return (const char *) s;
 341     if ((*is_multibyte_char_1)(*s)) {
 342       s += 2;
 343     } else {
 344       s++;
 345     }
 346   }
 347   return NULL;
 348 }
 349
 350 /*******************************************************************
  351  Search for the last occurrence of char C in S.
 352  S contains generic multibyte chars.
 353 ********************************************************************/
 354
 355 static const char *generic_multibyte_strrchr(const char *s, int c)
 356 {
 357   const char *q;
 358
 359   for (q = 0; *s; ) {
 360     if (*s == c) {
 361       q = (const char *) s;
 362     }
 363     if ((*is_multibyte_char_1)(*s)) {
 364       s += 2;
 365     } else {
 366       s++;
 367     }
 368   }
 369   return q;
 370 }
 371
 372 /*******************************************************************
 373  Generic multibyte char skip function.
 374 *******************************************************************/
 375
 376 static size_t skip_generic_multibyte_char(char c)
 377 {
 378   if( (*is_multibyte_char_1)(c)) {
 379     return 2;
 380   }
 381   return 0;
 382 }
 383
 384 /*******************************************************************
 385   Code conversion
 386 ********************************************************************/
 387
 388 /* convesion buffer */
 389 static char cvtbuf[2*sizeof(pstring)];
 390
 391 /*******************************************************************
 392   EUC <-> SJIS
 393 ********************************************************************/
 394
 395 static int euc2sjis (int hi, int lo)
 396 {
 397   int w;
 398   int maxidx = SJISREVTBLSIZ;
 399   int minidx = 0;
 400   int i = 2;
 401
 402   if (hi & 1) {
 403     hi = hi / 2 + (hi < 0xdf ? 0x31 : 0x71);
 404     w =  (hi << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61));
 405   } else {
 406     hi = hi / 2 + (hi < 0xdf ? 0x30 : 0x70);
 407     w = (hi << 8) | (lo - 2);
 408   }
 409   if  ( (0x87 < hi ) && (hi < 0xed ) ) {
 410     return w;
 411   }
 412   while ( maxidx >= minidx ) {
 413     if ( sjisrev[i].start > w ) {
 414       maxidx = i-1;
 415     } else if ( w > sjisrev[i].end ) {
 416       minidx = i+1;
 417     } else {
 418       w -= sjisrev[i].start;
 419       w += sjisrev[i].rstart;
 420       break;
 421     }
 422    i = (int)( minidx + (maxidx - minidx) % 2 );
 423   }
 424   return w;
 425 }
 426
 427 static int sjis2euc (int hi, int lo)
 428 {
 429   int minidx = 0;
 430   int maxidx = SJISCONVTBLSIZ -1; /* max index 1 less than number of entries */
 431   int i = ( 0 + SJISCONVTBLSIZ ) % 2;
 432   int w = (int)((hi << 8) | lo);
 433
 434   if ( (sjisconv[0].start < w) && (w < sjisconv[SJISCONVTBLSIZ-1].end) ) {
 435     while (maxidx >= minidx) {
 436       if ( sjisconv[i].start > w ) {
 437         maxidx = i-1;
 438       } else if (w > sjisconv[i].end) {
 439         minidx = i+1;
 440       } else {
 441         w -= sjisconv[i].start;
 442         w += sjisconv[i].rstart;
 443         break;
 444       }
 445       i = (int)( minidx + (maxidx-minidx)%2 );
 446     }
 447     hi = (int) ((w >> 8) & 0xff);
 448     lo = (int) (w & 0xff);
 449   }
 450   if (hi >= 0xf0) {
 451      hi = GETAHI;
 452      lo = GETALO;
 453   }
 454   if (lo >= 0x9f)
 455     return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
 456   else
 457     return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
 458             (lo + (lo >= 0x7f ? 0x60 : 0x61));
 459 }
 460
 461 /*******************************************************************
  462  Convert FROM, which contains SHIFT JIS codes, to EUC codes
 463  return converted buffer
 464 ********************************************************************/
 465
 466 static char *sj_to_euc_static(const char *from)
 467 {
 468   char *out;
 469
 470   for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) {
 471     if (is_shift_jis (*from)) {
 472       int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
 473       *out++ = (code >> 8) & 0xff;
 474       *out++ = code & 0xff;
 475       from += 2;
 476     } else if (is_kana (*from)) {
 477       *out++ = (char)euc_kana;
 478       *out++ = *from++;
 479     } else {
 480       *out++ = *from++;
 481     }
 482   }
 483   *out = 0;
 484   return cvtbuf;
 485 }
 486
 487 static char *sj_to_euc(char *from)
 488 {
 489   pstrcpy(from, sj_to_euc_static(from));
 490   return from;
 491 }
 492
 493 /*******************************************************************
  494  Convert FROM, which contains EUC codes, to SHIFT JIS codes
 495  return converted buffer
 496 ********************************************************************/
 497
 498 static char *euc_to_sj_static(const char *from)
 499 {
 500   char *out;
 501
 502   for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3); ) {
 503     if (is_euc (*from)) {
 504       int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 505       *out++ = (code >> 8) & 0xff;
 506       *out++ = code & 0xff;
 507       from += 2;
 508     } else if (is_euc_kana (*from)) {
 509       *out++ = from[1];
 510       from += 2;
 511     } else {
 512       *out++ = *from++;
 513     }
 514   }
 515   *out = 0;
 516   return cvtbuf;
 517 }
 518
 519 static char *euc_to_sj(char *from)
 520 {
 521   pstrcpy(from, euc_to_sj_static(from));
 522   return from;
 523 }
 524
 525 /*******************************************************************
 526   EUC3 <-> SJIS
 527 ********************************************************************/
 528 static int sjis3euc (int hi, int lo, int *len)
 529 {
 530   int i,w;
 531   int minidx;
 532   int maxidx;
 533
 534   w = (int)((hi << 8) | lo);
 535
 536   /* no sjis */
 537  if ( ( 0x40 >= lo ) && (lo >= 0xfc) && (lo == 0x7f )) {
 538      w = (GETAHI << 8) | GETALO;
 539
 540  /* IBM Extended Kanji */
 541  } else  if (( w == 0xfa54 )||( w == 0x81ca )) {
 542     *len = 2;
 543     return (0xa2cc);
 544
 545   } else if (( w ==  0xfa5b )||( w == 0x81e6)) {
 546     *len = 2;
 547     return (0xa2e8);
 548
 549   } else if (( 0xfa <= hi ) && ( hi <= 0xfc ) ) {
 550     i = w - 0xfa40 - ( hi - 0xfa )*( 0xfb40 - 0xfafc) - ((lo < 0x7f)? 0 : 1 );
 551     if ( i <= EUC3CONVTBLSIZ ){
 552       *len = 3;
 553       return euc3conv[i];
 554     }
 555
 556 /* NEC selected IBM Extend Kanji */
 557     /* there are 3 code that is not good for conv */
 558   } else if (( 0x8754 <= w ) && ( w <= 0x878a)) {
 559     minidx = 0;
 560     maxidx = EUC3CONV2TBLSIZ;
 561     i = minidx + (maxidx - minidx) % 2;
 562     while ( maxidx >= minidx ) {
 563       if ( euc3conv2[i].sjis > w ) {
 564         maxidx = i-1;
 565       } else if ( w > euc3conv2[i].sjis ) {
 566         minidx = i+1;
 567       } else {
 568         *len = 3;
 569         return (euc3conv2[i].euc);
 570       }
 571       i = (int)( minidx + (maxidx - minidx) % 2 );
 572     }
 573     /* else normal EUC */
 574
 575   } else if (( w == 0xeef9 ) || ( w == 0x81ca )) {
 576     *len = 2;
 577     return (0xa2cc);
 578
 579   } else if (( 0xed <= hi ) && ( hi <= 0xef )) {
 580     minidx = 0;
 581     maxidx = SJISREVTBLSIZ;
 582     i = 10;
 583     while ( maxidx >= minidx ) {
 584       if ( sjisrev[i].start > w ) {
 585         maxidx = i-1;
 586       } else if ( w > sjisrev[i].end ) {
 587         minidx = i+1;
 588       } else {
 589         w -= sjisrev[i].start;
 590         w += sjisrev[i].rstart;
 591         break;
 592       }
 593       i = (int)( minidx + (maxidx - minidx) % 2 );
 594     }
 595     if ( w >= 0xfa40 ) {
 596       i = w - 0xfa40 - ( hi - 0xfa )*( 0xfb40 - 0xfafc) - ((lo < 0x7f)? 0 : 1 );
 597       if ( i <= EUC3CONVTBLSIZ ){
 598         *len = 3;
 599         return euc3conv[i];
 600       } else {
 601         w = (GETAHI << 8) | GETALO;
 602       }
 603     }
 604     /* else normal EUC */
 605
 606 /* UDC half low*/
 607 /* this area maps to the G2 UDC area: 0xf5a1 -- 0xfefe */
 608   } else if ((0xf0 <= hi) && (hi <= 0xf4)) {
 609     *len = 2;
 610     if (lo >= 0x9f) {
 611       return (((hi * 2 - 0xea) << 8) | (lo + 2));
 612     } else {
 613       return (((hi * 2 - 0xeb) << 8) | (lo + (lo >=0x7f ? 0x60: 0x61 )));
 614     }
 615
 616 /* UDC half high*/
 617 /* this area maps to the G3 UDC area: 0xf8f5a1 -- 0xf8fefe */
 618   } else if ((0xf5 <= hi) && (hi <= 0xf9)) {
 619     *len = 3;
 620     if (lo >= 0x9f) {
 621       return (((hi*2 - 0xf4) << 8) | (lo + 2));
 622     } else {
 623       return (((hi*2 - 0xf5) << 8) | (lo + (lo >= 0x7f ? 0x60: 0x61 )));
 624     }
 625     /* ....checked all special case */
 626   }
 627
 628   /*  These Normal 2 byte EUC */
 629   *len = 2;
 630   hi = (int) ((w >> 8) & 0xff);
 631   lo = (int) (w & 0xff);
 632
 633   if (hi >= 0xf0) {    /* Check range */
 634      hi = GETAHI;
 635      lo = GETALO;
 636   }
 637
 638   if (lo >= 0x9f)
 639     return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
 640   else
 641     return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
 642             (lo + (lo >= 0x7f ? 0x60 : 0x61));
 643 }
 644
 645 static int  euc3sjis (int hi, int lo, BOOL is_3byte)
 646 {
 647   int w;
 648
 649   w = (int)((hi << 8) | lo);
 650   if (is_3byte) {
 651     if (( 0xf5 <= hi) && ( hi <= 0xfe)) {
 652      /* UDC half high*/
 653      /* this area maps to the G3 UDC area */
 654      /* 0xf8f5a1 -- 0xf8fefe --> 0xf540 -- 0xf9fc */
 655       if (hi & 1) {
 656         return (((hi / 2 + 0x7b) << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61)));
 657       } else {
 658         return (((hi / 2 + 0x7a) << 8) | (lo - 2));
 659       }
 660     } else {
 661       /* Using map table */
 662       int minidx = 0;
 663       int maxidx = EUC3REVTBLSIZ;
 664       int i = minidx + (maxidx - minidx) % 2;
 665
 666       while ( maxidx >= minidx ) {
 667         if (euc3rev[i].euc > w) {
 668           maxidx = i-1;
 669         } else if (euc3rev[i].euc < w) {
 670           minidx = i+1;
 671         } else {
 672           return (euc3rev[i].sjis);
 673         }
 674         i = (int)( minidx + ( maxidx - minidx ) % 2);
 675       }
 676       return ((GETAHI << 8 ) | GETALO);
 677     }
 678   } else { /* is_2byte */
 679     if ((0xf5 <= hi) && (hi <= 0xfe)) {
 680       /* UDC half low*/
 681       /* this area maps to the G2 UDC area */
 682       /* 0xf5a1 -- 0xfefe  --> 0xf040 -- 0xf4fc */
 683       if (hi & 1) {
 684         return (((hi / 2 + 0x76) << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61)));
 685       } else {
 686         return (((hi / 2 + 0x75) << 8) | (lo - 2));
 687       }
 688     } else { /* Normal EUC */
 689       if (hi & 1) {
 690         hi = hi / 2 + (hi < 0xdf ? 0x31 : 0x71);
 691         return ((hi << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61)));
 692       } else {
 693         hi = hi / 2 + (hi < 0xdf ? 0x30 : 0x70);
 694         return ((hi << 8) | (lo - 2));
 695       }
 696     }
 697   }
 698 }
 699
 700 /*******************************************************************
 701  Convert FROM contain SHIFT JIS codes to EUC codes (with SS2)
 702  return converted buffer
 703 ********************************************************************/
 704
 705 static char *sj_to_euc3_static(const char *from)
 706 {
 707   char *out;
 708   int len;
 709
 710   for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4);) {
 711     if (is_shift_jis (*from)) {
 712       int code = sjis3euc ((int) from[0] & 0xff, (int) from[1] & 0xff, &len);
 713       if (len == 3) {
 714         *out++ = (char)euc_sup;
 715       }
 716       *out++ = (code >> 8) & 0xff;
 717       *out++ = code & 0xff;
 718       from += 2;
 719     } else if (is_kana (*from)) {
 720       *out++ = (char)euc_kana;
 721       *out++ = *from++;
 722     } else {
 723       *out++ = *from++;
 724     }
 725   }
 726   *out = 0;
 727   return cvtbuf;
 728 }
 729
 730 static char *sj_to_euc3(char *from)
 731 {
 732   pstrcpy(from, sj_to_euc3_static(from));
 733   return from;
 734 }
 735
 736 /*******************************************************************
 737  Convert FROM contain EUC codes (with Sup-Kanji) to SHIFT JIS codes
 738  return converted buffer
 739 ********************************************************************/
 740
 741 static char *euc3_to_sj_static(const char *from)
 742 {
 743   char *out;
 744
 745   for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3); ) {
 746     if (is_euc_sup (*from)) {
 747       int code = euc3sjis((int) from[1] & 0xff, (int) from[2] & 0xff, True);
 748       *out++ = (code >> 8) & 0xff;
 749       *out++ = code & 0xff;
 750       from += 3;
 751     } else if (is_euc (*from)) {
 752       int code = euc3sjis ((int) from[0] & 0xff, (int) from[1] & 0xff,False);
 753       *out++ = (code >> 8) & 0xff;
 754       *out++ = code & 0xff;
 755       from += 2;
 756     } else if (is_euc_kana (*from)) {
 757       *out++ = from[1];
 758       from += 2;
 759     } else {
 760       *out++ = *from++;
 761     }
 762   }
 763   *out = 0;
 764   return cvtbuf;
 765 }
 766
 767 static char *euc3_to_sj(char *from)
 768 {
 769   pstrcpy(from, euc3_to_sj_static(from));
 770   return from;
 771 }
 772
 773 /*******************************************************************
 774   JIS7,JIS8,JUNET <-> SJIS
 775 ********************************************************************/
 776
 777 static int sjis2jis(int hi, int lo)
 778 {
 779   int minidx = 0;
 780   int maxidx = SJISCONVTBLSIZ -1; /* max index 1 less than number of entries */
 781   int i = (0 + SJISCONVTBLSIZ) % 2;
 782   int w = (int)((hi << 8) | lo);
 783
 784   if ((sjisconv[0].start < w) && (w < sjisconv[SJISCONVTBLSIZ-1].end)) {
 785     while (maxidx >= minidx) {
 786       if (sjisconv[i].start > w) {
 787         maxidx = i-1;
 788       } else if (w > sjisconv[i].end) {
 789         minidx = i+1;
 790       } else {
 791         w -= sjisconv[i].start;
 792         w += sjisconv[i].rstart;
 793         break;
 794       }
 795       i = (int)( minidx + (maxidx-minidx) %2 );
 796     }
 797     hi = (int) ((w >> 8) & 0xff);
 798     lo = (int) (w & 0xff);
 799   }
 800   if (hi >= 0xf0) {
 801      hi = GETAHI;
 802      lo = GETALO;
 803   }
 804   if (lo >= 0x9f)
 805     return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e);
 806   else
 807     return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) |
 808             (lo - (lo >= 0x7f ? 0x20 : 0x1f));
 809 }
 810
 811 static int jis2sjis(int hi, int lo)
 812 {
 813   int w;
 814   int minidx = 0;
 815   int maxidx = SJISREVTBLSIZ;
 816   int i = 2;
 817
 818   if (hi & 1) {
 819     hi = hi / 2 + (hi < 0x5f ? 0x71 : 0xb1);
 820     w  = (hi << 8) | (lo + (lo >= 0x60 ? 0x20 : 0x1f));
 821   } else {
 822     hi = hi / 2 + (hi < 0x5f ? 0x70 : 0xb0);
 823     w  = (hi << 8) | (lo + 0x7e);
 824   }
 825
 826   if  (( 0x87 < hi ) && ( hi < 0xed )) {
 827     return w;
 828   }
 829   while (maxidx >= minidx) {
 830     if (sjisrev[i].start > w) {
 831       maxidx = i-1;
 832     } else if (w > sjisrev[i].end) {
 833       minidx = i+1;
 834     } else {
 835       w -= sjisrev[i].start;
 836       w += sjisrev[i].rstart;
 837       break;
 838     }
 839     i = (int)( minidx + (maxidx-minidx) %2 );
 840   }
 841   return w;
 842 }
 843
 844 /*******************************************************************
 845  Convert FROM contain JIS codes to SHIFT JIS codes
 846  return converted buffer
 847 ********************************************************************/
 848
 849 static char *jis8_to_sj_static(const char *from)
 850 {
 851   char *out;
 852   int shifted;
 853
 854   shifted = _KJ_ROMAN;
 855   for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) {
 856     if (is_esc (*from)) {
 857       if (is_so1 (from[1]) && is_so2 (from[2])) {
 858         shifted = _KJ_KANJI;
 859         from += 3;
 860       } else if (is_si1 (from[1]) && is_si2 (from[2])) {
 861         shifted = _KJ_ROMAN;
 862         from += 3;
 863       } else { /* sequence error */
 864         goto normal;
 865       }
 866     } else {
 867
 868 normal:
 869
 870       switch (shifted) {
 871       default:
 872       case _KJ_ROMAN:
 873         *out++ = *from++;
 874         break;
 875       case _KJ_KANJI:
 876         {
 877           int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 878           *out++ = (code >> 8) & 0xff;
 879           *out++ = code;
 880           from += 2;
 881           break;
 882         }
 883       }
 884     }
 885   }
 886
 887   *out = 0;
 888   return cvtbuf;
 889 }
 890
 891 static char *jis8_to_sj(char *from)
 892 {
 893   pstrcpy(from, jis8_to_sj_static(from));
 894   return from;
 895 }
 896
 897 /*******************************************************************
 898  Convert FROM contain SHIFT JIS codes to JIS codes
 899  return converted buffer
 900 ********************************************************************/
 901
 902 static char *sj_to_jis8_static(const char *from)
 903 {
 904   char *out;
 905   int shifted;
 906
 907   shifted = _KJ_ROMAN;
 908   for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) {
 909     if (is_shift_jis (*from)) {
 910       int code;
 911       switch (shifted) {
 912       case _KJ_ROMAN: /* to KANJI */
 913         *out++ = jis_esc;
 914         *out++ = jis_so1;
 915         *out++ = jis_kso;
 916         shifted = _KJ_KANJI;
 917         break;
 918       }
 919       code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 920       *out++ = (code >> 8) & 0xff;
 921       *out++ = code;
 922       from += 2;
 923     } else {
 924       switch (shifted) {
 925       case _KJ_KANJI: /* to ROMAN/KANA */
 926         *out++ = jis_esc;
 927         *out++ = jis_si1;
 928         *out++ = jis_ksi;
 929         shifted = _KJ_ROMAN;
 930         break;
 931       }
 932       *out++ = *from++;
 933     }
 934   }
 935
 936   switch (shifted) {
 937   case _KJ_KANJI: /* to ROMAN/KANA */
 938     *out++ = jis_esc;
 939     *out++ = jis_si1;
 940     *out++ = jis_ksi;
 941     shifted = _KJ_ROMAN;
 942     break;
 943   }
 944   *out = 0;
 945   return cvtbuf;
 946 }
 947
 948 static char *sj_to_jis8(char *from)
 949 {
 950   pstrcpy(from, sj_to_jis8_static(from));
 951   return from;
 952 }
 953
 954 /*******************************************************************
 955  Convert FROM contain 7 bits JIS codes to SHIFT JIS codes
 956  return converted buffer
 957 ********************************************************************/
 958
 959 static char *jis7_to_sj_static(const char *from)
 960 {
 961     char *out;
 962     int shifted;
 963
 964     shifted = _KJ_ROMAN;
 965     for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) {
 966         if (is_esc (*from)) {
 967             if (is_so1 (from[1]) && is_so2 (from[2])) {
 968                 shifted = _KJ_KANJI;
 969                 from += 3;
 970             } else if (is_si1 (from[1]) && is_si2 (from[2])) {
 971                 shifted = _KJ_ROMAN;
 972                 from += 3;
 973             } else {                    /* sequence error */
 974                 goto normal;
 975             }
 976         } else if (is_so (*from)) {
 977             shifted = _KJ_KANA;         /* to KANA */
 978             from++;
 979         } else if (is_si (*from)) {
 980             shifted = _KJ_ROMAN;        /* to ROMAN */
 981             from++;
 982         } else {
 983         normal:
 984             switch (shifted) {
 985             default:
 986             case _KJ_ROMAN:
 987                 *out++ = *from++;
 988                 break;
 989             case _KJ_KANJI:
 990                 {
 991                     int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
 992                     *out++ = (code >> 8) & 0xff;
 993                     *out++ = code;
 994                     from += 2;
 995                 }
 996                 break;
 997             case _KJ_KANA:
 998                 *out++ = ((int) from[0]) + 0x80;
 999                 break;
1000             }
1001         }
1002     }
1003     *out = 0;
1004     return cvtbuf;
1005 }
1006
/* Convert FROM from JIS7 to SHIFT JIS in place (via pstrcpy) and return FROM. */
static char *jis7_to_sj(char *from)
{
  char *converted = jis7_to_sj_static(from);

  pstrcpy(from, converted);
  return from;
}
1012
1013 /*******************************************************************
1014  Convert FROM contain SHIFT JIS codes to 7 bits JIS codes
1015  return converted buffer
1016 ********************************************************************/
1017
1018 static char *sj_to_jis7_static(const char *from)
1019 {
1020     char *out;
1021     int shifted;
1022
1023     shifted = _KJ_ROMAN;
1024     for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) {
1025         if (is_shift_jis (*from)) {
1026             int code;
1027             switch (shifted) {
1028             case _KJ_KANA:
1029                 *out++ = jis_si;        /* to ROMAN and through down */
1030             case _KJ_ROMAN:             /* to KANJI */
1031                 *out++ = jis_esc;
1032                 *out++ = jis_so1;
1033                 *out++ = jis_kso;
1034                 shifted = _KJ_KANJI;
1035                 break;
1036             }
1037             code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
1038             *out++ = (code >> 8) & 0xff;
1039             *out++ = code;
1040             from += 2;
1041         } else if (is_kana (from[0])) {
1042             switch (shifted) {
1043             case _KJ_KANJI:             /* to ROMAN */
1044                 *out++ = jis_esc;
1045                 *out++ = jis_si1;
1046                 *out++ = jis_ksi;
1047             case _KJ_ROMAN:             /* to KANA */
1048                 *out++ = jis_so;
1049                 shifted = _KJ_KANA;
1050                 break;
1051             }
1052             *out++ = ((int) *from++) - 0x80;
1053         } else {
1054             switch (shifted) {
1055             case _KJ_KANA:
1056                 *out++ = jis_si;        /* to ROMAN */
1057                 shifted = _KJ_ROMAN;
1058                 break;
1059             case _KJ_KANJI:             /* to ROMAN */
1060                 *out++ = jis_esc;
1061                 *out++ = jis_si1;
1062                 *out++ = jis_ksi;
1063                 shifted = _KJ_ROMAN;
1064                 break;
1065             }
1066             *out++ = *from++;
1067         }
1068     }
1069     switch (shifted) {
1070     case _KJ_KANA:
1071         *out++ = jis_si;                /* to ROMAN */
1072         break;
1073     case _KJ_KANJI:                     /* to ROMAN */
1074         *out++ = jis_esc;
1075         *out++ = jis_si1;
1076         *out++ = jis_ksi;
1077         break;
1078     }
1079     *out = 0;
1080     return cvtbuf;
1081 }
1082
/* In-place variant: run sj_to_jis7_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *sj_to_jis7(char *from)
{
    char *converted = sj_to_jis7_static(from);

    pstrcpy(from, converted);
    return from;
}
1088
1089 /*******************************************************************
1090  Convert FROM contain 7 bits JIS(junet) codes to SHIFT JIS codes
1091  return converted buffer
1092 ********************************************************************/
1093
1094 static char *junet_to_sj_static(const char *from)
1095 {
1096     char *out;
1097     int shifted;
1098
1099     shifted = _KJ_ROMAN;
1100     for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) {
1101         if (is_esc (*from)) {
1102             if (is_so1 (from[1]) && is_so2 (from[2])) {
1103                 shifted = _KJ_KANJI;
1104                 from += 3;
1105             } else if (is_si1 (from[1]) && is_si2 (from[2])) {
1106                 shifted = _KJ_ROMAN;
1107                 from += 3;
1108             } else if (is_juk1(from[1]) && is_juk2 (from[2])) {
1109                 shifted = _KJ_KANA;
1110                 from += 3;
1111             } else {                    /* sequence error */
1112                 goto normal;
1113             }
1114         } else {
1115         normal:
1116             switch (shifted) {
1117             default:
1118             case _KJ_ROMAN:
1119                 *out++ = *from++;
1120                 break;
1121             case _KJ_KANJI:
1122                 {
1123                     int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
1124                     *out++ = (code >> 8) & 0xff;
1125                     *out++ = code;
1126                     from += 2;
1127                 }
1128                 break;
1129             case _KJ_KANA:
1130                 *out++ = ((int) from[0]) + 0x80;
1131                 break;
1132             }
1133         }
1134     }
1135     *out = 0;
1136     return cvtbuf;
1137 }
1138
/* In-place variant: run junet_to_sj_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *junet_to_sj(char *from)
{
    char *converted = junet_to_sj_static(from);

    pstrcpy(from, converted);
    return from;
}
1144
1145 /*******************************************************************
1146  Convert FROM contain SHIFT JIS codes to 7 bits JIS(junet) codes
1147  return converted buffer
1148 ********************************************************************/
1149
1150 static char *sj_to_junet_static(const char *from)
1151 {
1152     char *out;
1153     int shifted;
1154
1155     shifted = _KJ_ROMAN;
1156     for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) {
1157         if (is_shift_jis (*from)) {
1158             int code;
1159             switch (shifted) {
1160             case _KJ_KANA:
1161             case _KJ_ROMAN:             /* to KANJI */
1162                 *out++ = jis_esc;
1163                 *out++ = jis_so1;
1164                 *out++ = jis_so2;
1165                 shifted = _KJ_KANJI;
1166                 break;
1167             }
1168             code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
1169             *out++ = (code >> 8) & 0xff;
1170             *out++ = code;
1171             from += 2;
1172         } else if (is_kana (from[0])) {
1173             switch (shifted) {
1174             case _KJ_KANJI:             /* to ROMAN */
1175             case _KJ_ROMAN:             /* to KANA */
1176                 *out++ = jis_esc;
1177                 *out++ = junet_kana1;
1178                 *out++ = junet_kana2;
1179                 shifted = _KJ_KANA;
1180                 break;
1181             }
1182             *out++ = ((int) *from++) - 0x80;
1183         } else {
1184             switch (shifted) {
1185             case _KJ_KANA:
1186             case _KJ_KANJI:             /* to ROMAN */
1187                 *out++ = jis_esc;
1188                 *out++ = jis_si1;
1189                 *out++ = jis_si2;
1190                 shifted = _KJ_ROMAN;
1191                 break;
1192             }
1193             *out++ = *from++;
1194         }
1195     }
1196     switch (shifted) {
1197     case _KJ_KANA:
1198     case _KJ_KANJI:                     /* to ROMAN */
1199         *out++ = jis_esc;
1200         *out++ = jis_si1;
1201         *out++ = jis_si2;
1202         break;
1203     }
1204     *out = 0;
1205     return cvtbuf;
1206 }
1207
/* In-place variant: run sj_to_junet_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *sj_to_junet(char *from)
{
    char *converted = sj_to_junet_static(from);

    pstrcpy(from, converted);
    return from;
}
1213
1214 /*******************************************************************
1215   HEX <-> SJIS
1216 ********************************************************************/
1217 /* ":xx" -> a byte */
1218
1219 static char *hex_to_sj_static(const char *from)
1220 {
1221     const char *sp;
1222     char *dp;
1223
1224     sp = from;
1225     dp = cvtbuf;
1226     while (*sp && (dp - cvtbuf < sizeof(cvtbuf)-3)) {
1227         if (*sp == hex_tag && isxdigit((int)sp[1]) && isxdigit((int)sp[2])) {
1228             *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
1229             sp += 3;
1230         } else
1231             *dp++ = *sp++;
1232     }
1233     *dp = '\0';
1234     return cvtbuf;
1235 }
1236
/* In-place variant: run hex_to_sj_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *hex_to_sj(char *from)
{
    char *converted = hex_to_sj_static(from);

    pstrcpy(from, converted);
    return from;
}
1242
1243 /*******************************************************************
1244   kanji/kana -> ":xx"
1245 ********************************************************************/
1246
1247 static char *sj_to_hex_static(const char *from)
1248 {
1249     const unsigned char *sp;
1250     unsigned char *dp;
1251
1252     sp = (const uchar *)from;
1253     dp = (unsigned char*) cvtbuf;
1254     while (*sp && (((char *)dp)- cvtbuf < sizeof(cvtbuf)-7)) {
1255         if (is_kana(*sp)) {
1256             *dp++ = hex_tag;
1257             *dp++ = bin2hex (((*sp)>>4)&0x0f);
1258             *dp++ = bin2hex ((*sp)&0x0f);
1259             sp++;
1260         } else if (is_shift_jis (*sp) && is_shift_jis2 (sp[1])) {
1261             *dp++ = hex_tag;
1262             *dp++ = bin2hex (((*sp)>>4)&0x0f);
1263             *dp++ = bin2hex ((*sp)&0x0f);
1264             sp++;
1265             *dp++ = hex_tag;
1266             *dp++ = bin2hex (((*sp)>>4)&0x0f);
1267             *dp++ = bin2hex ((*sp)&0x0f);
1268             sp++;
1269         } else
1270             *dp++ = *sp++;
1271     }
1272     *dp = '\0';
1273     return cvtbuf;
1274 }
1275
/* In-place variant: run sj_to_hex_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *sj_to_hex(char *from)
{
    char *converted = sj_to_hex_static(from);

    pstrcpy(from, converted);
    return from;
}
1281
1282 /*******************************************************************
1283   CAP <-> SJIS
1284 ********************************************************************/
1285 /* ":xx" CAP -> a byte */
1286 static char *cap_to_sj_static(const char *from)
1287 {
1288     const char *sp;
1289     char *dp;
1290
1291     sp = (const char *) from;
1292     dp = cvtbuf;
1293     while (*sp && (dp- cvtbuf < sizeof(cvtbuf)-2)) {
1294         /*
1295          * The only change between this and hex_to_sj is here. sj_to_cap only
1296          * translates characters greater or equal to 0x80 - make sure that here
1297          * we only do the reverse (that's why the strchr is used rather than
1298          * isxdigit. Based on fix from ado@elsie.nci.nih.gov (Arthur David Olson).
1299          */
1300         if (*sp == hex_tag && (strchr ("89abcdefABCDEF", sp[1]) != NULL) && isxdigit((int)sp[2])) {
1301             *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
1302             sp += 3;
1303         } else
1304             *dp++ = *sp++;
1305     }
1306     *dp = '\0';
1307     return cvtbuf;
1308 }
1309
/* In-place variant: run cap_to_sj_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *cap_to_sj(char *from)
{
    char *converted = cap_to_sj_static(from);

    pstrcpy(from, converted);
    return from;
}
1315
1316 /*******************************************************************
1317   kanji/kana -> ":xx" - CAP format.
1318 ********************************************************************/
1319 static char *sj_to_cap_static(const char *from)
1320 {
1321     const unsigned char *sp;
1322     unsigned char *dp;
1323
1324     sp = (const uchar *)from;
1325     dp = (unsigned char*) cvtbuf;
1326     while (*sp && (((char *)dp) - cvtbuf < sizeof(cvtbuf)-4)) {
1327         if (*sp >= 0x80) {
1328             *dp++ = hex_tag;
1329             *dp++ = bin2hex (((*sp)>>4)&0x0f);
1330             *dp++ = bin2hex ((*sp)&0x0f);
1331             sp++;
1332         } else {
1333             *dp++ = *sp++;
1334         }
1335     }
1336     *dp = '\0';
1337     return cvtbuf;
1338 }
1339
/* In-place variant: run sj_to_cap_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *sj_to_cap(char *from)
{
    char *converted = sj_to_cap_static(from);

    pstrcpy(from, converted);
    return from;
}
1345
1346 /*******************************************************************
1347  sj to sj
1348 ********************************************************************/
1349
1350 static char *sj_to_sj_static(const char *from)
1351 {
1352         pstrcpy (cvtbuf, from);
1353         return cvtbuf;
1354 }
1355
/* In-place identity conversion: FROM is returned untouched. */
static char *sj_to_sj(char *from)
{
    char *const unchanged = from;

    return unchanged;
}
1360
1361 /*******************************************************************
1362  cp to utf8
1363 ********************************************************************/
1364 static char *cp_to_utf8_static(const char *from)
1365 {
1366   unsigned char *dst;
1367   const unsigned char *src;
1368   smb_ucs2_t val;
1369   int w;
1370   size_t len;
1371
1372   src = (const unsigned char *)from;
1373   dst = (unsigned char *)cvtbuf;
1374   while (*src && (((char *)dst - cvtbuf) < sizeof(cvtbuf)-4)) {
1375     len = _skip_multibyte_char(*src);
1376     if ( len == 2 ) {
1377       w = (int)(*src++ & 0xff);
1378       w = (int)((w << 8)|(*src++ & 0xff));
1379     } else {
1380       w = (int)(*src++ & 0xff);
1381     }
1382     val = doscp2ucs2(w);
1383
1384     if ( val <= 0x7f ) {
1385       *dst++ = (char)(val & 0xff);
1386     } else if ( val <= 0x7ff ){
1387       *dst++ = (char)( 0xc0 | ((val >> 6) & 0xff));
1388       *dst++ = (char)( 0x80 | ( val & 0x3f ));
1389     } else {
1390       *dst++ = (char)( 0xe0 | ((val >> 12) & 0x0f));
1391       *dst++ = (char)( 0x80 | ((val >> 6)  & 0x3f));
1392       *dst++ = (char)( 0x80 | (val & 0x3f));
1393     }
1394
1395   }
1396   *dst++='\0';
1397   return cvtbuf;
1398 }
1399
/* In-place variant: run cp_to_utf8_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *cp_to_utf8(char *from)
{
    char *converted = cp_to_utf8_static(from);

    pstrcpy(from, converted);
    return from;
}
1405
1406 /*******************************************************************
1407  utf8 to cp
1408 ********************************************************************/
1409 static char *utf8_to_cp_static(const char *from)
1410 {
1411   const unsigned char *src;
1412   unsigned char *dst;
1413   smb_ucs2_t val;
1414   int w;
1415
1416   src = (const unsigned char *)from;
1417   dst = (unsigned char *)cvtbuf;
1418
1419   while (*src && ((char *)dst - cvtbuf < sizeof(cvtbuf)-4)) {
1420     val = (*src++ & 0xff);
1421     if (val < 0x80) {
1422       *dst++ = (char)(val & 0x7f);
1423     } else if ((0xc0 <= val) && (val <= 0xdf)
1424                && (0x80 <= *src) && (*src <= 0xbf)) {
1425       w = ucs2doscp( ((val & 31) << 6)  | ((*src++) & 63 ));
1426       *dst++ = (char)((w >> 8) & 0xff);
1427       *dst++ = (char)(w & 0xff);
1428     } else {
1429       val  = (val & 0x0f) << 12;
1430       val |= ((*src++ & 0x3f) << 6);
1431       val |= (*src++ & 0x3f);
1432       w = ucs2doscp(val);
1433       *dst++ = (char)((w >> 8) & 0xff);
1434       *dst++ = (char)(w & 0xff);
1435     }
1436   }
1437   *dst++='\0';
1438   return cvtbuf;
1439 }
1440
/* In-place variant: run utf8_to_cp_static() on FROM and copy the
   result back over FROM with pstrcpy().  Returns FROM. */
static char *utf8_to_cp(char *from)
{
    char *converted = utf8_to_cp_static(from);

    pstrcpy(from, converted);
    return from;
}
1446
1447 /************************************************************************
1448  conversion:
1449  _dos_to_unix           _unix_to_dos
1450 ************************************************************************/
1451
1452 static void setup_string_function(int codes)
1453 {
1454     switch (codes) {
1455     default:
1456         _dos_to_unix = dos2unix_format;
1457         _dos_to_unix_static = dos2unix_format_static;
1458         _unix_to_dos = unix2dos_format;
1459         _unix_to_dos_static = unix2dos_format_static;
1460         break;
1461
1462     case SJIS_CODE:
1463         _dos_to_unix = sj_to_sj;
1464         _dos_to_unix_static = sj_to_sj_static;
1465         _unix_to_dos = sj_to_sj;
1466         _unix_to_dos_static = sj_to_sj_static;
1467         break;
1468
1469     case EUC_CODE:
1470         _dos_to_unix = sj_to_euc;
1471         _dos_to_unix_static = sj_to_euc_static;
1472         _unix_to_dos = euc_to_sj;
1473         _unix_to_dos_static = euc_to_sj_static;
1474         break;
1475
1476     case JIS7_CODE:
1477         _dos_to_unix = sj_to_jis7;
1478         _dos_to_unix_static = sj_to_jis7_static;
1479         _unix_to_dos = jis7_to_sj;
1480         _unix_to_dos_static = jis7_to_sj_static;
1481         break;
1482
1483     case JIS8_CODE:
1484         _dos_to_unix = sj_to_jis8;
1485         _dos_to_unix_static = sj_to_jis8_static;
1486         _unix_to_dos = jis8_to_sj;
1487         _unix_to_dos_static = jis8_to_sj_static;
1488         break;
1489
1490     case JUNET_CODE:
1491         _dos_to_unix = sj_to_junet;
1492         _dos_to_unix_static = sj_to_junet_static;
1493         _unix_to_dos = junet_to_sj;
1494         _unix_to_dos_static = junet_to_sj_static;
1495         break;
1496
1497     case HEX_CODE:
1498         _dos_to_unix = sj_to_hex;
1499         _dos_to_unix_static = sj_to_hex_static;
1500         _unix_to_dos = hex_to_sj;
1501         _unix_to_dos_static = hex_to_sj_static;
1502         break;
1503
1504     case CAP_CODE:
1505         _dos_to_unix = sj_to_cap;
1506         _dos_to_unix_static = sj_to_cap_static;
1507         _unix_to_dos = cap_to_sj;
1508         _unix_to_dos_static = cap_to_sj_static;
1509         break;
1510
1511     case UTF8_CODE:
1512         _dos_to_unix = cp_to_utf8;
1513         _dos_to_unix_static = cp_to_utf8_static;
1514         _unix_to_dos = utf8_to_cp;
1515         _unix_to_dos_static = utf8_to_cp_static;
1516         break;
1517
1518     case EUC3_CODE:
1519         _dos_to_unix = sj_to_euc3;
1520         _dos_to_unix_static = sj_to_euc3_static;
1521         _unix_to_dos = euc3_to_sj;
1522         _unix_to_dos_static = euc3_to_sj_static;
1523         break;
1524     }
1525 }
1526
1527 /************************************************************************
1528  Interpret coding system.
1529 ************************************************************************/
1530
1531 void interpret_coding_system(char *str)
1532 {
1533     int codes = UNKNOWN_CODE;
1534
1535     if (strequal (str, "sjis")) {
1536         codes = SJIS_CODE;
1537     } else if (strequal (str, "euc")) {
1538         codes = EUC_CODE;
1539     } else if (strequal (str, "cap")) {
1540         codes = CAP_CODE;
1541         hex_tag = HEXTAG;
1542     } else if (strequal (str, "hex")) {
1543         codes = HEX_CODE;
1544         hex_tag = HEXTAG;
1545     } else if (!strncasecmp (str, "hex", 3)) {
1546         codes = HEX_CODE;
1547         hex_tag = (str[3] ? str[3] : HEXTAG);
1548     } else if (strequal (str, "j8bb")) {
1549         codes = JIS8_CODE;
1550         jis_kso = 'B';
1551         jis_ksi = 'B';
1552     } else if (strequal (str, "j8bj") || strequal (str, "jis8")) {
1553         codes = JIS8_CODE;
1554         jis_kso = 'B';
1555         jis_ksi = 'J';
1556     } else if (strequal (str, "j8bh")) {
1557         codes = JIS8_CODE;
1558         jis_kso = 'B';
1559         jis_ksi = 'H';
1560     } else if (strequal (str, "j8@b")) {
1561         codes = JIS8_CODE;
1562         jis_kso = '@';
1563         jis_ksi = 'B';
1564     } else if (strequal (str, "j8@j")) {
1565         codes = JIS8_CODE;
1566         jis_kso = '@';
1567         jis_ksi = 'J';
1568     } else if (strequal (str, "j8@h")) {
1569         codes = JIS8_CODE;
1570         jis_kso = '@';
1571         jis_ksi = 'H';
1572     } else if (strequal (str, "j7bb")) {
1573         codes = JIS7_CODE;
1574         jis_kso = 'B';
1575         jis_ksi = 'B';
1576     } else if (strequal (str, "j7bj") || strequal (str, "jis7")) {
1577         codes = JIS7_CODE;
1578         jis_kso = 'B';
1579         jis_ksi = 'J';
1580     } else if (strequal (str, "j7bh")) {
1581         codes = JIS7_CODE;
1582         jis_kso = 'B';
1583         jis_ksi = 'H';
1584     } else if (strequal (str, "j7@b")) {
1585         codes = JIS7_CODE;
1586         jis_kso = '@';
1587         jis_ksi = 'B';
1588     } else if (strequal (str, "j7@j")) {
1589         codes = JIS7_CODE;
1590         jis_kso = '@';
1591         jis_ksi = 'J';
1592     } else if (strequal (str, "j7@h")) {
1593         codes = JIS7_CODE;
1594         jis_kso = '@';
1595         jis_ksi = 'H';
1596     } else if (strequal (str, "jubb")) {
1597         codes = JUNET_CODE;
1598         jis_kso = 'B';
1599         jis_ksi = 'B';
1600     } else if (strequal (str, "jubj") || strequal (str, "junet")) {
1601         codes = JUNET_CODE;
1602         jis_kso = 'B';
1603         jis_ksi = 'J';
1604     } else if (strequal (str, "jubh")) {
1605         codes = JUNET_CODE;
1606         jis_kso = 'B';
1607         jis_ksi = 'H';
1608     } else if (strequal (str, "ju@b")) {
1609         codes = JUNET_CODE;
1610         jis_kso = '@';
1611         jis_ksi = 'B';
1612     } else if (strequal (str, "ju@j")) {
1613         codes = JUNET_CODE;
1614         jis_kso = '@';
1615         jis_ksi = 'J';
1616     } else if (strequal (str, "ju@h")) {
1617         codes = JUNET_CODE;
1618         jis_kso = '@';
1619         jis_ksi = 'H';
1620     } else if (strequal (str, "utf8")) {
1621       codes = UTF8_CODE;
1622     } else if (strequal (str, "euc3")) {
1623       codes = EUC3_CODE;
1624     }
1625     setup_string_function (codes);
1626 }
1627
/*******************************************************************
 Non multibyte char function.

 Used for single byte code pages: reports a length of 0 for every
 character.
*******************************************************************/

static size_t skip_non_multibyte_char(char c)
{
  (void)c;      /* the result does not depend on the character */
  return (size_t)0;
}
1636
1637 /*******************************************************************
1638  Function that always says a character isn't multibyte.
1639 *******************************************************************/
1640
1641 static BOOL not_multibyte_char_1(char c)
1642 {
1643   return False;
1644 }
1645
1646 /*******************************************************************
1647  Setup the function pointers for the functions that are replaced
1648  when multi-byte codepages are used.
1649
1650  The dos_to_unix and unix_to_dos function pointers are only
1651  replaced by setup_string_function called by interpret_coding_system
1652  above.
1653 *******************************************************************/
1654
1655 void initialize_multibyte_vectors( int client_codepage)
1656 {
1657   switch( client_codepage )
1658   {
1659   case KANJI_CODEPAGE:
1660     multibyte_strchr = sj_strchr;
1661     multibyte_strrchr = sj_strrchr;
1662     multibyte_strstr = sj_strstr;
1663     multibyte_strtok = sj_strtok;
1664     _skip_multibyte_char = skip_kanji_multibyte_char;
1665     is_multibyte_char_1 = is_kanji_multibyte_char_1;
1666     global_is_multibyte_codepage = True;
1667     break;
1668   case HANGUL_CODEPAGE:
1669     multibyte_strchr = generic_multibyte_strchr;
1670     multibyte_strrchr = generic_multibyte_strrchr;
1671     multibyte_strstr = generic_multibyte_strstr;
1672     multibyte_strtok = generic_multibyte_strtok;
1673     _skip_multibyte_char = skip_generic_multibyte_char;
1674     is_multibyte_char_1 = hangul_is_multibyte_char_1;
1675     global_is_multibyte_codepage = True;
1676     break;
1677   case BIG5_CODEPAGE:
1678     multibyte_strchr = generic_multibyte_strchr;
1679     multibyte_strrchr = generic_multibyte_strrchr;
1680     multibyte_strstr = generic_multibyte_strstr;
1681     multibyte_strtok = generic_multibyte_strtok;
1682     _skip_multibyte_char = skip_generic_multibyte_char;
1683     is_multibyte_char_1 = big5_is_multibyte_char_1;
1684     global_is_multibyte_codepage = True;
1685     break;
1686   case SIMPLIFIED_CHINESE_CODEPAGE:
1687     multibyte_strchr = generic_multibyte_strchr;
1688     multibyte_strrchr = generic_multibyte_strrchr;
1689     multibyte_strstr = generic_multibyte_strstr;
1690     multibyte_strtok = generic_multibyte_strtok;
1691     _skip_multibyte_char = skip_generic_multibyte_char;
1692     is_multibyte_char_1 = simpch_is_multibyte_char_1;
1693     global_is_multibyte_codepage = True;
1694     break;
1695   /*
1696    * Single char size code page.
1697    */
1698   default:
1699     multibyte_strchr = (const char *(*)(const char *, int )) strchr;
1700     multibyte_strrchr = (const char *(*)(const char *, int )) strrchr;
1701     multibyte_strstr = (const char *(*)(const char *, const char *)) strstr;
1702     multibyte_strtok = (char *(*)(char *, const char *)) strtok;
1703     _skip_multibyte_char = skip_non_multibyte_char;
1704     is_multibyte_char_1 = not_multibyte_char_1;
1705     global_is_multibyte_codepage = False;
1706     break;
1707   }
1708 }
1709 /* *******************************************************
1710    function(s) for "dynamic" encoding of SWAT output.
1711    in this version, only dos_to_dos, dos_to_unix, unix_to_dos
1712    are used for bug fix. conversion to web encoding
1713    (to catalog file encoding) is not needed because
1714    they are using same character codes.
1715    **************************************************** */
1716 static char *no_conversion_static(const char *str)
1717 {
1718        static pstring temp;
1719        pstrcpy(temp, str);
1720        return temp;
1721 }
1722 char *(*_dos_to_dos_static)(const char *) = no_conversion_static;