source/lib/util_unistr.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 1.9.
   4    Samba utility functions
   5    Copyright (C) Andrew Tridgell 1992-1998
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 */
  21
  22 #include "includes.h"
  23
  24  smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
  25                                                                 (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
  26                                                                 (smb_ucs2_t)'\r', 0 };
  27 /*
  28  * The following are the codepage to ucs2 and vica versa maps.
  29  * These are dynamically loaded from a unicode translation file.
  30  */
  31
  32 static smb_ucs2_t *doscp_to_ucs2;
  33 static uint16 *ucs2_to_doscp;
  34
  35 static smb_ucs2_t *unixcp_to_ucs2;
  36 static uint16 *ucs2_to_unixcp;
  37
  38 #ifndef MAXUNI
  39 #define MAXUNI 1024
  40 #endif
  41
  42 /*******************************************************************
  43  Write a string in (little-endian) unicode format. src is in
  44  the current UNIX character set. len is the length in bytes of the
  45  string pointed to by dst.
  46
  47  if null_terminate is True then null terminate the packet (adds 2 bytes)
  48
  49  the return value is the length in bytes consumed by the string, including the
  50  null termination if applied
  51 ********************************************************************/
  52
  53 size_t unix_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
  54 {
  55         size_t ret = 0;
  56         while (*src && (len >= 2)) {
  57                 size_t skip = get_character_len(*src);
  58                 smb_ucs2_t val = (*src & 0xff);
  59
  60                 /*
  61                  * If this is a multibyte character (and all DOS/Windows
  62                  * codepages have at maximum 2 byte multibyte characters)
  63                  * then work out the index value for the unicode conversion.
  64                  */
  65
  66                 if (skip == 2)
  67                         val = ((val << 8) | (src[1] & 0xff));
  68
  69                 SSVAL(dst,ret,unixcp_to_ucs2[val]);
  70                 ret += 2;
  71                 len -= 2;
  72                 if (skip)
  73                         src += skip;
  74                 else
  75                         src++;
  76         }
  77         if (null_terminate) {
  78                 SSVAL(dst,ret,0);
  79                 ret += 2;
  80         }
  81         return(ret);
  82 }
  83
  84 /*******************************************************************
  85  Write a string in (little-endian) unicode format. src is in
  86  the current DOS codepage. len is the length in bytes of the
  87  string pointed to by dst.
  88
  89  if null_terminate is True then null terminate the packet (adds 2 bytes)
  90
  91  the return value is the length in bytes consumed by the string, including the
  92  null termination if applied
  93 ********************************************************************/
  94
  95 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
  96 {
  97         size_t ret = 0;
  98         while (*src && (len >= 2)) {
  99                 size_t skip = get_character_len(*src);
 100                 smb_ucs2_t val = (*src & 0xff);
 101
 102                 /*
 103                  * If this is a multibyte character (and all DOS/Windows
 104                  * codepages have at maximum 2 byte multibyte characters)
 105                  * then work out the index value for the unicode conversion.
 106                  */
 107
 108                 if (skip == 2)
 109                         val = ((val << 8) | (src[1] & 0xff));
 110
 111                 SSVAL(dst,ret,doscp_to_ucs2[val]);
 112                 ret += 2;
 113                 len -= 2;
 114                 if (skip)
 115                         src += skip;
 116                 else
 117                         src++;
 118         }
 119         if (null_terminate) {
 120                 SSVAL(dst,ret,0);
 121                 ret += 2;
 122         }
 123         return(ret);
 124 }
 125
 126 /*******************************************************************
 127  Pull a DOS codepage string out of a UNICODE array. len is in bytes.
 128 ********************************************************************/
 129
 130 void unistr_to_dos(char *dest, const char *src, size_t len)
 131 {
 132         char *destend = dest + len;
 133
 134         while (dest < destend) {
 135                 uint16 ucs2_val = SVAL(src,0);
 136                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 137
 138                 src += 2;
 139
 140                 if (ucs2_val == 0)
 141                         break;
 142
 143                 if (cp_val < 256)
 144                         *dest++ = (char)cp_val;
 145                 else {
 146                         *dest++ = (cp_val >> 8) & 0xff;
 147                         *dest++ = (cp_val & 0xff);
 148                 }
 149         }
 150
 151         *dest = 0;
 152 }
 153
 154 /*******************************************************************
 155  Skip past a unicode string, but not more than len. Always move
 156  past a terminating zero if found.
 157 ********************************************************************/
 158
 159 char *skip_unibuf(char *src, size_t len)
 160 {
 161     char *srcend = src + len;
 162
 163     while (src < srcend && SVAL(src,0))
 164         src += 2;
 165
 166     if(!SVAL(src,0))
 167         src += 2;
 168
 169     return src;
 170 }
 171
 172 /*******************************************************************
 173  Return a DOS codepage version of a little-endian unicode string.
 174  len is the filename length (ignoring any terminating zero) in uin16
 175  units. Always null terminates.
 176  Hack alert: uses fixed buffer(s).
 177  len is in 2 byte (unicode) units.
 178 ********************************************************************/
 179
 180 char *dos_unistrn2(uint16 *src, int len)
 181 {
 182         static char lbufs[8][MAXUNI];
 183         static int nexti;
 184         char *lbuf = lbufs[nexti];
 185         char *p;
 186
 187         nexti = (nexti+1)%8;
 188
 189         for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
 190                 uint16 ucs2_val = SVAL(src,0);
 191                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 192
 193                 if (cp_val < 256)
 194                         *p++ = (char)cp_val;
 195                 else {
 196                         *p++ = (cp_val >> 8) & 0xff;
 197                         *p++ = (cp_val & 0xff);
 198                 }
 199         }
 200
 201         *p = 0;
 202         return lbuf;
 203 }
 204
 205 static char lbufs[8][MAXUNI];
 206 static int nexti;
 207
 208 /*******************************************************************
 209  Return a DOS codepage version of a little-endian unicode string.
 210  Hack alert: uses fixed buffer(s).
 211 ********************************************************************/
 212
 213 char *dos_unistr2(uint16 *src)
 214 {
 215         char *lbuf = lbufs[nexti];
 216         char *p;
 217
 218         nexti = (nexti+1)%8;
 219
 220         for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
 221                 uint16 ucs2_val = SVAL(src,0);
 222                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 223
 224                 if (cp_val < 256)
 225                         *p++ = (char)cp_val;
 226                 else {
 227                         *p++ = (cp_val >> 8) & 0xff;
 228                         *p++ = (cp_val & 0xff);
 229                 }
 230         }
 231
 232         *p = 0;
 233         return lbuf;
 234 }
 235
 236 /*******************************************************************
 237 Return a DOS codepage version of a little-endian unicode string
 238 ********************************************************************/
 239
 240 char *dos_unistr2_to_str(UNISTR2 *str)
 241 {
 242         char *lbuf = lbufs[nexti];
 243         char *p;
 244         uint16 *src = str->buffer;
 245
 246         nexti = (nexti+1)%8;
 247
 248         for (p = lbuf; (p - lbuf < MAXUNI-3) && (src - str->buffer < str->uni_str_len) && *src; src++) {
 249                 uint16 ucs2_val = SVAL(src,0);
 250                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 251
 252                 if (cp_val < 256)
 253                         *p++ = (char)cp_val;
 254                 else {
 255                         *p++ = (cp_val >> 8) & 0xff;
 256                         *p++ = (cp_val & 0xff);
 257                 }
 258         }
 259
 260         *p = 0;
 261         return lbuf;
 262 }
 263
 264 /*******************************************************************
 265  Put an ASCII string into a UNICODE array (uint16's).
 266  use little-endian ucs2
 267  ********************************************************************/
 268 void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
 269 {
 270         uint16 *destend = dest + maxlen;
 271         char c;
 272
 273         while (dest < destend) {
 274                 c = *(src++);
 275                 if (c == 0)
 276                         break;
 277
 278                 SSVAL(dest, 0, c);
 279                 dest++;
 280         }
 281
 282         *dest = 0;
 283 }
 284
 285 /*******************************************************************
 286  Pull an ASCII string out of a UNICODE array (uint16's).
 287  ********************************************************************/
 288
 289 void unistr_to_ascii(char *dest, const uint16 *src, int len)
 290 {
 291         char *destend = dest + len;
 292         uint16 c;
 293
 294         if (src == NULL) {
 295                 *dest = '\0';
 296                 return;
 297         }
 298
 299         /* normal code path for a valid 'src' */
 300         while (dest < destend) {
 301                 c = SVAL(src, 0);
 302                 src++;
 303                 if (c == 0)
 304                         break;
 305
 306                 *(dest++) = (char)c;
 307         }
 308
 309         *dest = 0;
 310         return;
 311 }
 312
 313 /*******************************************************************
 314  Convert a (little-endian) UNISTR2 structure to an ASCII string, either
 315  DOS or UNIX codepage.
 316 ********************************************************************/
 317
 318 static void unistr2_to_mbcp(char *dest, const UNISTR2 *str, size_t maxlen, uint16 *ucs2_to_mbcp)
 319 {
 320         char *p;
 321         uint16 *src;
 322         size_t len;
 323
 324         if (str == NULL) {
 325                 *dest='\0';
 326                 return;
 327         }
 328
 329         src = str->buffer;
 330
 331         len = MIN(str->uni_str_len, maxlen);
 332         if (len == 0) {
 333                 *dest='\0';
 334                 return;
 335         }
 336
 337         for (p = dest; (p-dest < maxlen-3) && (src - str->buffer < str->uni_str_len) && *src; src++) {
 338                 uint16 ucs2_val = SVAL(src,0);
 339                 uint16 cp_val = ucs2_to_mbcp[ucs2_val];
 340
 341                 if (cp_val < 256)
 342                         *p++ = (char)cp_val;
 343                 else {
 344                         *p++ = (cp_val >> 8) & 0xff;
 345                         *p++ = (cp_val & 0xff);
 346                 }
 347         }
 348
 349         *p = 0;
 350 }
 351
 352 /*******************************************************************
 353  Convert a (little-endian) UNISTR2 structure to an ASCII string
 354  Warning: this version does DOS codepage.
 355 ********************************************************************/
 356
 357 void unistr2_to_dos(char *dest, const UNISTR2 *str, size_t maxlen)
 358 {
 359         unistr2_to_mbcp(dest, str, maxlen, ucs2_to_doscp);
 360 }
 361
 362 /*******************************************************************
 363  Convert a (little-endian) UNISTR2 structure to an ASCII string
 364  Warning: this version does UNIX codepage.
 365 ********************************************************************/
 366
 367 void unistr2_to_unix(char *dest, const UNISTR2 *str, size_t maxlen)
 368 {
 369         unistr2_to_mbcp(dest, str, maxlen, ucs2_to_unixcp);
 370 }
 371
 372 /*******************************************************************
 373 Return a number stored in a buffer
 374 ********************************************************************/
 375
 376 uint32 buffer2_to_uint32(BUFFER2 *str)
 377 {
 378         if (str->buf_len == 4)
 379                 return IVAL(str->buffer, 0);
 380         else
 381                 return 0;
 382 }
 383
 384 /*******************************************************************
 385 Return a DOS codepage version of a NOTunicode string
 386 ********************************************************************/
 387
 388 char *dos_buffer2_to_str(BUFFER2 *str)
 389 {
 390         char *lbuf = lbufs[nexti];
 391         char *p;
 392         uint16 *src = str->buffer;
 393
 394         nexti = (nexti+1)%8;
 395
 396         for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2) && *src; src++) {
 397                 uint16 ucs2_val = SVAL(src,0);
 398                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 399
 400                 if (cp_val < 256)
 401                         *p++ = (char)cp_val;
 402                 else {
 403                         *p++ = (cp_val >> 8) & 0xff;
 404                         *p++ = (cp_val & 0xff);
 405                 }
 406         }
 407
 408         *p = 0;
 409         return lbuf;
 410 }
 411
 412 /*******************************************************************
 413  Return a dos codepage version of a NOTunicode string
 414 ********************************************************************/
 415
 416 char *dos_buffer2_to_multistr(BUFFER2 *str)
 417 {
 418         char *lbuf = lbufs[nexti];
 419         char *p;
 420         uint16 *src = str->buffer;
 421
 422         nexti = (nexti+1)%8;
 423
 424         for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2); src++) {
 425                 if (*src == 0) {
 426                         *p++ = ' ';
 427                 } else {
 428                         uint16 ucs2_val = SVAL(src,0);
 429                         uint16 cp_val = ucs2_to_doscp[ucs2_val];
 430
 431                         if (cp_val < 256)
 432                                 *p++ = (char)cp_val;
 433                         else {
 434                                 *p++ = (cp_val >> 8) & 0xff;
 435                                 *p++ = (cp_val & 0xff);
 436                         }
 437                 }
 438         }
 439
 440         *p = 0;
 441         return lbuf;
 442 }
 443
 444 /*******************************************************************
 445  Create a null-terminated unicode string from a null-terminated DOS
 446  codepage string.
 447  Return number of unicode chars copied, excluding the null character.
 448  Unicode strings created are in little-endian format.
 449  max_len is in bytes.
 450 ********************************************************************/
 451
 452 size_t dos_struni2(char *dst, const char *src, size_t max_len)
 453 {
 454         size_t len = 0;
 455
 456         if (dst == NULL)
 457                 return 0;
 458
 459         if (src != NULL) {
 460                 for (; ((len*2) < max_len-2) && *src; len++, dst +=2) {
 461                         size_t skip = get_character_len(*src);
 462                         smb_ucs2_t val = (*src & 0xff);
 463
 464                         /*
 465                          * If this is a multibyte character (and all DOS/Windows
 466                          * codepages have at maximum 2 byte multibyte characters)
 467                          * then work out the index value for the unicode conversion.
 468                          */
 469
 470                         if (skip == 2)
 471                                 val = ((val << 8) | (src[1] & 0xff));
 472
 473                         SSVAL(dst,0,doscp_to_ucs2[val]);
 474                         if (skip)
 475                                 src += skip;
 476                         else
 477                                 src++;
 478                 }
 479         }
 480
 481         SSVAL(dst,0,0);
 482
 483         return len;
 484 }
 485
 486 /*******************************************************************
 487  Return a DOS codepage version of a little-endian unicode string.
 488  Hack alert: uses fixed buffer(s).
 489 ********************************************************************/
 490
 491 char *dos_unistr(char *buf)
 492 {
 493         char *lbuf = lbufs[nexti];
 494         uint16 *src = (uint16 *)buf;
 495         char *p;
 496
 497         nexti = (nexti+1)%8;
 498
 499         for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
 500                 uint16 ucs2_val = SVAL(src,0);
 501                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 502
 503                 if (cp_val < 256)
 504                         *p++ = (char)cp_val;
 505                 else {
 506                         *p++ = (cp_val >> 8) & 0xff;
 507                         *p++ = (cp_val & 0xff);
 508                 }
 509         }
 510
 511         *p = 0;
 512         return lbuf;
 513 }
 514
 515 /*******************************************************************
 516  returns the length in number of wide characters
 517  ******************************************************************/
 518 int unistrlen(uint16 *s)
 519 {
 520         int len;
 521
 522         if (!s)
 523                 return -1;
 524
 525         for (len=0; *s; s++,len++);
 526
 527         return len;
 528 }
 529
 530 /*******************************************************************
 531  Strcpy for unicode strings.  returns length (in num of wide chars)
 532 ********************************************************************/
 533
 534 int unistrcpy(uint16 *dst, uint16 *src)
 535 {
 536         int num_wchars = 0;
 537
 538         while (*src) {
 539                 *dst++ = *src++;
 540                 num_wchars++;
 541         }
 542         *dst = 0;
 543
 544         return num_wchars;
 545 }
 546
 547 /*******************************************************************
 548  Free any existing maps.
 549 ********************************************************************/
 550
 551 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 552 {
 553         /* this handles identity mappings where we share the pointer */
 554         if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
 555                 *pp_ucs2_to_cp = NULL;
 556         }
 557
 558         SAFE_FREE(*pp_cp_to_ucs2);
 559         SAFE_FREE(*pp_ucs2_to_cp);
 560 }
 561
 562 /*******************************************************************
 563  Build a default (null) codepage to unicode map.
 564 ********************************************************************/
 565
 566 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 567 {
 568   int i;
 569
 570   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 571
 572   if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
 573     DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
 574     abort();
 575   }
 576
 577   *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
 578   for (i = 0; i < 65536; i++)
 579     (*pp_cp_to_ucs2)[i] = i;
 580 }
 581
 582 /*******************************************************************
 583  Load a codepage to unicode and vica-versa map.
 584 ********************************************************************/
 585
 586 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 587 {
 588   pstring unicode_map_file_name;
 589   FILE *fp = NULL;
 590   SMB_STRUCT_STAT st;
 591   smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
 592   uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
 593   size_t cp_to_ucs2_size;
 594   size_t ucs2_to_cp_size;
 595   size_t i;
 596   size_t size;
 597   char buf[UNICODE_MAP_HEADER_SIZE];
 598
 599   DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
 600
 601   if (*codepage == '\0')
 602     goto clean_and_exit;
 603
 604   if(strlen(lp_codepagedir()) + 13 + strlen(codepage) >
 605      sizeof(unicode_map_file_name)) {
 606     DEBUG(0,("load_unicode_map: filename too long to load\n"));
 607     goto clean_and_exit;
 608   }
 609
 610   pstrcpy(unicode_map_file_name, lp_codepagedir());
 611   pstrcat(unicode_map_file_name, "/");
 612   pstrcat(unicode_map_file_name, "unicode_map.");
 613   pstrcat(unicode_map_file_name, codepage);
 614
 615   if(sys_stat(unicode_map_file_name,&st)!=0) {
 616     DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
 617               unicode_map_file_name));
 618     goto clean_and_exit;
 619   }
 620
 621   size = st.st_size;
 622
 623   if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
 624     DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
 625 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
 626     goto clean_and_exit;
 627   }
 628
 629   if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
 630     DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
 631               unicode_map_file_name, strerror(errno)));
 632     goto clean_and_exit;
 633   }
 634
 635   if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
 636     DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
 637               unicode_map_file_name, strerror(errno)));
 638     goto clean_and_exit;
 639   }
 640
 641   /* Check the version value */
 642   if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
 643     DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
 644 Needed %hu, got %hu.\n",
 645           unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
 646           SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
 647     goto clean_and_exit;
 648   }
 649
 650   /* Check the codepage value */
 651   if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
 652     DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
 653 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
 654     goto clean_and_exit;
 655   }
 656
 657   ucs2_to_cp_size = 2*65536;
 658   if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
 659     /*
 660      * This is a multibyte code page.
 661      */
 662     cp_to_ucs2_size = 2*65536;
 663   } else {
 664     /*
 665      * Single byte code page.
 666      */
 667     cp_to_ucs2_size = 2*256;
 668   }
 669
 670   /*
 671    * Free any old translation tables.
 672    */
 673
 674   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 675
 676   if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
 677     DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
 678     goto clean_and_exit;
 679   }
 680
 681   if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
 682     DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
 683     goto clean_and_exit;
 684   }
 685
 686   if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
 687     DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
 688               unicode_map_file_name, strerror(errno)));
 689     goto clean_and_exit;
 690   }
 691
 692   if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
 693     DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
 694               unicode_map_file_name, strerror(errno)));
 695     goto clean_and_exit;
 696   }
 697
 698   /*
 699    * Now ensure the 16 bit values are in the correct endianness.
 700    */
 701
 702   for (i = 0; i < cp_to_ucs2_size/2; i++)
 703     cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
 704
 705   for (i = 0; i < ucs2_to_cp_size/2; i++)
 706     ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
 707
 708   fclose(fp);
 709
 710   *pp_cp_to_ucs2 = cp_to_ucs2;
 711   *pp_ucs2_to_cp = ucs2_to_cp;
 712
 713   return True;
 714
 715 clean_and_exit:
 716
 717   /* pseudo destructor :-) */
 718
 719   if(fp != NULL)
 720     fclose(fp);
 721
 722   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 723
 724   default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
 725
 726   return False;
 727 }
 728
 729 /*******************************************************************
 730  Load a dos codepage to unicode and vica-versa map.
 731 ********************************************************************/
 732
 733 BOOL load_dos_unicode_map(int codepage)
 734 {
 735   fstring codepage_str;
 736
 737   slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
 738   DEBUG(10,("load_dos_unicode_map: %s\n", codepage_str));
 739   return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
 740 }
 741
 742 /*******************************************************************
 743  Load a UNIX codepage to unicode and vica-versa map.
 744 ********************************************************************/
 745
 746 BOOL load_unix_unicode_map(const char *unix_char_set, BOOL override)
 747 {
 748         static BOOL init_done;
 749         fstring upper_unix_char_set;
 750
 751         fstrcpy(upper_unix_char_set, unix_char_set);
 752         strupper(upper_unix_char_set);
 753
 754         DEBUG(10,("load_unix_unicode_map: %s (init_done=%d, override=%d)\n",
 755                 upper_unix_char_set, (int)init_done, (int)override ));
 756
 757         if (!init_done)
 758                 init_done = True;
 759         else if (!override)
 760                 return True;
 761
 762         return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
 763 }
 764
 765 /*******************************************************************
 766  The following functions reproduce many of the non-UNICODE standard
 767  string functions in Samba.
 768 ********************************************************************/
 769
 770 /*******************************************************************
 771  Convert a UNICODE string to multibyte format. Note that the 'src' is in
 772  native byte order, not little endian. Always zero terminates.
 773  dst_len is in bytes.
 774 ********************************************************************/
 775
 776 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
 777                                   size_t dst_len, const uint16 *ucs2_to_cp)
 778 {
 779         size_t dst_pos;
 780
 781         for(dst_pos = 0; (dst_pos < dst_len - 1) && *src;) {
 782                 smb_ucs2_t val = ucs2_to_cp[*src++];
 783                 if(val < 256) {
 784                         dst[dst_pos++] = (char)val;
 785                 } else {
 786
 787                         if(dst_pos >= dst_len - 2)
 788                                 break;
 789
 790                         /*
 791                          * A 2 byte value is always written as
 792                          * high/low into the buffer stream.
 793                          */
 794
 795                         dst[dst_pos++] = (char)((val >> 8) & 0xff);
 796                         dst[dst_pos++] = (char)(val & 0xff);
 797                 }
 798         }
 799
 800         dst[dst_pos] = '\0';
 801
 802         return dst;
 803 }
 804
 805 /*******************************************************************
 806  Convert a multibyte string to UNICODE format. Note that the 'dst' is in
 807  native byte order, not little endian. Always zero terminates.
 808  dst_len is in bytes.
 809 ********************************************************************/
 810
 811 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
 812                                  size_t dst_len, smb_ucs2_t *cp_to_ucs2)
 813 {
 814         size_t i;
 815
 816         dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
 817
 818         for(i = 0; (i < (dst_len  - 1)) && *src;) {
 819                 size_t skip = skip_multibyte_char(*src);
 820                 smb_ucs2_t val = (*src & 0xff);
 821
 822                 /*
 823                  * If this is a multibyte character
 824                  * then work out the index value for the unicode conversion.
 825                  */
 826
 827                 if (skip == 2)
 828                         val = ((val << 8) | (src[1] & 0xff));
 829
 830                 dst[i++] = cp_to_ucs2[val];
 831                 if (skip)
 832                         src += skip;
 833                 else
 834                         src++;
 835         }
 836
 837         dst[i] = 0;
 838
 839         return dst;
 840 }
 841
 842 /*******************************************************************
 843  Convert a UNICODE string to multibyte format. Note that the 'src' is in
 844  native byte order, not little endian. Always zero terminates.
 845  This function may be replaced if the MB  codepage format is an
 846  encoded one (ie. utf8, hex). See the code in lib/kanji.c
 847  for details. dst_len is in bytes.
 848 ********************************************************************/
 849
 850 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
 851 {
 852         return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
 853 }
 854
 855 /*******************************************************************
 856  Convert a UNIX string to UNICODE format. Note that the 'dst' is in
 857  native byte order, not little endian. Always zero terminates.
 858  This function may be replaced if the UNIX codepage format is a
 859  multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
 860  for details. dst_len is in bytes, not ucs2 units.
 861 ********************************************************************/
 862
 863 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
 864 {
 865         return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
 866 }
 867
 868 /*******************************************************************
 869  Convert a single UNICODE character to unix character. Returns the
 870  number of bytes in the unix character.
 871 ********************************************************************/
 872
 873 size_t unicode_to_unix_char(char *dst, const smb_ucs2_t src)
 874 {
 875         smb_ucs2_t val = ucs2_to_unixcp[src];
 876         if(val < 256) {
 877                 *dst = (char)val;
 878                 return (size_t)1;
 879         }
 880         /*
 881          * A 2 byte value is always written as
 882          * high/low into the buffer stream.
 883          */
 884
 885         dst[0] = (char)((val >> 8) & 0xff);
 886         dst[1] = (char)(val & 0xff);
 887         return (size_t)2;
 888 }
 889
 890 /*******************************************************************
 891  Convert a UNICODE string to DOS format. Note that the 'src' is in
 892  native byte order, not little endian. Always zero terminates.
 893  dst_len is in bytes.
 894 ********************************************************************/
 895
 896 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
 897 {
 898         return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
 899 }
 900
 901 /*******************************************************************
 902  Convert a single UNICODE character to DOS codepage. Returns the
 903  number of bytes in the DOS codepage character.
 904 ********************************************************************/
 905
 906 size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src)
 907 {
 908         smb_ucs2_t val = ucs2_to_doscp[src];
 909         if(val < 256) {
 910                 *dst = (char)val;
 911                 return (size_t)1;
 912         }
 913         /*
 914          * A 2 byte value is always written as
 915          * high/low into the buffer stream.
 916          */
 917
 918         dst[0] = (char)((val >> 8) & 0xff);
 919         dst[1] = (char)(val & 0xff);
 920         return (size_t)2;
 921 }
 922
 923 /*******************************************************************
 924  Convert a DOS string to UNICODE format. Note that the 'dst' is in
 925  native byte order, not little endian. Always zero terminates.
 926  This function may be replaced if the DOS codepage format is a
 927  multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
 928  for details. dst_len is in bytes, not ucs2 units.
 929 ********************************************************************/
 930
 931 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
 932 {
 933         return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
 934 }
 935
 936 /*******************************************************************
 937  Count the number of characters in a smb_ucs2_t string.
 938 ********************************************************************/
 939
 940 size_t strlen_w(const smb_ucs2_t *src)
 941 {
 942   size_t len;
 943
 944   for(len = 0; *src++; len++)
 945     ;
 946
 947   return len;
 948 }
 949
 950 /*******************************************************************
 951  Safe wstring copy into a known length string. maxlength includes
 952  the terminating zero. maxlength is in ucs2 units.
 953 ********************************************************************/
 954
 955 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
 956 {
 957     size_t ucs2_len;
 958
 959     if (!dest) {
 960         DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
 961         return NULL;
 962     }
 963
 964     if (!src) {
 965         *dest = 0;
 966         return dest;
 967     }
 968
 969         maxlength /= sizeof(smb_ucs2_t);
 970
 971         ucs2_len = strlen_w(src);
 972
 973     if (ucs2_len >= maxlength) {
 974                 fstring out;
 975         DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
 976                         (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
 977                         unicode_to_unix(out,src,sizeof(out))) );
 978                 ucs2_len = maxlength - 1;
 979     }
 980
 981     memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
 982     dest[ucs2_len] = 0;
 983     return dest;
 984 }
 985
 986 /*******************************************************************
 987  Safe string cat into a string. maxlength includes the terminating zero.
 988  maxlength is in ucs2 units.
 989 ********************************************************************/
 990
 991 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
 992 {
 993     size_t ucs2_src_len, ucs2_dest_len;
 994
 995     if (!dest) {
 996         DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
 997         return NULL;
 998     }
 999
1000     if (!src)
1001         return dest;
1002
1003     ucs2_src_len = strlen_w(src);
1004     ucs2_dest_len = strlen_w(dest);
1005
1006     if (ucs2_src_len + ucs2_dest_len >= maxlength) {
1007                 fstring out;
1008                 int new_len = maxlength - ucs2_dest_len - 1;
1009         DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
1010                         (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
1011                         unicode_to_unix(out,src,sizeof(out))) );
1012         ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
1013     }
1014
1015     memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
1016     dest[ucs2_dest_len + ucs2_src_len] = 0;
1017     return dest;
1018 }
1019
1020 /*******************************************************************
1021  Compare the two strings s1 and s2.
1022 ********************************************************************/
1023
1024 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1025 {
1026         smb_ucs2_t c1, c2;
1027
1028         for (;;) {
1029                 c1 = *s1++;
1030                 c2 = *s2++;
1031
1032                 if (c1 != c2)
1033                         return c1 - c2;
1034
1035                 if (c1 == 0)
1036                         break;
1037         }
1038         return 0;
1039 }
1040
1041 /*******************************************************************
1042  Compare the first n characters of s1 to s2. len is in ucs2 units.
1043 ********************************************************************/
1044
1045 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
1046 {
1047         smb_ucs2_t c1, c2;
1048
1049         for (; len != 0; --len) {
1050                 c1 = *s1++;
1051                 c2 = *s2++;
1052
1053                 if (c1 != c2)
1054                         return c1 - c2;
1055
1056                 if (c1 == 0)
1057                         break;
1058
1059         }
1060         return 0;
1061 }
1062
1063 /*******************************************************************
1064  Search string s2 from s1.
1065 ********************************************************************/
1066
1067 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1068 {
1069         size_t len = strlen_w(s2);
1070
1071         if (!*s2)
1072                 return (smb_ucs2_t *)s1;
1073
1074         for(;*s1; s1++) {
1075                 if (*s1 == *s2) {
1076                         if (strncmp_w(s1, s2, len) == 0)
1077                                 return (smb_ucs2_t *)s1;
1078                 }
1079         }
1080         return NULL;
1081 }
1082
1083 /*******************************************************************
1084  Search for ucs2 char c from the beginning of s.
1085 ********************************************************************/
1086
1087 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1088 {
1089         do {
1090                 if (*s == c)
1091                         return (smb_ucs2_t *)s;
1092         } while (*s++);
1093
1094         return NULL;
1095 }
1096
1097 /*******************************************************************
1098  Search for ucs2 char c from the end of s.
1099 ********************************************************************/
1100
1101 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1102 {
1103         smb_ucs2_t *retval = 0;
1104
1105         do {
1106                 if (*s == c)
1107                         retval = (smb_ucs2_t *)s;
1108         } while (*s++);
1109
1110         return retval;
1111 }
1112
1113 /*******************************************************************
1114  Search token from s1 separated by any ucs2 char of s2.
1115 ********************************************************************/
1116
1117 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
1118 {
1119         static smb_ucs2_t *s = NULL;
1120         smb_ucs2_t *q;
1121
1122         if (!s1) {
1123                 if (!s)
1124                         return NULL;
1125                 s1 = s;
1126         }
1127
1128         for (q = s1; *s1; s1++) {
1129                 smb_ucs2_t *p = strchr_w(s2, *s1);
1130                 if (p) {
1131                         if (s1 != q) {
1132                                 s = s1 + 1;
1133                                 *s1 = '\0';
1134                                 return q;
1135                         }
1136                         q = s1 + 1;
1137                 }
1138         }
1139
1140         s = NULL;
1141         if (*q)
1142                 return q;
1143
1144         return NULL;
1145 }
1146
1147 /*******************************************************************
1148  Duplicate a ucs2 string.
1149 ********************************************************************/
1150
1151 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1152 {
1153         size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1154         smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1155     if (newstr == NULL)
1156         return NULL;
1157     safe_strcpy_w(newstr, s, newlen);
1158     return newstr;
1159 }
1160
1161 /*******************************************************************
1162  Mapping tables for UNICODE character. Allows toupper/tolower and
1163  isXXX functions to work.
1164
1165  tridge: split into 2 pieces. This saves us 5/6 of the memory
1166  with a small speed penalty
1167  The magic constants are the lower/upper range of the tables two
1168  parts
1169 ********************************************************************/
1170
1171 typedef struct {
1172         smb_ucs2_t lower;
1173         smb_ucs2_t upper;
1174         unsigned char flags;
1175 } smb_unicode_table_t;
1176
1177 #define TABLE1_BOUNDARY 9450
1178 #define TABLE2_BOUNDARY 64256
1179
1180 static smb_unicode_table_t map_table1[] = {
1181 #include "unicode_map_table1.h"
1182 };
1183
1184 static smb_unicode_table_t map_table2[] = {
1185 #include "unicode_map_table2.h"
1186 };
1187
1188 static unsigned char map_table_flags(smb_ucs2_t v)
1189 {
1190         if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1191         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1192         return 0;
1193 }
1194
1195 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1196 {
1197         if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1198         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1199         return v;
1200 }
1201
1202 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1203 {
1204         if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1205         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1206         return v;
1207 }
1208
1209 /*******************************************************************
1210  Is an upper case wchar.
1211 ********************************************************************/
1212
1213 int isupper_w( smb_ucs2_t val)
1214 {
1215         return (map_table_flags(val) & UNI_UPPER);
1216 }
1217
1218 /*******************************************************************
1219  Is a lower case wchar.
1220 ********************************************************************/
1221
1222 int islower_w( smb_ucs2_t val)
1223 {
1224         return (map_table_flags(val) & UNI_LOWER);
1225 }
1226
1227 /*******************************************************************
1228  Is a digit wchar.
1229 ********************************************************************/
1230
1231 int isdigit_w( smb_ucs2_t val)
1232 {
1233         return (map_table_flags(val) & UNI_DIGIT);
1234 }
1235
1236 /*******************************************************************
1237  Is a hex digit wchar.
1238 ********************************************************************/
1239
1240 int isxdigit_w( smb_ucs2_t val)
1241 {
1242         return (map_table_flags(val) & UNI_XDIGIT);
1243 }
1244
1245 /*******************************************************************
1246  Is a space wchar.
1247 ********************************************************************/
1248
1249 int isspace_w( smb_ucs2_t val)
1250 {
1251         return (map_table_flags(val) & UNI_SPACE);
1252 }
1253
1254 /*******************************************************************
1255  Convert a wchar to upper case.
1256 ********************************************************************/
1257
1258 smb_ucs2_t toupper_w( smb_ucs2_t val )
1259 {
1260         return map_table_upper(val);
1261 }
1262
1263 /*******************************************************************
1264  Convert a wchar to lower case.
1265 ********************************************************************/
1266
1267 smb_ucs2_t tolower_w( smb_ucs2_t val )
1268 {
1269         return map_table_lower(val);
1270 }
1271
/*
 * Saved parse position shared by the token routines in this file:
 * next_token_w() reads it when called without a pointer and updates
 * it after each token, and toktocliplist_w() starts from it.
 */
static smb_ucs2_t *last_ptr = NULL;

/* Set the saved parse position to 'ptr'. */
void set_first_token_w(smb_ucs2_t *ptr)
{
        last_ptr = ptr;
}
1278
1279 /****************************************************************************
1280  Get the next token from a string, return False if none found
1281  handles double-quotes.
1282  Based on a routine by GJC@VILLAGE.COM.
1283  Extensively modified by Andrew.Tridgell@anu.edu.au
1284  bufsize is in bytes.
1285 ****************************************************************************/
1286
1287 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t',  (smb_ucs2_t)'\n',  (smb_ucs2_t)'\r', 0};
1288 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1289
1290 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1291 {
1292         smb_ucs2_t *s;
1293         BOOL quoted;
1294         size_t len=1;
1295
1296         /*
1297          * Convert bufsize to smb_ucs2_t units.
1298          */
1299
1300         bufsize /= sizeof(smb_ucs2_t);
1301
1302         if (!ptr)
1303                 ptr = &last_ptr;
1304         if (!ptr)
1305                 return(False);
1306
1307         s = *ptr;
1308
1309         /*
1310          * Default to simple separators.
1311          */
1312
1313         if (!sep)
1314                 sep = sep_list;
1315
1316         /*
1317          * Find the first non sep char.
1318          */
1319
1320         while(*s && strchr_w(sep,*s))
1321                 s++;
1322
1323         /*
1324          * Nothing left ?
1325          */
1326
1327         if (!*s)
1328                 return(False);
1329
1330         /*
1331          * Copy over the token.
1332          */
1333
1334         for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1335                 if (*s == quotechar) {
1336                         quoted = !quoted;
1337                 } else {
1338                         len++;
1339                         *buff++ = *s;
1340                 }
1341         }
1342
1343         *ptr = (*s) ? s+1 : s;
1344         *buff = 0;
1345         last_ptr = *ptr;
1346
1347         return(True);
1348 }
1349
1350 /****************************************************************************
1351  Convert list of tokens to array; dependent on above routine.
1352  Uses last_ptr from above - bit of a hack.
1353 ****************************************************************************/
1354
1355 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1356 {
1357         smb_ucs2_t *s=last_ptr;
1358         int ictok=0;
1359         smb_ucs2_t **ret, **iret;
1360
1361         if (!sep)
1362                 sep = sep_list;
1363
1364         while(*s && strchr_w(sep,*s))
1365                 s++;
1366
1367         /*
1368          * Nothing left ?
1369          */
1370
1371         if (!*s)
1372                 return(NULL);
1373
1374         do {
1375                 ictok++;
1376                 while(*s && (!strchr_w(sep,*s)))
1377                         s++;
1378                 while(*s && strchr_w(sep,*s))
1379                         *s++=0;
1380         } while(*s);
1381
1382         *ctok = ictok;
1383         s = last_ptr;
1384
1385         if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
1386                 return NULL;
1387
1388         while(ictok--) {
1389                 *iret++=s;
1390                 while(*s++)
1391                         ;
1392                 while(!*s)
1393                         s++;
1394         }
1395
1396         return ret;
1397 }
1398
1399 /*******************************************************************
1400  Case insensitive string compararison.
1401 ********************************************************************/
1402
1403 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1404 {
1405         /*
1406          * Compare until we run out of string, either t or s, or find a difference.
1407          */
1408
1409         while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1410                 s++;
1411                 t++;
1412         }
1413
1414         return(toupper_w(*s) - toupper_w(*t));
1415 }
1416
1417 /*******************************************************************
1418  Case insensitive string compararison, length limited.
1419  n is in ucs2 units.
1420 ********************************************************************/
1421
1422 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1423 {
1424         /*
1425          * Compare until we run out of string, either t or s, or chars.
1426          */
1427
1428         while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1429                 s++;
1430                 t++;
1431                 n--;
1432         }
1433
1434     /*
1435          * Not run out of chars - strings are different lengths.
1436          */
1437
1438     if (n)
1439       return(toupper_w(*s) - toupper_w(*t));
1440
1441     /*
1442          * Identical up to where we run out of chars,
1443          * and strings are same length.
1444          */
1445
1446         return(0);
1447 }
1448
1449 /*******************************************************************
1450  Compare 2 strings.
1451 ********************************************************************/
1452
1453 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1454 {
1455         if (s1 == s2)
1456                 return(True);
1457         if (!s1 || !s2)
1458                 return(False);
1459
1460         return(StrCaseCmp_w(s1,s2)==0);
1461 }
1462
1463 /*******************************************************************
1464  Compare 2 strings up to and including the nth char. n is in ucs2
1465  units.
1466 ******************************************************************/
1467
1468 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1469 {
1470         if (s1 == s2)
1471                 return(True);
1472         if (!s1 || !s2 || !n)
1473                 return(False);
1474
1475         return(StrnCaseCmp_w(s1,s2,n)==0);
1476 }
1477
1478 /*******************************************************************
1479  Compare 2 strings (case sensitive).
1480 ********************************************************************/
1481
1482 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1483 {
1484         if (s1 == s2)
1485                 return(True);
1486         if (!s1 || !s2)
1487                 return(False);
1488
1489         return(strcmp_w(s1,s2)==0);
1490 }
1491
1492 /*******************************************************************
1493  Convert a string to lower case.
1494 ********************************************************************/
1495
1496 void strlower_w(smb_ucs2_t *s)
1497 {
1498         while (*s) {
1499                 if (isupper_w(*s))
1500                         *s = tolower_w(*s);
1501                 s++;
1502         }
1503 }
1504
1505 /*******************************************************************
1506  Convert a string to upper case.
1507 ********************************************************************/
1508
1509 void strupper_w(smb_ucs2_t *s)
1510 {
1511         while (*s) {
1512                 if (islower_w(*s))
1513                         *s = toupper_w(*s);
1514                 s++;
1515         }
1516 }
1517
1518 /*******************************************************************
1519  Convert a string to "normal" form.
1520 ********************************************************************/
1521
1522 void strnorm_w(smb_ucs2_t *s)
1523 {
1524         extern int case_default;
1525         if (case_default == CASE_UPPER)
1526                 strupper_w(s);
1527         else
1528                 strlower_w(s);
1529 }
1530
1531 /*******************************************************************
1532  Check if a string is in "normal" case.
1533 ********************************************************************/
1534
1535 BOOL strisnormal_w(smb_ucs2_t *s)
1536 {
1537         extern int case_default;
1538         if (case_default == CASE_UPPER)
1539                 return(!strhaslower_w(s));
1540
1541         return(!strhasupper_w(s));
1542 }
1543
1544 /****************************************************************************
1545  String replace.
1546 ****************************************************************************/
1547
1548 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1549 {
1550         while (*s) {
1551                 if (oldc == *s)
1552                         *s = newc;
1553                 s++;
1554         }
1555 }
1556
1557 /*******************************************************************
1558  Skip past some strings in a buffer. n is in bytes.
1559 ********************************************************************/
1560
1561 smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
1562 {
1563         while (n--)
1564                 buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1;
1565         return(buf);
1566 }
1567
1568 /*******************************************************************
1569  Count the number of characters in a string. Same as strlen_w in
1570  smb_ucs2_t string units.
1571 ********************************************************************/
1572
1573 size_t str_charnum_w(const smb_ucs2_t *s)
1574 {
1575         return strlen_w(s);
1576 }
1577
1578 /*******************************************************************
1579  Trim the specified elements off the front and back of a string.
1580 ********************************************************************/
1581
1582 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1583 {
1584         BOOL ret = False;
1585         size_t front_len = (front && *front) ? strlen_w(front) : 0;
1586         size_t back_len = (back && *back) ? strlen_w(back) : 0;
1587         size_t s_len;
1588
1589         while (front_len && strncmp_w(s, front, front_len) == 0) {
1590                 smb_ucs2_t *p = s;
1591                 ret = True;
1592
1593                 while (1) {
1594                         if (!(*p = p[front_len]))
1595                                 break;
1596                         p++;
1597                 }
1598         }
1599
1600         if(back_len) {
1601                 s_len = strlen_w(s);
1602                 while ((s_len >= back_len) &&
1603                         (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1604                         ret = True;
1605                         s[s_len - back_len] = 0;
1606                         s_len = strlen_w(s);
1607                 }
1608         }
1609
1610         return(ret);
1611 }
1612
1613 /****************************************************************************
1614  Does a string have any uppercase chars in it ?
1615 ****************************************************************************/
1616
1617 BOOL strhasupper_w(const smb_ucs2_t *s)
1618 {
1619         while (*s) {
1620                 if (isupper_w(*s))
1621                         return(True);
1622                 s++;
1623         }
1624         return(False);
1625 }
1626
1627 /****************************************************************************
1628  Does a string have any lowercase chars in it ?
1629 ****************************************************************************/
1630
1631 BOOL strhaslower_w(const smb_ucs2_t *s)
1632 {
1633         while (*s) {
1634                 if (islower(*s))
1635                         return(True);
1636                 s++;
1637         }
1638         return(False);
1639 }
1640
1641 /****************************************************************************
1642  Find the number of 'c' chars in a string.
1643 ****************************************************************************/
1644
1645 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1646 {
1647         size_t count=0;
1648
1649         while (*s) {
1650                 if (*s == c)
1651                         count++;
1652                 s++;
1653         }
1654         return(count);
1655 }
1656
1657 /*******************************************************************
1658  Return True if a string consists only of one particular character.
1659 ********************************************************************/
1660
1661 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1662 {
1663         if(s == NULL)
1664                 return False;
1665         if(!*s)
1666                 return False;
1667
1668         while (*s) {
1669                 if (*s != c)
1670                         return False;
1671                 s++;
1672         }
1673         return True;
1674 }
1675
1676 /*******************************************************************
1677  Paranoid strcpy into a buffer of given length (includes terminating
1678  zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1679  does *NOT* check for multibyte characters. Don't change it !
1680  maxlength is in ucs2 units.
1681 ********************************************************************/
1682
1683 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const smb_ucs2_t *other_safe_chars, size_t maxlength)
1684 {
1685         size_t len, i;
1686         smb_ucs2_t nullstr_w = (smb_ucs2_t)0;
1687
1688         if (!dest) {
1689                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1690                 return NULL;
1691         }
1692
1693         if (!src) {
1694                 *dest = 0;
1695                 return dest;
1696         }
1697
1698         len = strlen_w(src);
1699         if (len >= maxlength)
1700                 len = maxlength - 1;
1701
1702         if (!other_safe_chars)
1703                 other_safe_chars = &nullstr_w;
1704
1705         for(i = 0; i < len; i++) {
1706                 smb_ucs2_t val = src[i];
1707                 if(isupper_w(val) ||islower_w(val) || isdigit_w(val) || strchr_w(other_safe_chars, val))
1708                         dest[i] = src[i];
1709                 else
1710                         dest[i] = (smb_ucs2_t)'_';
1711         }
1712
1713         dest[i] = 0;
1714
1715         return dest;
1716 }
1717
1718 /****************************************************************************
1719  Like strncpy but always null terminates. Make sure there is room !
1720  The variable n should always be one less than the available size and is in
1721  ucs2 units.
1722 ****************************************************************************/
1723
1724 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1725 {
1726         smb_ucs2_t *d = dest;
1727         if (!dest)
1728                 return(NULL);
1729         if (!src) {
1730                 *dest = 0;
1731                 return(dest);
1732         }
1733
1734         while (n-- && (*d++ = *src++))
1735                 ;
1736         *d = 0;
1737         return(dest);
1738 }
1739
1740 /****************************************************************************
1741  Like strncpy but copies up to the character marker. Always null terminates.
1742  returns a pointer to the character marker in the source string (src).
1743  n is in ucs2 units.
1744 ****************************************************************************/
1745
1746 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1747 {
1748         smb_ucs2_t *p;
1749         size_t str_len;
1750
1751         p = strchr_w(src, c);
1752         if (p == NULL) {
1753                 fstring cval;
1754                 smb_ucs2_t mbcval[2];
1755                 mbcval[0] = c;
1756                 mbcval[1] = 0;
1757                 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1758                         unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1759                 return NULL;
1760         }
1761
1762         str_len = PTR_DIFF(p, src) + 1;
1763         safe_strcpy_w(dest, src, MIN(n, str_len));
1764
1765         return p;
1766 }
1767
1768 /*************************************************************
1769  Routine to get hex characters and turn them into a 16 byte array.
1770  The array can be variable length, and any non-hex-numeric
1771  characters are skipped.  "0xnn" or "0Xnn" is specially catered
1772  for. len is in bytes.
1773  Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1774 **************************************************************/
1775
1776 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1777 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1778                                                                 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1779                                                                 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1780                                                                 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1781
1782 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1783 {
1784         size_t i;
1785         size_t num_chars = 0;
1786         unsigned char   lonybble, hinybble;
1787         smb_ucs2_t *p1 = NULL, *p2 = NULL;
1788
1789         /*
1790          * Convert to smb_ucs2_t units.
1791          */
1792
1793         len /= sizeof(smb_ucs2_t);
1794
1795         for (i = 0; i < len && strhex[i] != 0; i++) {
1796                 if (strnequal_w(hexchars, hexprefix, 2)) {
1797                         i++; /* skip two chars */
1798                         continue;
1799                 }
1800
1801                 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1802                         break;
1803
1804                 i++; /* next hex digit */
1805
1806                 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1807                         break;
1808
1809                 /* get the two nybbles */
1810                 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1811                 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1812
1813                 p[num_chars] = (hinybble << 4) | lonybble;
1814                 num_chars++;
1815
1816                 p1 = NULL;
1817                 p2 = NULL;
1818         }
1819         return num_chars;
1820 }
1821
1822 /****************************************************************************
1823  Check if a string is part of a list.
1824 ****************************************************************************/
1825
1826 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1827 {
1828         wpstring tok;
1829         smb_ucs2_t *p=list;
1830
1831         if (!list)
1832                 return(False);
1833
1834         while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1835                 if (casesensitive) {
1836                         if (strcmp_w(tok,s) == 0)
1837                                 return(True);
1838                 } else {
1839                         if (StrCaseCmp_w(tok,s) == 0)
1840                                 return(True);
1841                 }
1842         }
1843         return(False);
1844 }
1845
/* Shared empty string: empty values point here, which prevents lots of mallocs of size 2 */
1847 static smb_ucs2_t *null_string = NULL;
1848
1849 /****************************************************************************
1850  Set a string value, allocing the space for the string.
1851 ****************************************************************************/
1852
1853 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1854 {
1855         size_t l;
1856
1857         if (!null_string) {
1858                 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1859                         DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1860                 return False;
1861                 }
1862                 *null_string = 0;
1863         }
1864
1865         if (!src)
1866                 src = null_string;
1867
1868         l = strlen_w(src);
1869
1870         if (l == 0)
1871                 *dest = null_string;
1872         else {
1873                 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1874                 if ((*dest) == NULL) {
1875                         DEBUG(0,("Out of memory in string_init_w\n"));
1876                         return False;
1877                 }
1878
1879                 wpstrcpy(*dest,src);
1880         }
1881         return(True);
1882 }
1883
1884 /****************************************************************************
1885  Free a string value.
1886 ****************************************************************************/
1887
1888 void string_free_w(smb_ucs2_t **s)
1889 {
1890         if (!s || !(*s))
1891                 return;
1892         if (*s == null_string)
1893                 *s = NULL;
1894         SAFE_FREE(*s);
1895 }
1896
1897 /****************************************************************************
1898  Set a string value, allocing the space for the string, and deallocating any
1899  existing space.
1900 ****************************************************************************/
1901
1902 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1903 {
1904         string_free_w(dest);
1905
1906         return(string_init_w(dest,src));
1907 }
1908
1909 /****************************************************************************
1910  Substitute a string for a pattern in another string. Make sure there is
1911  enough room !
1912
1913  This routine looks for pattern in s and replaces it with
1914  insert. It may do multiple replacements.
1915
1916  Any of " ; ' $ or ` in the insert string are replaced with _
1917  if len==0 then no length check is performed
1918  len is in ucs2 units.
1919 ****************************************************************************/
1920
1921 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1922 {
1923         smb_ucs2_t *p;
1924         ssize_t ls,lp,li, i;
1925
1926         if (!insert || !pattern || !s)
1927                 return;
1928
1929         ls = (ssize_t)strlen_w(s);
1930         lp = (ssize_t)strlen_w(pattern);
1931         li = (ssize_t)strlen_w(insert);
1932
1933         if (!*pattern)
1934                 return;
1935
1936         while (lp <= ls && (p = strstr_w(s,pattern))) {
1937                 if (len && (ls + (li-lp) >= len)) {
1938                         fstring out;
1939                         DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1940                                  (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1941                                  unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1942                         break;
1943                 }
1944                 if (li != lp)
1945                         memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1946
1947                 for (i=0;i<li;i++) {
1948                         switch (insert[i]) {
1949                         case (smb_ucs2_t)'`':
1950                         case (smb_ucs2_t)'"':
1951                         case (smb_ucs2_t)'\'':
1952                         case (smb_ucs2_t)';':
1953                         case (smb_ucs2_t)'$':
1954                         case (smb_ucs2_t)'%':
1955                         case (smb_ucs2_t)'\r':
1956                         case (smb_ucs2_t)'\n':
1957                                 p[i] = (smb_ucs2_t)'_';
1958                                 break;
1959                         default:
1960                                 p[i] = insert[i];
1961                         }
1962                 }
1963                 s = p + li;
1964                 ls += (li-lp);
1965         }
1966 }
1967
1968 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1969 {
1970         string_sub_w(s, pattern, insert, sizeof(wfstring));
1971 }
1972
1973 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1974 {
1975         string_sub_w(s, pattern, insert, sizeof(wpstring));
1976 }
1977
1978 /****************************************************************************
1979  Similar to string_sub() but allows for any character to be substituted.
1980  Use with caution !
1981  if len==0 then no length check is performed.
1982 ****************************************************************************/
1983
1984 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1985 {
1986         smb_ucs2_t *p;
1987         ssize_t ls,lp,li;
1988
1989         if (!insert || !pattern || !s)
1990                 return;
1991
1992         ls = (ssize_t)strlen_w(s);
1993         lp = (ssize_t)strlen_w(pattern);
1994         li = (ssize_t)strlen_w(insert);
1995
1996         if (!*pattern)
1997                 return;
1998
1999         while (lp <= ls && (p = strstr_w(s,pattern))) {
2000                 if (len && (ls + (li-lp) >= len)) {
2001                         fstring out;
2002                         DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
2003                                  (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
2004                                  unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
2005                         break;
2006                 }
2007                 if (li != lp)
2008                         memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
2009
2010                 memcpy(p, insert, li*sizeof(smb_ucs2_t));
2011                 s = p + li;
2012                 ls += (li-lp);
2013         }
2014 }
2015
2016 /****************************************************************************
2017  Splits out the front and back at a separator.
2018 ****************************************************************************/
2019
2020 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
2021 {
2022     smb_ucs2_t *p = strrchr_w(path, sep);
2023
2024         if (p != NULL)
2025                 *p = 0;
2026
2027         if (front != NULL)
2028                 wpstrcpy(front, path);
2029
2030         if (p != NULL) {
2031                 if (back != NULL)
2032                         wpstrcpy(back, p+1);
2033                 *p = (smb_ucs2_t)'\\';
2034         } else {
2035                 if (back != NULL)
2036                         back[0] = 0;
2037         }
2038 }
2039
2040
2041 /****************************************************************************
2042  Write an octal as a string.
2043 ****************************************************************************/
2044
2045 smb_ucs2_t *octal_string_w(int i)
2046 {
2047         static smb_ucs2_t wret[64];
2048         char ret[64];
2049
2050         if (i == -1)
2051                 slprintf(ret, sizeof(ret)-1, "-1");
2052         else
2053                 slprintf(ret, sizeof(ret)-1, "0%o", i);
2054         return unix_to_unicode(wret, ret, sizeof(wret));
2055 }
2056
2057
2058 /****************************************************************************
2059  Truncate a string at a specified length.
2060  length is in ucs2 units.
2061 ****************************************************************************/
2062
2063 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
2064 {
2065         if (s && strlen_w(s) > length)
2066                 s[length] = 0;
2067
2068         return s;
2069 }
2070
/******************************************************************
 Functions for UTF8 support (used in kanji.c).
 ******************************************************************/
2074 smb_ucs2_t doscp2ucs2(int w)
2075 {
2076   return ((smb_ucs2_t)doscp_to_ucs2[w]);
2077 }
2078
2079 int ucs2doscp(smb_ucs2_t w)
2080 {
2081   return ((int)ucs2_to_doscp[w]);
2082 }
2083
2084 /* Temporary fix until 3.0... JRA */
2085
2086 int rpcstr_pull(char* dest, void *src, int dest_len, int src_len, int flags)
2087 {
2088         if(dest_len==-1)
2089                 dest_len=MAXUNI-3;
2090
2091         if (flags & STR_TERMINATE)
2092                 src_len = strlen_w(src)*2+2;
2093
2094         dest_len = MIN((src_len/2), (dest_len-1));
2095         unistr_to_ascii(dest, src, dest_len);
2096         return src_len;
2097 }