source/lib/util_unistr.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 1.9.
   4    Samba utility functions
   5    Copyright (C) Andrew Tridgell 1992-1998
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 */
  21
  22 #include "includes.h"
  23
  24  smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
  25                                                                 (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
  26                                                                 (smb_ucs2_t)'\r', 0 };
  27 /*
  28  * The following are the codepage to ucs2 and vica versa maps.
  29  * These are dynamically loaded from a unicode translation file.
  30  */
  31
  32 static smb_ucs2_t *doscp_to_ucs2;
  33 static uint16 *ucs2_to_doscp;
  34
  35 static smb_ucs2_t *unixcp_to_ucs2;
  36 static uint16 *ucs2_to_unixcp;
  37
  38 #ifndef MAXUNI
  39 #define MAXUNI 1024
  40 #endif
  41
  42 /*******************************************************************
  43  Write a string in (little-endian) unicode format. src is in
  44  the current UNIX character set. len is the length in bytes of the
  45  string pointed to by dst.
  46
  47  if null_terminate is True then null terminate the packet (adds 2 bytes)
  48
  49  the return value is the length in bytes consumed by the string, including the
  50  null termination if applied
  51 ********************************************************************/
  52
  53 size_t unix_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
  54 {
  55         size_t ret = 0;
  56         while (*src && (len >= 2)) {
  57                 size_t skip = get_character_len(*src);
  58                 smb_ucs2_t val = (*src & 0xff);
  59
  60                 /*
  61                  * If this is a multibyte character (and all DOS/Windows
  62                  * codepages have at maximum 2 byte multibyte characters)
  63                  * then work out the index value for the unicode conversion.
  64                  */
  65
  66                 if (skip == 2)
  67                         val = ((val << 8) | (src[1] & 0xff));
  68
  69                 SSVAL(dst,ret,unixcp_to_ucs2[val]);
  70                 ret += 2;
  71                 len -= 2;
  72                 if (skip)
  73                         src += skip;
  74                 else
  75                         src++;
  76         }
  77         if (null_terminate) {
  78                 SSVAL(dst,ret,0);
  79                 ret += 2;
  80         }
  81         return(ret);
  82 }
  83
  84 /*******************************************************************
  85  Write a string in (little-endian) unicode format. src is in
  86  the current DOS codepage. len is the length in bytes of the
  87  string pointed to by dst.
  88
  89  if null_terminate is True then null terminate the packet (adds 2 bytes)
  90
  91  the return value is the length in bytes consumed by the string, including the
  92  null termination if applied
  93 ********************************************************************/
  94
  95 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
  96 {
  97         size_t ret = 0;
  98         while (*src && (len >= 2)) {
  99                 size_t skip = get_character_len(*src);
 100                 smb_ucs2_t val = (*src & 0xff);
 101
 102                 /*
 103                  * If this is a multibyte character (and all DOS/Windows
 104                  * codepages have at maximum 2 byte multibyte characters)
 105                  * then work out the index value for the unicode conversion.
 106                  */
 107
 108                 if (skip == 2)
 109                         val = ((val << 8) | (src[1] & 0xff));
 110
 111                 SSVAL(dst,ret,doscp_to_ucs2[val]);
 112                 ret += 2;
 113                 len -= 2;
 114                 if (skip)
 115                         src += skip;
 116                 else
 117                         src++;
 118         }
 119         if (null_terminate) {
 120                 SSVAL(dst,ret,0);
 121                 ret += 2;
 122         }
 123         return(ret);
 124 }
 125
 126 /*******************************************************************
 127  Pull a DOS codepage string out of a UNICODE array. len is in bytes.
 128 ********************************************************************/
 129
 130 void unistr_to_dos(char *dest, const char *src, size_t len)
 131 {
 132         char *destend = dest + len;
 133
 134         while (dest < destend) {
 135                 uint16 ucs2_val = SVAL(src,0);
 136                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 137
 138                 src += 2;
 139
 140                 if (ucs2_val == 0)
 141                         break;
 142
 143                 if (cp_val < 256)
 144                         *dest++ = (char)cp_val;
 145                 else {
 146                         *dest++ = (cp_val >> 8) & 0xff;
 147                         *dest++ = (cp_val & 0xff);
 148                 }
 149         }
 150
 151         *dest = 0;
 152 }
 153
 154 /*******************************************************************
 155  Skip past a unicode string, but not more than len. Always move
 156  past a terminating zero if found.
 157 ********************************************************************/
 158
 159 char *skip_unibuf(char *src, size_t len)
 160 {
 161     char *srcend = src + len;
 162
 163     while (src < srcend && SVAL(src,0))
 164         src += 2;
 165
 166     if(!SVAL(src,0))
 167         src += 2;
 168
 169     return src;
 170 }
 171
 172 /*******************************************************************
 173  Return a DOS codepage version of a little-endian unicode string.
 174  len is the filename length (ignoring any terminating zero) in uin16
 175  units. Always null terminates.
 176  Hack alert: uses fixed buffer(s).
 177 ********************************************************************/
 178
 179 char *dos_unistrn2(uint16 *src, int len)
 180 {
 181         static char lbufs[8][MAXUNI];
 182         static int nexti;
 183         char *lbuf = lbufs[nexti];
 184         char *p;
 185
 186         nexti = (nexti+1)%8;
 187
 188         for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
 189                 uint16 ucs2_val = SVAL(src,0);
 190                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 191
 192                 if (cp_val < 256)
 193                         *p++ = (char)cp_val;
 194                 else {
 195                         *p++ = (cp_val >> 8) & 0xff;
 196                         *p++ = (cp_val & 0xff);
 197                 }
 198         }
 199
 200         *p = 0;
 201         return lbuf;
 202 }
 203
 204 static char lbufs[8][MAXUNI];
 205 static int nexti;
 206
 207 /*******************************************************************
 208  Return a DOS codepage version of a little-endian unicode string.
 209  Hack alert: uses fixed buffer(s).
 210 ********************************************************************/
 211
 212 char *dos_unistr2(uint16 *src)
 213 {
 214         char *lbuf = lbufs[nexti];
 215         char *p;
 216
 217         nexti = (nexti+1)%8;
 218
 219         for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
 220                 uint16 ucs2_val = SVAL(src,0);
 221                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 222
 223                 if (cp_val < 256)
 224                         *p++ = (char)cp_val;
 225                 else {
 226                         *p++ = (cp_val >> 8) & 0xff;
 227                         *p++ = (cp_val & 0xff);
 228                 }
 229         }
 230
 231         *p = 0;
 232         return lbuf;
 233 }
 234
 235 /*******************************************************************
 236 Return a DOS codepage version of a little-endian unicode string
 237 ********************************************************************/
 238
 239 char *dos_unistr2_to_str(UNISTR2 *str)
 240 {
 241         char *lbuf = lbufs[nexti];
 242         char *p;
 243         uint16 *src = str->buffer;
 244
 245         nexti = (nexti+1)%8;
 246
 247         for (p = lbuf; (p - lbuf < MAXUNI-3) && (src - str->buffer < str->uni_str_len) && *src; src++) {
 248                 uint16 ucs2_val = SVAL(src,0);
 249                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 250
 251                 if (cp_val < 256)
 252                         *p++ = (char)cp_val;
 253                 else {
 254                         *p++ = (cp_val >> 8) & 0xff;
 255                         *p++ = (cp_val & 0xff);
 256                 }
 257         }
 258
 259         *p = 0;
 260         return lbuf;
 261 }
 262
 263 /*******************************************************************
 264  Put an ASCII string into a UNICODE array (uint16's).
 265  use little-endian ucs2
 266  ********************************************************************/
 267 void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
 268 {
 269         uint16 *destend = dest + maxlen;
 270         char c;
 271
 272         while (dest < destend) {
 273                 c = *(src++);
 274                 if (c == 0)
 275                         break;
 276
 277                 SSVAL(dest, 0, c);
 278                 dest++;
 279         }
 280
 281         *dest = 0;
 282 }
 283
 284 /*******************************************************************
 285  Pull an ASCII string out of a UNICODE array (uint16's).
 286  ********************************************************************/
 287
 288 void unistr_to_ascii(char *dest, const uint16 *src, int len)
 289 {
 290         char *destend = dest + len;
 291         uint16 c;
 292
 293         if (src == NULL) {
 294                 *dest = '\0';
 295                 return;
 296         }
 297
 298         /* normal code path for a valid 'src' */
 299         while (dest < destend) {
 300                 c = SVAL(src, 0);
 301                 src++;
 302                 if (c == 0)
 303                         break;
 304
 305                 *(dest++) = (char)c;
 306         }
 307
 308         *dest = 0;
 309         return;
 310 }
 311
 312 /*******************************************************************
 313  Convert a (little-endian) UNISTR2 structure to an ASCII string, either
 314  DOS or UNIX codepage.
 315 ********************************************************************/
 316
 317 static void unistr2_to_mbcp(char *dest, const UNISTR2 *str, size_t maxlen, uint16 *ucs2_to_mbcp)
 318 {
 319         char *p;
 320         uint16 *src;
 321         size_t len;
 322
 323         if (str == NULL) {
 324                 *dest='\0';
 325                 return;
 326         }
 327
 328         src = str->buffer;
 329
 330         len = MIN(str->uni_str_len, maxlen);
 331         if (len == 0) {
 332                 *dest='\0';
 333                 return;
 334         }
 335
 336         for (p = dest; (p-dest < maxlen-3) && (src - str->buffer < str->uni_str_len) && *src; src++) {
 337                 uint16 ucs2_val = SVAL(src,0);
 338                 uint16 cp_val = ucs2_to_mbcp[ucs2_val];
 339
 340                 if (cp_val < 256)
 341                         *p++ = (char)cp_val;
 342                 else {
 343                         *p++ = (cp_val >> 8) & 0xff;
 344                         *p++ = (cp_val & 0xff);
 345                 }
 346         }
 347
 348         *p = 0;
 349 }
 350
 351 /*******************************************************************
 352  Convert a (little-endian) UNISTR2 structure to an ASCII string
 353  Warning: this version does DOS codepage.
 354 ********************************************************************/
 355
 356 void unistr2_to_dos(char *dest, const UNISTR2 *str, size_t maxlen)
 357 {
 358         unistr2_to_mbcp(dest, str, maxlen, ucs2_to_doscp);
 359 }
 360
 361 /*******************************************************************
 362  Convert a (little-endian) UNISTR2 structure to an ASCII string
 363  Warning: this version does UNIX codepage.
 364 ********************************************************************/
 365
 366 void unistr2_to_unix(char *dest, const UNISTR2 *str, size_t maxlen)
 367 {
 368         unistr2_to_mbcp(dest, str, maxlen, ucs2_to_unixcp);
 369 }
 370
 371 /*******************************************************************
 372 Return a number stored in a buffer
 373 ********************************************************************/
 374
 375 uint32 buffer2_to_uint32(BUFFER2 *str)
 376 {
 377         if (str->buf_len == 4)
 378                 return IVAL(str->buffer, 0);
 379         else
 380                 return 0;
 381 }
 382
 383 /*******************************************************************
 384 Return a DOS codepage version of a NOTunicode string
 385 ********************************************************************/
 386
 387 char *dos_buffer2_to_str(BUFFER2 *str)
 388 {
 389         char *lbuf = lbufs[nexti];
 390         char *p;
 391         uint16 *src = str->buffer;
 392
 393         nexti = (nexti+1)%8;
 394
 395         for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2) && *src; src++) {
 396                 uint16 ucs2_val = SVAL(src,0);
 397                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 398
 399                 if (cp_val < 256)
 400                         *p++ = (char)cp_val;
 401                 else {
 402                         *p++ = (cp_val >> 8) & 0xff;
 403                         *p++ = (cp_val & 0xff);
 404                 }
 405         }
 406
 407         *p = 0;
 408         return lbuf;
 409 }
 410
 411 /*******************************************************************
 412  Return a dos codepage version of a NOTunicode string
 413 ********************************************************************/
 414
 415 char *dos_buffer2_to_multistr(BUFFER2 *str)
 416 {
 417         char *lbuf = lbufs[nexti];
 418         char *p;
 419         uint16 *src = str->buffer;
 420
 421         nexti = (nexti+1)%8;
 422
 423         for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2); src++) {
 424                 if (*src == 0) {
 425                         *p++ = ' ';
 426                 } else {
 427                         uint16 ucs2_val = SVAL(src,0);
 428                         uint16 cp_val = ucs2_to_doscp[ucs2_val];
 429
 430                         if (cp_val < 256)
 431                                 *p++ = (char)cp_val;
 432                         else {
 433                                 *p++ = (cp_val >> 8) & 0xff;
 434                                 *p++ = (cp_val & 0xff);
 435                         }
 436                 }
 437         }
 438
 439         *p = 0;
 440         return lbuf;
 441 }
 442
 443 /*******************************************************************
 444  Create a null-terminated unicode string from a null-terminated DOS
 445  codepage string.
 446  Return number of unicode chars copied, excluding the null character.
 447  Unicode strings created are in little-endian format.
 448 ********************************************************************/
 449
 450 size_t dos_struni2(char *dst, const char *src, size_t max_len)
 451 {
 452         size_t len = 0;
 453
 454         if (dst == NULL)
 455                 return 0;
 456
 457         if (src != NULL) {
 458                 for (; (len < max_len-2) && *src; len++, dst +=2) {
 459                         size_t skip = get_character_len(*src);
 460                         smb_ucs2_t val = (*src & 0xff);
 461
 462                         /*
 463                          * If this is a multibyte character (and all DOS/Windows
 464                          * codepages have at maximum 2 byte multibyte characters)
 465                          * then work out the index value for the unicode conversion.
 466                          */
 467
 468                         if (skip == 2)
 469                                 val = ((val << 8) | (src[1] & 0xff));
 470
 471                         SSVAL(dst,0,doscp_to_ucs2[val]);
 472                         if (skip)
 473                                 src += skip;
 474                         else
 475                                 src++;
 476                 }
 477         }
 478
 479         SSVAL(dst,0,0);
 480
 481         return len;
 482 }
 483
 484 /*******************************************************************
 485  Return a DOS codepage version of a little-endian unicode string.
 486  Hack alert: uses fixed buffer(s).
 487 ********************************************************************/
 488
 489 char *dos_unistr(char *buf)
 490 {
 491         char *lbuf = lbufs[nexti];
 492         uint16 *src = (uint16 *)buf;
 493         char *p;
 494
 495         nexti = (nexti+1)%8;
 496
 497         for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
 498                 uint16 ucs2_val = SVAL(src,0);
 499                 uint16 cp_val = ucs2_to_doscp[ucs2_val];
 500
 501                 if (cp_val < 256)
 502                         *p++ = (char)cp_val;
 503                 else {
 504                         *p++ = (cp_val >> 8) & 0xff;
 505                         *p++ = (cp_val & 0xff);
 506                 }
 507         }
 508
 509         *p = 0;
 510         return lbuf;
 511 }
 512
 513 /*******************************************************************
 514  returns the length in number of wide characters
 515  ******************************************************************/
 516 int unistrlen(uint16 *s)
 517 {
 518         int len;
 519
 520         if (!s)
 521                 return -1;
 522
 523         for (len=0; *s; s++,len++);
 524
 525         return len;
 526 }
 527
 528 /*******************************************************************
 529  Strcpy for unicode strings.  returns length (in num of wide chars)
 530 ********************************************************************/
 531
 532 int unistrcpy(uint16 *dst, uint16 *src)
 533 {
 534         int num_wchars = 0;
 535
 536         while (*src) {
 537                 *dst++ = *src++;
 538                 num_wchars++;
 539         }
 540         *dst = 0;
 541
 542         return num_wchars;
 543 }
 544
 545 /*******************************************************************
 546  Free any existing maps.
 547 ********************************************************************/
 548
 549 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 550 {
 551         /* this handles identity mappings where we share the pointer */
 552         if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
 553                 *pp_ucs2_to_cp = NULL;
 554         }
 555
 556         SAFE_FREE(*pp_cp_to_ucs2);
 557         SAFE_FREE(*pp_ucs2_to_cp);
 558 }
 559
 560 /*******************************************************************
 561  Build a default (null) codepage to unicode map.
 562 ********************************************************************/
 563
 564 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 565 {
 566   int i;
 567
 568   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 569
 570   if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
 571     DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
 572     abort();
 573   }
 574
 575   *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
 576   for (i = 0; i < 65536; i++)
 577     (*pp_cp_to_ucs2)[i] = i;
 578 }
 579
 580 /*******************************************************************
 581  Load a codepage to unicode and vica-versa map.
 582 ********************************************************************/
 583
 584 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 585 {
 586   pstring unicode_map_file_name;
 587   FILE *fp = NULL;
 588   SMB_STRUCT_STAT st;
 589   smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
 590   uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
 591   size_t cp_to_ucs2_size;
 592   size_t ucs2_to_cp_size;
 593   size_t i;
 594   size_t size;
 595   char buf[UNICODE_MAP_HEADER_SIZE];
 596
 597   DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
 598
 599   if (*codepage == '\0')
 600     goto clean_and_exit;
 601
 602   if(strlen(lp_codepagedir()) + 13 + strlen(codepage) >
 603      sizeof(unicode_map_file_name)) {
 604     DEBUG(0,("load_unicode_map: filename too long to load\n"));
 605     goto clean_and_exit;
 606   }
 607
 608   pstrcpy(unicode_map_file_name, lp_codepagedir());
 609   pstrcat(unicode_map_file_name, "/");
 610   pstrcat(unicode_map_file_name, "unicode_map.");
 611   pstrcat(unicode_map_file_name, codepage);
 612
 613   if(sys_stat(unicode_map_file_name,&st)!=0) {
 614     DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
 615               unicode_map_file_name));
 616     goto clean_and_exit;
 617   }
 618
 619   size = st.st_size;
 620
 621   if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
 622     DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
 623 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
 624     goto clean_and_exit;
 625   }
 626
 627   if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
 628     DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
 629               unicode_map_file_name, strerror(errno)));
 630     goto clean_and_exit;
 631   }
 632
 633   if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
 634     DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
 635               unicode_map_file_name, strerror(errno)));
 636     goto clean_and_exit;
 637   }
 638
 639   /* Check the version value */
 640   if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
 641     DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
 642 Needed %hu, got %hu.\n",
 643           unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
 644           SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
 645     goto clean_and_exit;
 646   }
 647
 648   /* Check the codepage value */
 649   if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
 650     DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
 651 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
 652     goto clean_and_exit;
 653   }
 654
 655   ucs2_to_cp_size = 2*65536;
 656   if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
 657     /*
 658      * This is a multibyte code page.
 659      */
 660     cp_to_ucs2_size = 2*65536;
 661   } else {
 662     /*
 663      * Single byte code page.
 664      */
 665     cp_to_ucs2_size = 2*256;
 666   }
 667
 668   /*
 669    * Free any old translation tables.
 670    */
 671
 672   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 673
 674   if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
 675     DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
 676     goto clean_and_exit;
 677   }
 678
 679   if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
 680     DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
 681     goto clean_and_exit;
 682   }
 683
 684   if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
 685     DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
 686               unicode_map_file_name, strerror(errno)));
 687     goto clean_and_exit;
 688   }
 689
 690   if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
 691     DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
 692               unicode_map_file_name, strerror(errno)));
 693     goto clean_and_exit;
 694   }
 695
 696   /*
 697    * Now ensure the 16 bit values are in the correct endianness.
 698    */
 699
 700   for (i = 0; i < cp_to_ucs2_size/2; i++)
 701     cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
 702
 703   for (i = 0; i < ucs2_to_cp_size/2; i++)
 704     ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
 705
 706   fclose(fp);
 707
 708   *pp_cp_to_ucs2 = cp_to_ucs2;
 709   *pp_ucs2_to_cp = ucs2_to_cp;
 710
 711   return True;
 712
 713 clean_and_exit:
 714
 715   /* pseudo destructor :-) */
 716
 717   if(fp != NULL)
 718     fclose(fp);
 719
 720   free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
 721
 722   default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
 723
 724   return False;
 725 }
 726
 727 /*******************************************************************
 728  Load a dos codepage to unicode and vica-versa map.
 729 ********************************************************************/
 730
 731 BOOL load_dos_unicode_map(int codepage)
 732 {
 733   fstring codepage_str;
 734
 735   slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
 736   DEBUG(10,("load_dos_unicode_map: %s\n", codepage_str));
 737   return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
 738 }
 739
 740 /*******************************************************************
 741  Load a UNIX codepage to unicode and vica-versa map.
 742 ********************************************************************/
 743
 744 BOOL load_unix_unicode_map(const char *unix_char_set, BOOL override)
 745 {
 746         static BOOL init_done;
 747         fstring upper_unix_char_set;
 748
 749         fstrcpy(upper_unix_char_set, unix_char_set);
 750         strupper(upper_unix_char_set);
 751
 752         DEBUG(10,("load_unix_unicode_map: %s (init_done=%d, override=%d)\n",
 753                 upper_unix_char_set, (int)init_done, (int)override ));
 754
 755         if (!init_done)
 756                 init_done = True;
 757         else if (!override)
 758                 return True;
 759
 760         return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
 761 }
 762
 763 /*******************************************************************
 764  The following functions reproduce many of the non-UNICODE standard
 765  string functions in Samba.
 766 ********************************************************************/
 767
 768 /*******************************************************************
 769  Convert a UNICODE string to multibyte format. Note that the 'src' is in
 770  native byte order, not little endian. Always zero terminates.
 771  dst_len is in bytes.
 772 ********************************************************************/
 773
 774 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
 775                                   size_t dst_len, const uint16 *ucs2_to_cp)
 776 {
 777         size_t dst_pos;
 778
 779         for(dst_pos = 0; (dst_pos < dst_len - 1) && *src;) {
 780                 smb_ucs2_t val = ucs2_to_cp[*src++];
 781                 if(val < 256) {
 782                         dst[dst_pos++] = (char)val;
 783                 } else {
 784
 785                         if(dst_pos >= dst_len - 2)
 786                                 break;
 787
 788                         /*
 789                          * A 2 byte value is always written as
 790                          * high/low into the buffer stream.
 791                          */
 792
 793                         dst[dst_pos++] = (char)((val >> 8) & 0xff);
 794                         dst[dst_pos++] = (char)(val & 0xff);
 795                 }
 796         }
 797
 798         dst[dst_pos] = '\0';
 799
 800         return dst;
 801 }
 802
 803 /*******************************************************************
 804  Convert a multibyte string to UNICODE format. Note that the 'dst' is in
 805  native byte order, not little endian. Always zero terminates.
 806  dst_len is in bytes.
 807 ********************************************************************/
 808
 809 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
 810                                  size_t dst_len, smb_ucs2_t *cp_to_ucs2)
 811 {
 812         size_t i;
 813
 814         dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
 815
 816         for(i = 0; (i < (dst_len  - 1)) && *src;) {
 817                 size_t skip = skip_multibyte_char(*src);
 818                 smb_ucs2_t val = (*src & 0xff);
 819
 820                 /*
 821                  * If this is a multibyte character
 822                  * then work out the index value for the unicode conversion.
 823                  */
 824
 825                 if (skip == 2)
 826                         val = ((val << 8) | (src[1] & 0xff));
 827
 828                 dst[i++] = cp_to_ucs2[val];
 829                 if (skip)
 830                         src += skip;
 831                 else
 832                         src++;
 833         }
 834
 835         dst[i] = 0;
 836
 837         return dst;
 838 }
 839
 840 /*******************************************************************
 841  Convert a UNICODE string to multibyte format. Note that the 'src' is in
 842  native byte order, not little endian. Always zero terminates.
 843  This function may be replaced if the MB  codepage format is an
 844  encoded one (ie. utf8, hex). See the code in lib/kanji.c
 845  for details. dst_len is in bytes.
 846 ********************************************************************/
 847
 848 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
 849 {
 850         return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
 851 }
 852
 853 /*******************************************************************
 854  Convert a UNIX string to UNICODE format. Note that the 'dst' is in
 855  native byte order, not little endian. Always zero terminates.
 856  This function may be replaced if the UNIX codepage format is a
 857  multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
 858  for details. dst_len is in bytes, not ucs2 units.
 859 ********************************************************************/
 860
 861 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
 862 {
 863         return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
 864 }
 865
 866 /*******************************************************************
 867  Convert a single UNICODE character to unix character. Returns the
 868  number of bytes in the unix character.
 869 ********************************************************************/
 870
 871 size_t unicode_to_unix_char(char *dst, const smb_ucs2_t src)
 872 {
 873         smb_ucs2_t val = ucs2_to_unixcp[src];
 874         if(val < 256) {
 875                 *dst = (char)val;
 876                 return (size_t)1;
 877         }
 878         /*
 879          * A 2 byte value is always written as
 880          * high/low into the buffer stream.
 881          */
 882
 883         dst[0] = (char)((val >> 8) & 0xff);
 884         dst[1] = (char)(val & 0xff);
 885         return (size_t)2;
 886 }
 887
 888 /*******************************************************************
 889  Convert a UNICODE string to DOS format. Note that the 'src' is in
 890  native byte order, not little endian. Always zero terminates.
 891  dst_len is in bytes.
 892 ********************************************************************/
 893
 894 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
 895 {
 896         return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
 897 }
 898
 899 /*******************************************************************
 900  Convert a single UNICODE character to DOS codepage. Returns the
 901  number of bytes in the DOS codepage character.
 902 ********************************************************************/
 903
 904 size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src)
 905 {
 906         smb_ucs2_t val = ucs2_to_doscp[src];
 907         if(val < 256) {
 908                 *dst = (char)val;
 909                 return (size_t)1;
 910         }
 911         /*
 912          * A 2 byte value is always written as
 913          * high/low into the buffer stream.
 914          */
 915
 916         dst[0] = (char)((val >> 8) & 0xff);
 917         dst[1] = (char)(val & 0xff);
 918         return (size_t)2;
 919 }
 920
 921 /*******************************************************************
 922  Convert a DOS string to UNICODE format. Note that the 'dst' is in
 923  native byte order, not little endian. Always zero terminates.
 924  This function may be replaced if the DOS codepage format is a
 925  multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
 926  for details. dst_len is in bytes, not ucs2 units.
 927 ********************************************************************/
 928
 929 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
 930 {
 931         return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
 932 }
 933
 934 /*******************************************************************
 935  Count the number of characters in a smb_ucs2_t string.
 936 ********************************************************************/
 937
 938 size_t strlen_w(const smb_ucs2_t *src)
 939 {
 940   size_t len;
 941
 942   for(len = 0; *src++; len++)
 943     ;
 944
 945   return len;
 946 }
 947
 948 /*******************************************************************
 949  Safe wstring copy into a known length string. maxlength includes
 950  the terminating zero. maxlength is in ucs2 units.
 951 ********************************************************************/
 952
 953 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
 954 {
 955     size_t ucs2_len;
 956
 957     if (!dest) {
 958         DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
 959         return NULL;
 960     }
 961
 962     if (!src) {
 963         *dest = 0;
 964         return dest;
 965     }
 966
 967         maxlength /= sizeof(smb_ucs2_t);
 968
 969         ucs2_len = strlen_w(src);
 970
 971     if (ucs2_len >= maxlength) {
 972                 fstring out;
 973         DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
 974                         (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
 975                         unicode_to_unix(out,src,sizeof(out))) );
 976                 ucs2_len = maxlength - 1;
 977     }
 978
 979     memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
 980     dest[ucs2_len] = 0;
 981     return dest;
 982 }
 983
 984 /*******************************************************************
 985  Safe string cat into a string. maxlength includes the terminating zero.
 986  maxlength is in ucs2 units.
 987 ********************************************************************/
 988
 989 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
 990 {
 991     size_t ucs2_src_len, ucs2_dest_len;
 992
 993     if (!dest) {
 994         DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
 995         return NULL;
 996     }
 997
 998     if (!src)
 999         return dest;
1000
1001     ucs2_src_len = strlen_w(src);
1002     ucs2_dest_len = strlen_w(dest);
1003
1004     if (ucs2_src_len + ucs2_dest_len >= maxlength) {
1005                 fstring out;
1006                 int new_len = maxlength - ucs2_dest_len - 1;
1007         DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
1008                         (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
1009                         unicode_to_unix(out,src,sizeof(out))) );
1010         ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
1011     }
1012
1013     memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
1014     dest[ucs2_dest_len + ucs2_src_len] = 0;
1015     return dest;
1016 }
1017
1018 /*******************************************************************
1019  Compare the two strings s1 and s2.
1020 ********************************************************************/
1021
1022 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1023 {
1024         smb_ucs2_t c1, c2;
1025
1026         for (;;) {
1027                 c1 = *s1++;
1028                 c2 = *s2++;
1029
1030                 if (c1 != c2)
1031                         return c1 - c2;
1032
1033                 if (c1 == 0)
1034                         break;
1035         }
1036         return 0;
1037 }
1038
1039 /*******************************************************************
1040  Compare the first n characters of s1 to s2. len is in ucs2 units.
1041 ********************************************************************/
1042
1043 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
1044 {
1045         smb_ucs2_t c1, c2;
1046
1047         for (; len != 0; --len) {
1048                 c1 = *s1++;
1049                 c2 = *s2++;
1050
1051                 if (c1 != c2)
1052                         return c1 - c2;
1053
1054                 if (c1 == 0)
1055                         break;
1056
1057         }
1058         return 0;
1059 }
1060
1061 /*******************************************************************
1062  Search string s2 from s1.
1063 ********************************************************************/
1064
1065 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1066 {
1067         size_t len = strlen_w(s2);
1068
1069         if (!*s2)
1070                 return (smb_ucs2_t *)s1;
1071
1072         for(;*s1; s1++) {
1073                 if (*s1 == *s2) {
1074                         if (strncmp_w(s1, s2, len) == 0)
1075                                 return (smb_ucs2_t *)s1;
1076                 }
1077         }
1078         return NULL;
1079 }
1080
1081 /*******************************************************************
1082  Search for ucs2 char c from the beginning of s.
1083 ********************************************************************/
1084
1085 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1086 {
1087         do {
1088                 if (*s == c)
1089                         return (smb_ucs2_t *)s;
1090         } while (*s++);
1091
1092         return NULL;
1093 }
1094
1095 /*******************************************************************
1096  Search for ucs2 char c from the end of s.
1097 ********************************************************************/
1098
1099 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1100 {
1101         smb_ucs2_t *retval = 0;
1102
1103         do {
1104                 if (*s == c)
1105                         retval = (smb_ucs2_t *)s;
1106         } while (*s++);
1107
1108         return retval;
1109 }
1110
1111 /*******************************************************************
1112  Search token from s1 separated by any ucs2 char of s2.
1113 ********************************************************************/
1114
1115 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
1116 {
1117         static smb_ucs2_t *s = NULL;
1118         smb_ucs2_t *q;
1119
1120         if (!s1) {
1121                 if (!s)
1122                         return NULL;
1123                 s1 = s;
1124         }
1125
1126         for (q = s1; *s1; s1++) {
1127                 smb_ucs2_t *p = strchr_w(s2, *s1);
1128                 if (p) {
1129                         if (s1 != q) {
1130                                 s = s1 + 1;
1131                                 *s1 = '\0';
1132                                 return q;
1133                         }
1134                         q = s1 + 1;
1135                 }
1136         }
1137
1138         s = NULL;
1139         if (*q)
1140                 return q;
1141
1142         return NULL;
1143 }
1144
1145 /*******************************************************************
1146  Duplicate a ucs2 string.
1147 ********************************************************************/
1148
1149 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1150 {
1151         size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1152         smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1153     if (newstr == NULL)
1154         return NULL;
1155     safe_strcpy_w(newstr, s, newlen);
1156     return newstr;
1157 }
1158
1159 /*******************************************************************
1160  Mapping tables for UNICODE character. Allows toupper/tolower and
1161  isXXX functions to work.
1162
1163  tridge: split into 2 pieces. This saves us 5/6 of the memory
1164  with a small speed penalty
1165  The magic constants are the lower/upper range of the tables two
1166  parts
1167 ********************************************************************/
1168
1169 typedef struct {
1170         smb_ucs2_t lower;
1171         smb_ucs2_t upper;
1172         unsigned char flags;
1173 } smb_unicode_table_t;
1174
1175 #define TABLE1_BOUNDARY 9450
1176 #define TABLE2_BOUNDARY 64256
1177
1178 static smb_unicode_table_t map_table1[] = {
1179 #include "unicode_map_table1.h"
1180 };
1181
1182 static smb_unicode_table_t map_table2[] = {
1183 #include "unicode_map_table2.h"
1184 };
1185
1186 static unsigned char map_table_flags(smb_ucs2_t v)
1187 {
1188         if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1189         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1190         return 0;
1191 }
1192
1193 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1194 {
1195         if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1196         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1197         return v;
1198 }
1199
1200 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1201 {
1202         if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1203         if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1204         return v;
1205 }
1206
1207 /*******************************************************************
1208  Is an upper case wchar.
1209 ********************************************************************/
1210
1211 int isupper_w( smb_ucs2_t val)
1212 {
1213         return (map_table_flags(val) & UNI_UPPER);
1214 }
1215
1216 /*******************************************************************
1217  Is a lower case wchar.
1218 ********************************************************************/
1219
1220 int islower_w( smb_ucs2_t val)
1221 {
1222         return (map_table_flags(val) & UNI_LOWER);
1223 }
1224
1225 /*******************************************************************
1226  Is a digit wchar.
1227 ********************************************************************/
1228
1229 int isdigit_w( smb_ucs2_t val)
1230 {
1231         return (map_table_flags(val) & UNI_DIGIT);
1232 }
1233
1234 /*******************************************************************
1235  Is a hex digit wchar.
1236 ********************************************************************/
1237
1238 int isxdigit_w( smb_ucs2_t val)
1239 {
1240         return (map_table_flags(val) & UNI_XDIGIT);
1241 }
1242
1243 /*******************************************************************
1244  Is a space wchar.
1245 ********************************************************************/
1246
1247 int isspace_w( smb_ucs2_t val)
1248 {
1249         return (map_table_flags(val) & UNI_SPACE);
1250 }
1251
1252 /*******************************************************************
1253  Convert a wchar to upper case.
1254 ********************************************************************/
1255
1256 smb_ucs2_t toupper_w( smb_ucs2_t val )
1257 {
1258         return map_table_upper(val);
1259 }
1260
1261 /*******************************************************************
1262  Convert a wchar to lower case.
1263 ********************************************************************/
1264
1265 smb_ucs2_t tolower_w( smb_ucs2_t val )
1266 {
1267         return map_table_lower(val);
1268 }
1269
1270 static smb_ucs2_t *last_ptr = NULL;
1271
1272 void set_first_token_w(smb_ucs2_t *ptr)
1273 {
1274         last_ptr = ptr;
1275 }
1276
1277 /****************************************************************************
1278  Get the next token from a string, return False if none found
1279  handles double-quotes.
1280  Based on a routine by GJC@VILLAGE.COM.
1281  Extensively modified by Andrew.Tridgell@anu.edu.au
1282  bufsize is in bytes.
1283 ****************************************************************************/
1284
1285 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t',  (smb_ucs2_t)'\n',  (smb_ucs2_t)'\r', 0};
1286 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1287
1288 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1289 {
1290         smb_ucs2_t *s;
1291         BOOL quoted;
1292         size_t len=1;
1293
1294         /*
1295          * Convert bufsize to smb_ucs2_t units.
1296          */
1297
1298         bufsize /= sizeof(smb_ucs2_t);
1299
1300         if (!ptr)
1301                 ptr = &last_ptr;
1302         if (!ptr)
1303                 return(False);
1304
1305         s = *ptr;
1306
1307         /*
1308          * Default to simple separators.
1309          */
1310
1311         if (!sep)
1312                 sep = sep_list;
1313
1314         /*
1315          * Find the first non sep char.
1316          */
1317
1318         while(*s && strchr_w(sep,*s))
1319                 s++;
1320
1321         /*
1322          * Nothing left ?
1323          */
1324
1325         if (!*s)
1326                 return(False);
1327
1328         /*
1329          * Copy over the token.
1330          */
1331
1332         for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1333                 if (*s == quotechar) {
1334                         quoted = !quoted;
1335                 } else {
1336                         len++;
1337                         *buff++ = *s;
1338                 }
1339         }
1340
1341         *ptr = (*s) ? s+1 : s;
1342         *buff = 0;
1343         last_ptr = *ptr;
1344
1345         return(True);
1346 }
1347
1348 /****************************************************************************
1349  Convert list of tokens to array; dependent on above routine.
1350  Uses last_ptr from above - bit of a hack.
1351 ****************************************************************************/
1352
1353 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1354 {
1355         smb_ucs2_t *s=last_ptr;
1356         int ictok=0;
1357         smb_ucs2_t **ret, **iret;
1358
1359         if (!sep)
1360                 sep = sep_list;
1361
1362         while(*s && strchr_w(sep,*s))
1363                 s++;
1364
1365         /*
1366          * Nothing left ?
1367          */
1368
1369         if (!*s)
1370                 return(NULL);
1371
1372         do {
1373                 ictok++;
1374                 while(*s && (!strchr_w(sep,*s)))
1375                         s++;
1376                 while(*s && strchr_w(sep,*s))
1377                         *s++=0;
1378         } while(*s);
1379
1380         *ctok = ictok;
1381         s = last_ptr;
1382
1383         if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
1384                 return NULL;
1385
1386         while(ictok--) {
1387                 *iret++=s;
1388                 while(*s++)
1389                         ;
1390                 while(!*s)
1391                         s++;
1392         }
1393
1394         return ret;
1395 }
1396
1397 /*******************************************************************
1398  Case insensitive string compararison.
1399 ********************************************************************/
1400
1401 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1402 {
1403         /*
1404          * Compare until we run out of string, either t or s, or find a difference.
1405          */
1406
1407         while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1408                 s++;
1409                 t++;
1410         }
1411
1412         return(toupper_w(*s) - toupper_w(*t));
1413 }
1414
1415 /*******************************************************************
1416  Case insensitive string compararison, length limited.
1417  n is in ucs2 units.
1418 ********************************************************************/
1419
1420 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1421 {
1422         /*
1423          * Compare until we run out of string, either t or s, or chars.
1424          */
1425
1426         while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1427                 s++;
1428                 t++;
1429                 n--;
1430         }
1431
1432     /*
1433          * Not run out of chars - strings are different lengths.
1434          */
1435
1436     if (n)
1437       return(toupper_w(*s) - toupper_w(*t));
1438
1439     /*
1440          * Identical up to where we run out of chars,
1441          * and strings are same length.
1442          */
1443
1444         return(0);
1445 }
1446
1447 /*******************************************************************
1448  Compare 2 strings.
1449 ********************************************************************/
1450
1451 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1452 {
1453         if (s1 == s2)
1454                 return(True);
1455         if (!s1 || !s2)
1456                 return(False);
1457
1458         return(StrCaseCmp_w(s1,s2)==0);
1459 }
1460
1461 /*******************************************************************
1462  Compare 2 strings up to and including the nth char. n is in ucs2
1463  units.
1464 ******************************************************************/
1465
1466 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1467 {
1468         if (s1 == s2)
1469                 return(True);
1470         if (!s1 || !s2 || !n)
1471                 return(False);
1472
1473         return(StrnCaseCmp_w(s1,s2,n)==0);
1474 }
1475
1476 /*******************************************************************
1477  Compare 2 strings (case sensitive).
1478 ********************************************************************/
1479
1480 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1481 {
1482         if (s1 == s2)
1483                 return(True);
1484         if (!s1 || !s2)
1485                 return(False);
1486
1487         return(strcmp_w(s1,s2)==0);
1488 }
1489
1490 /*******************************************************************
1491  Convert a string to lower case.
1492 ********************************************************************/
1493
1494 void strlower_w(smb_ucs2_t *s)
1495 {
1496         while (*s) {
1497                 if (isupper_w(*s))
1498                         *s = tolower_w(*s);
1499                 s++;
1500         }
1501 }
1502
1503 /*******************************************************************
1504  Convert a string to upper case.
1505 ********************************************************************/
1506
1507 void strupper_w(smb_ucs2_t *s)
1508 {
1509         while (*s) {
1510                 if (islower_w(*s))
1511                         *s = toupper_w(*s);
1512                 s++;
1513         }
1514 }
1515
1516 /*******************************************************************
1517  Convert a string to "normal" form.
1518 ********************************************************************/
1519
1520 void strnorm_w(smb_ucs2_t *s)
1521 {
1522         extern int case_default;
1523         if (case_default == CASE_UPPER)
1524                 strupper_w(s);
1525         else
1526                 strlower_w(s);
1527 }
1528
1529 /*******************************************************************
1530  Check if a string is in "normal" case.
1531 ********************************************************************/
1532
1533 BOOL strisnormal_w(smb_ucs2_t *s)
1534 {
1535         extern int case_default;
1536         if (case_default == CASE_UPPER)
1537                 return(!strhaslower_w(s));
1538
1539         return(!strhasupper_w(s));
1540 }
1541
1542 /****************************************************************************
1543  String replace.
1544 ****************************************************************************/
1545
1546 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1547 {
1548         while (*s) {
1549                 if (oldc == *s)
1550                         *s = newc;
1551                 s++;
1552         }
1553 }
1554
1555 /*******************************************************************
1556  Skip past some strings in a buffer. n is in bytes.
1557 ********************************************************************/
1558
1559 smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
1560 {
1561         while (n--)
1562                 buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1;
1563         return(buf);
1564 }
1565
1566 /*******************************************************************
1567  Count the number of characters in a string. Same as strlen_w in
1568  smb_ucs2_t string units.
1569 ********************************************************************/
1570
1571 size_t str_charnum_w(const smb_ucs2_t *s)
1572 {
1573         return strlen_w(s);
1574 }
1575
1576 /*******************************************************************
1577  Trim the specified elements off the front and back of a string.
1578 ********************************************************************/
1579
1580 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1581 {
1582         BOOL ret = False;
1583         size_t front_len = (front && *front) ? strlen_w(front) : 0;
1584         size_t back_len = (back && *back) ? strlen_w(back) : 0;
1585         size_t s_len;
1586
1587         while (front_len && strncmp_w(s, front, front_len) == 0) {
1588                 smb_ucs2_t *p = s;
1589                 ret = True;
1590
1591                 while (1) {
1592                         if (!(*p = p[front_len]))
1593                                 break;
1594                         p++;
1595                 }
1596         }
1597
1598         if(back_len) {
1599                 s_len = strlen_w(s);
1600                 while ((s_len >= back_len) &&
1601                         (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1602                         ret = True;
1603                         s[s_len - back_len] = 0;
1604                         s_len = strlen_w(s);
1605                 }
1606         }
1607
1608         return(ret);
1609 }
1610
1611 /****************************************************************************
1612  Does a string have any uppercase chars in it ?
1613 ****************************************************************************/
1614
1615 BOOL strhasupper_w(const smb_ucs2_t *s)
1616 {
1617         while (*s) {
1618                 if (isupper_w(*s))
1619                         return(True);
1620                 s++;
1621         }
1622         return(False);
1623 }
1624
1625 /****************************************************************************
1626  Does a string have any lowercase chars in it ?
1627 ****************************************************************************/
1628
1629 BOOL strhaslower_w(const smb_ucs2_t *s)
1630 {
1631         while (*s) {
1632                 if (islower(*s))
1633                         return(True);
1634                 s++;
1635         }
1636         return(False);
1637 }
1638
1639 /****************************************************************************
1640  Find the number of 'c' chars in a string.
1641 ****************************************************************************/
1642
1643 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1644 {
1645         size_t count=0;
1646
1647         while (*s) {
1648                 if (*s == c)
1649                         count++;
1650                 s++;
1651         }
1652         return(count);
1653 }
1654
1655 /*******************************************************************
1656  Return True if a string consists only of one particular character.
1657 ********************************************************************/
1658
1659 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1660 {
1661         if(s == NULL)
1662                 return False;
1663         if(!*s)
1664                 return False;
1665
1666         while (*s) {
1667                 if (*s != c)
1668                         return False;
1669                 s++;
1670         }
1671         return True;
1672 }
1673
1674 /*******************************************************************
1675  Paranoid strcpy into a buffer of given length (includes terminating
1676  zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1677  does *NOT* check for multibyte characters. Don't change it !
1678  maxlength is in ucs2 units.
1679 ********************************************************************/
1680
1681 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const smb_ucs2_t *other_safe_chars, size_t maxlength)
1682 {
1683         size_t len, i;
1684         smb_ucs2_t nullstr_w = (smb_ucs2_t)0;
1685
1686         if (!dest) {
1687                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1688                 return NULL;
1689         }
1690
1691         if (!src) {
1692                 *dest = 0;
1693                 return dest;
1694         }
1695
1696         len = strlen_w(src);
1697         if (len >= maxlength)
1698                 len = maxlength - 1;
1699
1700         if (!other_safe_chars)
1701                 other_safe_chars = &nullstr_w;
1702
1703         for(i = 0; i < len; i++) {
1704                 smb_ucs2_t val = src[i];
1705                 if(isupper_w(val) ||islower_w(val) || isdigit_w(val) || strchr_w(other_safe_chars, val))
1706                         dest[i] = src[i];
1707                 else
1708                         dest[i] = (smb_ucs2_t)'_';
1709         }
1710
1711         dest[i] = 0;
1712
1713         return dest;
1714 }
1715
1716 /****************************************************************************
1717  Like strncpy but always null terminates. Make sure there is room !
1718  The variable n should always be one less than the available size and is in
1719  ucs2 units.
1720 ****************************************************************************/
1721
1722 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1723 {
1724         smb_ucs2_t *d = dest;
1725         if (!dest)
1726                 return(NULL);
1727         if (!src) {
1728                 *dest = 0;
1729                 return(dest);
1730         }
1731
1732         while (n-- && (*d++ = *src++))
1733                 ;
1734         *d = 0;
1735         return(dest);
1736 }
1737
1738 /****************************************************************************
1739  Like strncpy but copies up to the character marker. Always null terminates.
1740  returns a pointer to the character marker in the source string (src).
1741  n is in ucs2 units.
1742 ****************************************************************************/
1743
1744 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1745 {
1746         smb_ucs2_t *p;
1747         size_t str_len;
1748
1749         p = strchr_w(src, c);
1750         if (p == NULL) {
1751                 fstring cval;
1752                 smb_ucs2_t mbcval[2];
1753                 mbcval[0] = c;
1754                 mbcval[1] = 0;
1755                 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1756                         unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1757                 return NULL;
1758         }
1759
1760         str_len = PTR_DIFF(p, src) + 1;
1761         safe_strcpy_w(dest, src, MIN(n, str_len));
1762
1763         return p;
1764 }
1765
1766 /*************************************************************
1767  Routine to get hex characters and turn them into a 16 byte array.
1768  The array can be variable length, and any non-hex-numeric
1769  characters are skipped.  "0xnn" or "0Xnn" is specially catered
1770  for. len is in bytes.
1771  Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1772 **************************************************************/
1773
1774 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1775 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1776                                                                 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1777                                                                 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1778                                                                 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1779
1780 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1781 {
1782         size_t i;
1783         size_t num_chars = 0;
1784         unsigned char   lonybble, hinybble;
1785         smb_ucs2_t *p1 = NULL, *p2 = NULL;
1786
1787         /*
1788          * Convert to smb_ucs2_t units.
1789          */
1790
1791         len /= sizeof(smb_ucs2_t);
1792
1793         for (i = 0; i < len && strhex[i] != 0; i++) {
1794                 if (strnequal_w(hexchars, hexprefix, 2)) {
1795                         i++; /* skip two chars */
1796                         continue;
1797                 }
1798
1799                 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1800                         break;
1801
1802                 i++; /* next hex digit */
1803
1804                 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1805                         break;
1806
1807                 /* get the two nybbles */
1808                 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1809                 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1810
1811                 p[num_chars] = (hinybble << 4) | lonybble;
1812                 num_chars++;
1813
1814                 p1 = NULL;
1815                 p2 = NULL;
1816         }
1817         return num_chars;
1818 }
1819
1820 /****************************************************************************
1821  Check if a string is part of a list.
1822 ****************************************************************************/
1823
1824 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1825 {
1826         wpstring tok;
1827         smb_ucs2_t *p=list;
1828
1829         if (!list)
1830                 return(False);
1831
1832         while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1833                 if (casesensitive) {
1834                         if (strcmp_w(tok,s) == 0)
1835                                 return(True);
1836                 } else {
1837                         if (StrCaseCmp_w(tok,s) == 0)
1838                                 return(True);
1839                 }
1840         }
1841         return(False);
1842 }
1843
1844 /* This is used to prevent lots of mallocs of size 2 */
1845 static smb_ucs2_t *null_string = NULL;
1846
1847 /****************************************************************************
1848  Set a string value, allocing the space for the string.
1849 ****************************************************************************/
1850
1851 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1852 {
1853         size_t l;
1854
1855         if (!null_string) {
1856                 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1857                         DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1858                 return False;
1859                 }
1860                 *null_string = 0;
1861         }
1862
1863         if (!src)
1864                 src = null_string;
1865
1866         l = strlen_w(src);
1867
1868         if (l == 0)
1869                 *dest = null_string;
1870         else {
1871                 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1872                 if ((*dest) == NULL) {
1873                         DEBUG(0,("Out of memory in string_init_w\n"));
1874                         return False;
1875                 }
1876
1877                 wpstrcpy(*dest,src);
1878         }
1879         return(True);
1880 }
1881
1882 /****************************************************************************
1883  Free a string value.
1884 ****************************************************************************/
1885
1886 void string_free_w(smb_ucs2_t **s)
1887 {
1888         if (!s || !(*s))
1889                 return;
1890         if (*s == null_string)
1891                 *s = NULL;
1892         SAFE_FREE(*s);
1893 }
1894
1895 /****************************************************************************
1896  Set a string value, allocing the space for the string, and deallocating any
1897  existing space.
1898 ****************************************************************************/
1899
1900 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1901 {
1902         string_free_w(dest);
1903
1904         return(string_init_w(dest,src));
1905 }
1906
1907 /****************************************************************************
1908  Substitute a string for a pattern in another string. Make sure there is
1909  enough room !
1910
1911  This routine looks for pattern in s and replaces it with
1912  insert. It may do multiple replacements.
1913
1914  Any of " ; ' $ or ` in the insert string are replaced with _
1915  if len==0 then no length check is performed
1916  len is in ucs2 units.
1917 ****************************************************************************/
1918
1919 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1920 {
1921         smb_ucs2_t *p;
1922         ssize_t ls,lp,li, i;
1923
1924         if (!insert || !pattern || !s)
1925                 return;
1926
1927         ls = (ssize_t)strlen_w(s);
1928         lp = (ssize_t)strlen_w(pattern);
1929         li = (ssize_t)strlen_w(insert);
1930
1931         if (!*pattern)
1932                 return;
1933
1934         while (lp <= ls && (p = strstr_w(s,pattern))) {
1935                 if (len && (ls + (li-lp) >= len)) {
1936                         fstring out;
1937                         DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1938                                  (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1939                                  unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1940                         break;
1941                 }
1942                 if (li != lp)
1943                         memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1944
1945                 for (i=0;i<li;i++) {
1946                         switch (insert[i]) {
1947                         case (smb_ucs2_t)'`':
1948                         case (smb_ucs2_t)'"':
1949                         case (smb_ucs2_t)'\'':
1950                         case (smb_ucs2_t)';':
1951                         case (smb_ucs2_t)'$':
1952                         case (smb_ucs2_t)'%':
1953                         case (smb_ucs2_t)'\r':
1954                         case (smb_ucs2_t)'\n':
1955                                 p[i] = (smb_ucs2_t)'_';
1956                                 break;
1957                         default:
1958                                 p[i] = insert[i];
1959                         }
1960                 }
1961                 s = p + li;
1962                 ls += (li-lp);
1963         }
1964 }
1965
1966 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1967 {
1968         string_sub_w(s, pattern, insert, sizeof(wfstring));
1969 }
1970
1971 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1972 {
1973         string_sub_w(s, pattern, insert, sizeof(wpstring));
1974 }
1975
1976 /****************************************************************************
1977  Similar to string_sub() but allows for any character to be substituted.
1978  Use with caution !
1979  if len==0 then no length check is performed.
1980 ****************************************************************************/
1981
1982 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1983 {
1984         smb_ucs2_t *p;
1985         ssize_t ls,lp,li;
1986
1987         if (!insert || !pattern || !s)
1988                 return;
1989
1990         ls = (ssize_t)strlen_w(s);
1991         lp = (ssize_t)strlen_w(pattern);
1992         li = (ssize_t)strlen_w(insert);
1993
1994         if (!*pattern)
1995                 return;
1996
1997         while (lp <= ls && (p = strstr_w(s,pattern))) {
1998                 if (len && (ls + (li-lp) >= len)) {
1999                         fstring out;
2000                         DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
2001                                  (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
2002                                  unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
2003                         break;
2004                 }
2005                 if (li != lp)
2006                         memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
2007
2008                 memcpy(p, insert, li*sizeof(smb_ucs2_t));
2009                 s = p + li;
2010                 ls += (li-lp);
2011         }
2012 }
2013
2014 /****************************************************************************
2015  Splits out the front and back at a separator.
2016 ****************************************************************************/
2017
2018 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
2019 {
2020     smb_ucs2_t *p = strrchr_w(path, sep);
2021
2022         if (p != NULL)
2023                 *p = 0;
2024
2025         if (front != NULL)
2026                 wpstrcpy(front, path);
2027
2028         if (p != NULL) {
2029                 if (back != NULL)
2030                         wpstrcpy(back, p+1);
2031                 *p = (smb_ucs2_t)'\\';
2032         } else {
2033                 if (back != NULL)
2034                         back[0] = 0;
2035         }
2036 }
2037
2038
2039 /****************************************************************************
2040  Write an octal as a string.
2041 ****************************************************************************/
2042
2043 smb_ucs2_t *octal_string_w(int i)
2044 {
2045         static smb_ucs2_t wret[64];
2046         char ret[64];
2047
2048         if (i == -1)
2049                 slprintf(ret, sizeof(ret)-1, "-1");
2050         else
2051                 slprintf(ret, sizeof(ret)-1, "0%o", i);
2052         return unix_to_unicode(wret, ret, sizeof(wret));
2053 }
2054
2055
2056 /****************************************************************************
2057  Truncate a string at a specified length.
2058  length is in ucs2 units.
2059 ****************************************************************************/
2060
2061 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
2062 {
2063         if (s && strlen_w(s) > length)
2064                 s[length] = 0;
2065
2066         return s;
2067 }
2068
2069 /******************************************************************
2070  functions for UTF8 support (using in kanji.c)
2071  ******************************************************************/
2072 smb_ucs2_t doscp2ucs2(int w)
2073 {
2074   return ((smb_ucs2_t)doscp_to_ucs2[w]);
2075 }
2076
2077 int ucs2doscp(smb_ucs2_t w)
2078 {
2079   return ((int)ucs2_to_doscp[w]);
2080 }
2081
2082 /* Temporary fix until 3.0... JRA */
2083
2084 int rpcstr_pull(char* dest, void *src, int dest_len, int src_len, int flags)
2085 {
2086         if(dest_len==-1)
2087                 dest_len=MAXUNI-3;
2088
2089         if (flags & STR_TERMINATE)
2090                 src_len = strlen_w(src)*2+2;
2091
2092         dest_len = MIN((src_len/2), (dest_len-1));
2093         unistr_to_ascii(dest, src, dest_len);
2094         return src_len;
2095 }