source/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #include "includes.h"
  25
  26 /* We can parameterize this if someone complains.... JRA. */
  27
  28 char lp_failed_convert_char(void)
  29 {
  30         return '_';
  31 }
  32
  33 /**
  34  * @file
  35  *
  36  * @brief Character-set conversion routines built on our iconv.
  37  *
  38  * @note Samba's internal character set (at least in the 3.0 series)
  39  * is always the same as the one for the Unix filesystem.  It is
  40  * <b>not</b> necessarily UTF-8 and may be different on machines that
  41  * need i18n filenames to be compatible with Unix software.  It does
  42  * have to be a superset of ASCII.  All multibyte sequences must start
  43  * with a byte with the high bit set.
  44  *
  45  * @sa lib/iconv.c
  46  */
  47
  48
   49 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; /* iconv descriptors indexed [from][to]; opened by init_iconv() */
   50 static BOOL conv_silent; /* True suppresses the debug message when a conversion fails; init_iconv() sets it around init_doschar_table() and init_valid_table() */
  51
  52 /**
  53  * Return the name of a charset to give to iconv().
  54  **/
  55 static const char *charset_name(charset_t ch)
  56 {
  57         const char *ret = NULL;
  58
  59         if (ch == CH_UCS2) ret = "UTF-16LE";
  60         else if (ch == CH_UNIX) ret = lp_unix_charset();
  61         else if (ch == CH_DOS) ret = lp_dos_charset();
  62         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  63         else if (ch == CH_UTF8) ret = "UTF8";
  64
  65 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  66         if (ret && !strcmp(ret, "LOCALE")) {
  67                 const char *ln = NULL;
  68
  69 #ifdef HAVE_SETLOCALE
  70                 setlocale(LC_ALL, "");
  71 #endif
  72                 ln = nl_langinfo(CODESET);
  73                 if (ln) {
  74                         /* Check whether the charset name is supported
  75                            by iconv */
  76                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  77                         if (handle == (smb_iconv_t) -1) {
  78                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  79                                 ln = NULL;
  80                         } else {
  81                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  82                                 smb_iconv_close(handle);
  83                         }
  84                 }
  85                 ret = ln;
  86         }
  87 #endif
  88
  89         if (!ret || !*ret) ret = "ASCII";
  90         return ret;
  91 }
  92
  93 void lazy_initialize_conv(void)
  94 {
  95         static int initialized = False;
  96
  97         if (!initialized) {
  98                 initialized = True;
  99                 load_case_tables();
 100                 init_iconv();
 101         }
 102 }
 103
 104 /**
 105  * Initialize iconv conversion descriptors.
 106  *
 107  * This is called the first time it is needed, and also called again
 108  * every time the configuration is reloaded, because the charset or
 109  * codepage might have changed.
 110  **/
 111 void init_iconv(void)
 112 {
 113         int c1, c2;
 114         BOOL did_reload = False;
 115
 116         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 117            first */
 118         if (!conv_handles[CH_UNIX][CH_UCS2])
 119                 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2), "ASCII");
 120
 121         if (!conv_handles[CH_UCS2][CH_UNIX])
 122                 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UCS2));
 123
 124         for (c1=0;c1<NUM_CHARSETS;c1++) {
 125                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 126                         const char *n1 = charset_name((charset_t)c1);
 127                         const char *n2 = charset_name((charset_t)c2);
 128                         if (conv_handles[c1][c2] &&
 129                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 130                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 131                                 continue;
 132
 133                         did_reload = True;
 134
 135                         if (conv_handles[c1][c2])
 136                                 smb_iconv_close(conv_handles[c1][c2]);
 137
 138                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 139                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 140                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 141                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 142                                 if (c1 != CH_UCS2) {
 143                                         n1 = "ASCII";
 144                                 }
 145                                 if (c2 != CH_UCS2) {
 146                                         n2 = "ASCII";
 147                                 }
 148                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 149                                         n1, n2 ));
 150                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 151                                 if (!conv_handles[c1][c2]) {
 152                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 153                                         smb_panic("init_iconv: conv_handle initialization failed.");
 154                                 }
 155                         }
 156                 }
 157         }
 158
 159         if (did_reload) {
 160                 /* XXX: Does this really get called every time the dos
 161                  * codepage changes? */
 162                 /* XXX: Is the did_reload test too strict? */
 163                 conv_silent = True;
 164                 init_doschar_table();
 165                 init_valid_table();
 166                 conv_silent = False;
 167         }
 168 }
 169
 170 /**
 171  * Convert string from one encoding to another, making error checking etc
 172  * Slow path version - uses (slow) iconv.
 173  *
 174  * @param src pointer to source string (multibyte or singlebyte)
 175  * @param srclen length of the source string in bytes
 176  * @param dest pointer to destination string (multibyte or singlebyte)
 177  * @param destlen maximal length allowed for string
 178  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 179  * @returns the number of bytes occupied in the destination
 180  *
 181  * Ensure the srclen contains the terminating zero.
 182  *
 183  **/
 184
 185 static size_t convert_string_internal(charset_t from, charset_t to,
 186                       void const *src, size_t srclen,
 187                       void *dest, size_t destlen, BOOL allow_bad_conv)
 188 {
 189         size_t i_len, o_len;
 190         size_t retval;
 191         const char* inbuf = (const char*)src;
 192         char* outbuf = (char*)dest;
 193         smb_iconv_t descriptor;
 194
 195         lazy_initialize_conv();
 196
 197         descriptor = conv_handles[from][to];
 198
 199         if (srclen == (size_t)-1) {
 200                 if (from == CH_UCS2) {
 201                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 202                 } else {
 203                         srclen = strlen((const char *)src)+1;
 204                 }
 205         }
 206
 207
 208         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 209                 if (!conv_silent)
 210                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 211                 return (size_t)-1;
 212         }
 213
 214         i_len=srclen;
 215         o_len=destlen;
 216
 217  again:
 218
 219         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 220         if(retval==(size_t)-1) {
 221                 const char *reason="unknown error";
 222                 switch(errno) {
 223                         case EINVAL:
 224                                 reason="Incomplete multibyte sequence";
 225                                 if (!conv_silent)
 226                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 227                                 if (allow_bad_conv)
 228                                         goto use_as_is;
 229                                 break;
 230                         case E2BIG:
 231                                 reason="No more room";
 232                                 if (!conv_silent) {
 233                                         if (from == CH_UNIX) {
 234                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 235                                                         charset_name(from), charset_name(to),
 236                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 237                                         } else {
 238                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 239                                                         charset_name(from), charset_name(to),
 240                                                         (unsigned int)srclen, (unsigned int)destlen));
 241                                         }
 242                                 }
 243                                 break;
 244                         case EILSEQ:
 245                                 reason="Illegal multibyte sequence";
 246                                 if (!conv_silent)
 247                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 248                                 if (allow_bad_conv)
 249                                         goto use_as_is;
 250                                 break;
 251                         default:
 252                                 if (!conv_silent)
 253                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 254                                 break;
 255                 }
 256                 /* smb_panic(reason); */
 257         }
 258         return destlen-o_len;
 259
 260  use_as_is:
 261
 262         /*
 263          * Conversion not supported. This is actually an error, but there are so
 264          * many misconfigured iconv systems and smb.conf's out there we can't just
 265          * fail. Do a very bad conversion instead.... JRA.
 266          */
 267
 268         {
 269                 if (o_len == 0 || i_len == 0)
 270                         return destlen - o_len;
 271
 272                 if (from == CH_UCS2 && to != CH_UCS2) {
 273                         /* Can't convert from ucs2 to multibyte. Replace with the default fail char. */
 274                         if (i_len < 2)
 275                                 return destlen - o_len;
 276                         if (i_len >= 2) {
 277                                 *outbuf = lp_failed_convert_char();
 278
 279                                 outbuf++;
 280                                 o_len--;
 281
 282                                 inbuf += 2;
 283                                 i_len -= 2;
 284                         }
 285
 286                         if (o_len == 0 || i_len == 0)
 287                                 return destlen - o_len;
 288
 289                         /* Keep trying with the next char... */
 290                         goto again;
 291
 292                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 293                         /* Can't convert to ucs2 - just widen by adding the default fail char then zero. */
 294                         if (o_len < 2)
 295                                 return destlen - o_len;
 296
 297                         outbuf[0] = lp_failed_convert_char();
 298                         outbuf[1] = '\0';
 299
 300                         inbuf++;
 301                         i_len--;
 302
 303                         outbuf += 2;
 304                         o_len -= 2;
 305
 306                         if (o_len == 0 || i_len == 0)
 307                                 return destlen - o_len;
 308
 309                         /* Keep trying with the next char... */
 310                         goto again;
 311
 312                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 313                         /* Failed multibyte to multibyte. Just copy the default fail char and
 314                                 try again. */
 315                         outbuf[0] = lp_failed_convert_char();
 316
 317                         inbuf++;
 318                         i_len--;
 319
 320                         outbuf++;
 321                         o_len--;
 322
 323                         if (o_len == 0 || i_len == 0)
 324                                 return destlen - o_len;
 325
 326                         /* Keep trying with the next char... */
 327                         goto again;
 328
 329                 } else {
 330                         /* Keep compiler happy.... */
 331                         return destlen - o_len;
 332                 }
 333         }
 334 }
 335
 336 /**
 337  * Convert string from one encoding to another, making error checking etc
 338  * Fast path version - handles ASCII first.
 339  *
 340  * @param src pointer to source string (multibyte or singlebyte)
 341  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 342  * @param dest pointer to destination string (multibyte or singlebyte)
 343  * @param destlen maximal length allowed for string - *NEVER* -1.
 344  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 345  * @returns the number of bytes occupied in the destination
 346  *
 347  * Ensure the srclen contains the terminating zero.
 348  *
 349  * This function has been hand-tuned to provide a fast path.
 350  * Don't change unless you really know what you are doing. JRA.
 351  **/
 352
 353 size_t convert_string(charset_t from, charset_t to,
 354                       void const *src, size_t srclen,
 355                       void *dest, size_t destlen, BOOL allow_bad_conv)
 356 {
 357         /*
 358          * NB. We deliberately don't do a strlen here if srclen == -1.
 359          * This is very expensive over millions of calls and is taken
 360          * care of in the slow path in convert_string_internal. JRA.
 361          */
 362
 363 #ifdef DEVELOPER
 364         SMB_ASSERT(destlen != (size_t)-1);
 365 #endif
 366
 367         if (srclen == 0)
 368                 return 0;
 369
 370         if (from != CH_UCS2 && to != CH_UCS2) {
 371                 const unsigned char *p = (const unsigned char *)src;
 372                 unsigned char *q = (unsigned char *)dest;
 373                 size_t slen = srclen;
 374                 size_t dlen = destlen;
 375                 unsigned char lastp;
 376                 size_t retval = 0;
 377
 378                 /* If all characters are ascii, fast path here. */
 379                 while (slen && dlen) {
 380                         if ((lastp = *p) <= 0x7f) {
 381                                 *q++ = *p++;
 382                                 if (slen != (size_t)-1) {
 383                                         slen--;
 384                                 }
 385                                 dlen--;
 386                                 retval++;
 387                                 if (!lastp)
 388                                         break;
 389                         } else {
 390 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 391                                 goto general_case;
 392 #else
 393                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 394 #endif
 395                         }
 396                 }
 397                 return retval;
 398         } else if (from == CH_UCS2 && to != CH_UCS2) {
 399                 const unsigned char *p = (const unsigned char *)src;
 400                 unsigned char *q = (unsigned char *)dest;
 401                 size_t retval = 0;
 402                 size_t slen = srclen;
 403                 size_t dlen = destlen;
 404                 unsigned char lastp;
 405
 406                 /* If all characters are ascii, fast path here. */
 407                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 408                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 409                                 *q++ = *p;
 410                                 if (slen != (size_t)-1) {
 411                                         slen -= 2;
 412                                 }
 413                                 p += 2;
 414                                 dlen--;
 415                                 retval++;
 416                                 if (!lastp)
 417                                         break;
 418                         } else {
 419 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 420                                 goto general_case;
 421 #else
 422                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 423 #endif
 424                         }
 425                 }
 426                 return retval;
 427         } else if (from != CH_UCS2 && to == CH_UCS2) {
 428                 const unsigned char *p = (const unsigned char *)src;
 429                 unsigned char *q = (unsigned char *)dest;
 430                 size_t retval = 0;
 431                 size_t slen = srclen;
 432                 size_t dlen = destlen;
 433                 unsigned char lastp;
 434
 435                 /* If all characters are ascii, fast path here. */
 436                 while (slen && (dlen >= 2)) {
 437                         if ((lastp = *p) <= 0x7F) {
 438                                 *q++ = *p++;
 439                                 *q++ = '\0';
 440                                 if (slen != (size_t)-1) {
 441                                         slen--;
 442                                 }
 443                                 dlen -= 2;
 444                                 retval += 2;
 445                                 if (!lastp)
 446                                         break;
 447                         } else {
 448 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 449                                 goto general_case;
 450 #else
 451                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 452 #endif
 453                         }
 454                 }
 455                 return retval;
 456         }
 457
 458 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 459   general_case:
 460 #endif
 461         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 462 }
 463
 464 /**
 465  * Convert between character sets, allocating a new buffer for the result.
 466  *
 467  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 468  * @param srclen length of source buffer.
 469  * @param dest always set at least to NULL
 470  * @note -1 is not accepted for srclen.
 471  *
 472  * @returns Size in bytes of the converted string; or -1 in case of error.
 473  *
 474  * Ensure the srclen contains the terminating zero.
 475  *
 476  * I hate the goto's in this function. It's embarressing.....
 477  * There has to be a cleaner way to do this. JRA.
 478  **/
 479
 480 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 481                                void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
 482 {
 483         size_t i_len, o_len, destlen = MAX(srclen, 512);
 484         size_t retval;
 485         const char *inbuf = (const char *)src;
 486         char *outbuf = NULL, *ob = NULL;
 487         smb_iconv_t descriptor;
 488
 489         *dest = NULL;
 490
 491         if (src == NULL || srclen == (size_t)-1)
 492                 return (size_t)-1;
 493         if (srclen == 0)
 494                 return 0;
 495
 496         lazy_initialize_conv();
 497
 498         descriptor = conv_handles[from][to];
 499
 500         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 501                 if (!conv_silent)
 502                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 503                 return (size_t)-1;
 504         }
 505
 506   convert:
 507
 508         if ((destlen*2) < destlen) {
 509                 /* wrapped ! abort. */
 510                 if (!conv_silent)
 511                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 512                 if (!ctx)
 513                         SAFE_FREE(outbuf);
 514                 return (size_t)-1;
 515         } else {
 516                 destlen = destlen * 2;
 517         }
 518
 519         if (ctx)
 520                 ob = (char *)talloc_realloc(ctx, ob, destlen);
 521         else
 522                 ob = (char *)Realloc(ob, destlen);
 523
 524         if (!ob) {
 525                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 526                 if (!ctx)
 527                         SAFE_FREE(outbuf);
 528                 return (size_t)-1;
 529         } else {
 530                 outbuf = ob;
 531         }
 532         i_len = srclen;
 533         o_len = destlen;
 534
 535  again:
 536
 537         retval = smb_iconv(descriptor,
 538                            &inbuf, &i_len,
 539                            &outbuf, &o_len);
 540         if(retval == (size_t)-1)                {
 541                 const char *reason="unknown error";
 542                 switch(errno) {
 543                         case EINVAL:
 544                                 reason="Incomplete multibyte sequence";
 545                                 if (!conv_silent)
 546                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 547                                 if (allow_bad_conv)
 548                                         goto use_as_is;
 549                                 break;
 550                         case E2BIG:
 551                                 goto convert;
 552                         case EILSEQ:
 553                                 reason="Illegal multibyte sequence";
 554                                 if (!conv_silent)
 555                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 556                                 if (allow_bad_conv)
 557                                         goto use_as_is;
 558                                 break;
 559                 }
 560                 if (!conv_silent)
 561                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 562                 /* smb_panic(reason); */
 563                 return (size_t)-1;
 564         }
 565
 566   out:
 567
 568         destlen = destlen - o_len;
 569         if (ctx)
 570                 *dest = (char *)talloc_realloc(ctx,ob,destlen);
 571         else
 572                 *dest = (char *)Realloc(ob,destlen);
 573         if (destlen && !*dest) {
 574                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 575                 if (!ctx)
 576                         SAFE_FREE(ob);
 577                 return (size_t)-1;
 578         }
 579
 580         return destlen;
 581
 582  use_as_is:
 583
 584         /*
 585          * Conversion not supported. This is actually an error, but there are so
 586          * many misconfigured iconv systems and smb.conf's out there we can't just
 587          * fail. Do a very bad conversion instead.... JRA.
 588          */
 589
 590         {
 591                 if (o_len == 0 || i_len == 0)
 592                         goto out;
 593
 594                 if (from == CH_UCS2 && to != CH_UCS2) {
 595                         /* Can't convert from ucs2 to multibyte. Just use the default fail char. */
 596                         if (i_len < 2)
 597                                 goto out;
 598
 599                         if (i_len >= 2) {
 600                                 *outbuf = lp_failed_convert_char();
 601
 602                                 outbuf++;
 603                                 o_len--;
 604
 605                                 inbuf += 2;
 606                                 i_len -= 2;
 607                         }
 608
 609                         if (o_len == 0 || i_len == 0)
 610                                 goto out;
 611
 612                         /* Keep trying with the next char... */
 613                         goto again;
 614
 615                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 616                         /* Can't convert to ucs2 - just widen by adding the default fail char then zero. */
 617                         if (o_len < 2)
 618                                 goto out;
 619
 620                         outbuf[0] = lp_failed_convert_char();
 621                         outbuf[1] = '\0';
 622
 623                         inbuf++;
 624                         i_len--;
 625
 626                         outbuf += 2;
 627                         o_len -= 2;
 628
 629                         if (o_len == 0 || i_len == 0)
 630                                 goto out;
 631
 632                         /* Keep trying with the next char... */
 633                         goto again;
 634
 635                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 636                         /* Failed multibyte to multibyte. Just copy the default fail char and
 637                                 try again. */
 638                         outbuf[0] = lp_failed_convert_char();
 639
 640                         inbuf++;
 641                         i_len--;
 642
 643                         outbuf++;
 644                         o_len--;
 645
 646                         if (o_len == 0 || i_len == 0)
 647                                 goto out;
 648
 649                         /* Keep trying with the next char... */
 650                         goto again;
 651
 652                 } else {
 653                         /* Keep compiler happy.... */
 654                         goto out;
 655                 }
 656         }
 657 }
 658
 659 /**
 660  * Convert between character sets, allocating a new buffer using talloc for the result.
 661  *
 662  * @param srclen length of source buffer.
 663  * @param dest always set at least to NULL
 664  * @note -1 is not accepted for srclen.
 665  *
 666  * @returns Size in bytes of the converted string; or -1 in case of error.
 667  **/
 668 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 669                                 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
 670 {
 671         size_t dest_len;
 672
 673         *dest = NULL;
 674         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
 675         if (dest_len == (size_t)-1)
 676                 return (size_t)-1;
 677         if (*dest == NULL)
 678                 return (size_t)-1;
 679         return dest_len;
 680 }
 681
 682 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 683 {
 684         size_t size;
 685         smb_ucs2_t *buffer;
 686
 687         size = push_ucs2_allocate(&buffer, src);
 688         if (size == (size_t)-1) {
 689                 smb_panic("failed to create UCS2 buffer");
 690         }
 691         if (!strupper_w(buffer) && (dest == src)) {
 692                 free(buffer);
 693                 return srclen;
 694         }
 695
 696         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
 697         free(buffer);
 698         return size;
 699 }
 700
 701 /**
 702  strdup() a unix string to upper case.
 703  Max size is pstring.
 704 **/
 705
 706 char *strdup_upper(const char *s)
 707 {
 708         pstring out_buffer;
 709         const unsigned char *p = (const unsigned char *)s;
 710         unsigned char *q = (unsigned char *)out_buffer;
 711
 712         /* this is quite a common operation, so we want it to be
 713            fast. We optimise for the ascii case, knowing that all our
 714            supported multi-byte character sets are ascii-compatible
 715            (ie. they match for the first 128 chars) */
 716
 717         while (1) {
 718                 if (*p & 0x80)
 719                         break;
 720                 *q++ = toupper(*p);
 721                 if (!*p)
 722                         break;
 723                 p++;
 724                 if (p - ( const unsigned char *)s >= sizeof(pstring))
 725                         break;
 726         }
 727
 728         if (*p) {
 729                 /* MB case. */
 730                 size_t size;
 731                 wpstring buffer;
 732                 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
 733                 if (size == (size_t)-1) {
 734                         return NULL;
 735                 }
 736
 737                 strupper_w(buffer);
 738
 739                 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
 740                 if (size == (size_t)-1) {
 741                         return NULL;
 742                 }
 743         }
 744
 745         return strdup(out_buffer);
 746 }
 747
 748 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 749 {
 750         size_t size;
 751         smb_ucs2_t *buffer = NULL;
 752
 753         size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
 754                                        (void **) &buffer, True);
 755         if (size == (size_t)-1 || !buffer) {
 756                 smb_panic("failed to create UCS2 buffer");
 757         }
 758         if (!strlower_w(buffer) && (dest == src)) {
 759                 SAFE_FREE(buffer);
 760                 return srclen;
 761         }
 762         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
 763         SAFE_FREE(buffer);
 764         return size;
 765 }
 766
 767 /**
 768  strdup() a unix string to lower case.
 769 **/
 770
 771 char *strdup_lower(const char *s)
 772 {
 773         size_t size;
 774         smb_ucs2_t *buffer = NULL;
 775         char *out_buffer;
 776
 777         size = push_ucs2_allocate(&buffer, s);
 778         if (size == -1 || !buffer) {
 779                 return NULL;
 780         }
 781
 782         strlower_w(buffer);
 783
 784         size = pull_ucs2_allocate(&out_buffer, buffer);
 785         SAFE_FREE(buffer);
 786
 787         if (size == (size_t)-1) {
 788                 return NULL;
 789         }
 790
 791         return out_buffer;
 792 }
 793
 794 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 795 {
 796         if (flags & (STR_NOALIGN|STR_ASCII))
 797                 return 0;
 798         return PTR_DIFF(p, base_ptr) & 1;
 799 }
 800
 801
 802 /**
 803  * Copy a string from a char* unix src to a dos codepage string destination.
 804  *
 805  * @return the number of bytes occupied by the string in the destination.
 806  *
 807  * @param flags can include
 808  * <dl>
 809  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 810  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 811  * </dl>
 812  *
 813  * @param dest_len the maximum length in bytes allowed in the
 814  * destination.  If @p dest_len is -1 then no maximum is used.
 815  **/
 816 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 817 {
 818         size_t src_len = strlen(src);
 819         pstring tmpbuf;
 820
 821         /* treat a pstring as "unlimited" length */
 822         if (dest_len == (size_t)-1)
 823                 dest_len = sizeof(pstring);
 824
 825         if (flags & STR_UPPER) {
 826                 pstrcpy(tmpbuf, src);
 827                 strupper_m(tmpbuf);
 828                 src = tmpbuf;
 829         }
 830
 831         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 832                 src_len++;
 833
 834         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 835 }
 836
 837 size_t push_ascii_fstring(void *dest, const char *src)
 838 {
 839         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 840 }
 841
 842 size_t push_ascii_pstring(void *dest, const char *src)
 843 {
 844         return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
 845 }
 846
 847 /********************************************************************
 848  Push an nstring - ensure null terminated. Written by
 849  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 850 ********************************************************************/
 851
 852 size_t push_ascii_nstring(void *dest, const char *src)
 853 {
 854         size_t i, buffer_len, dest_len;
 855         smb_ucs2_t *buffer;
 856
 857         conv_silent = True;
 858         buffer_len = push_ucs2_allocate(&buffer, src);
 859         if (buffer_len == (size_t)-1) {
 860                 smb_panic("failed to create UCS2 buffer");
 861         }
 862
 863         /* We're using buffer_len below to count ucs2 characters, not bytes. */
 864         buffer_len /= sizeof(smb_ucs2_t);
 865
 866         dest_len = 0;
 867         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
 868                 unsigned char mb[10];
 869                 /* Convert one smb_ucs2_t character at a time. */
 870                 size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
 871                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
 872                         memcpy((char *)dest + dest_len, mb, mb_len);
 873                         dest_len += mb_len;
 874                 } else {
 875                         errno = E2BIG;
 876                         break;
 877                 }
 878         }
 879         ((char *)dest)[dest_len] = '\0';
 880
 881         SAFE_FREE(buffer);
 882         conv_silent = False;
 883         return dest_len;
 884 }
 885
 886 /**
 887  * Copy a string from a dos codepage source to a unix char* destination.
 888  *
 889  * The resulting string in "dest" is always null terminated.
 890  *
 891  * @param flags can have:
 892  * <dl>
 893  * <dt>STR_TERMINATE</dt>
 894  * <dd>STR_TERMINATE means the string in @p src
 895  * is null terminated, and src_len is ignored.</dd>
 896  * </dl>
 897  *
 898  * @param src_len is the length of the source area in bytes.
 899  * @returns the number of bytes occupied by the string in @p src.
 900  **/
 901 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 902 {
 903         size_t ret;
 904
 905         if (dest_len == (size_t)-1)
 906                 dest_len = sizeof(pstring);
 907
 908         if (flags & STR_TERMINATE) {
 909                 if (src_len == (size_t)-1) {
 910                         src_len = strlen(src) + 1;
 911                 } else {
 912                         size_t len = strnlen(src, src_len);
 913                         if (len < src_len)
 914                                 len++;
 915                         src_len = len;
 916                 }
 917         }
 918
 919         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
 920         if (ret == (size_t)-1) {
 921                 dest_len = 0;
 922         }
 923
 924         if (dest_len)
 925                 dest[MIN(ret, dest_len-1)] = 0;
 926         else
 927                 dest[0] = 0;
 928
 929         return src_len;
 930 }
 931
 932 size_t pull_ascii_pstring(char *dest, const void *src)
 933 {
 934         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
 935 }
 936
 937 size_t pull_ascii_fstring(char *dest, const void *src)
 938 {
 939         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
 940 }
 941
 942 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
 943
 944 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
 945 {
 946         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
 947 }
 948
 949 /**
 950  * Copy a string from a char* src to a unicode destination.
 951  *
 952  * @returns the number of bytes occupied by the string in the destination.
 953  *
 954  * @param flags can have:
 955  *
 956  * <dl>
 957  * <dt>STR_TERMINATE <dd>means include the null termination.
 958  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 959  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 960  * </dl>
 961  *
 962  * @param dest_len is the maximum length allowed in the
 963  * destination. If dest_len is -1 then no maxiumum is used.
 964  **/
 965
 966 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
 967 {
 968         size_t len=0;
 969         size_t src_len;
 970         size_t ret;
 971
 972         /* treat a pstring as "unlimited" length */
 973         if (dest_len == (size_t)-1)
 974                 dest_len = sizeof(pstring);
 975
 976         if (flags & STR_TERMINATE)
 977                 src_len = (size_t)-1;
 978         else
 979                 src_len = strlen(src);
 980
 981         if (ucs2_align(base_ptr, dest, flags)) {
 982                 *(char *)dest = 0;
 983                 dest = (void *)((char *)dest + 1);
 984                 if (dest_len)
 985                         dest_len--;
 986                 len++;
 987         }
 988
 989         /* ucs2 is always a multiple of 2 bytes */
 990         dest_len &= ~1;
 991
 992         ret =  convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
 993         if (ret == (size_t)-1) {
 994                 return 0;
 995         }
 996
 997         len += ret;
 998
 999         if (flags & STR_UPPER) {
1000                 smb_ucs2_t *dest_ucs2 = dest;
1001                 size_t i;
1002                 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
1003                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1004                         if (v != dest_ucs2[i]) {
1005                                 dest_ucs2[i] = v;
1006                         }
1007                 }
1008         }
1009
1010         return len;
1011 }
1012
1013
1014 /**
1015  * Copy a string from a unix char* src to a UCS2 destination,
1016  * allocating a buffer using talloc().
1017  *
1018  * @param dest always set at least to NULL
1019  *
1020  * @returns The number of bytes occupied by the string in the destination
1021  *         or -1 in case of error.
1022  **/
1023 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1024 {
1025         size_t src_len = strlen(src)+1;
1026
1027         *dest = NULL;
1028         return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1029 }
1030
1031
1032 /**
1033  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1034  *
1035  * @param dest always set at least to NULL
1036  *
1037  * @returns The number of bytes occupied by the string in the destination
1038  *         or -1 in case of error.
1039  **/
1040
1041 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1042 {
1043         size_t src_len = strlen(src)+1;
1044
1045         *dest = NULL;
1046         return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1047 }
1048
1049 /**
1050  Copy a string from a char* src to a UTF-8 destination.
1051  Return the number of bytes occupied by the string in the destination
1052  Flags can have:
1053   STR_TERMINATE means include the null termination
1054   STR_UPPER     means uppercase in the destination
1055  dest_len is the maximum length allowed in the destination. If dest_len
1056  is -1 then no maxiumum is used.
1057 **/
1058
1059 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1060 {
1061         size_t src_len = strlen(src);
1062         pstring tmpbuf;
1063
1064         /* treat a pstring as "unlimited" length */
1065         if (dest_len == (size_t)-1)
1066                 dest_len = sizeof(pstring);
1067
1068         if (flags & STR_UPPER) {
1069                 pstrcpy(tmpbuf, src);
1070                 strupper_m(tmpbuf);
1071                 src = tmpbuf;
1072         }
1073
1074         if (flags & STR_TERMINATE)
1075                 src_len++;
1076
1077         return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1078 }
1079
1080 size_t push_utf8_fstring(void *dest, const char *src)
1081 {
1082         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1083 }
1084
1085 /**
1086  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1087  *
1088  * @param dest always set at least to NULL
1089  *
1090  * @returns The number of bytes occupied by the string in the destination
1091  **/
1092
1093 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1094 {
1095         size_t src_len = strlen(src)+1;
1096
1097         *dest = NULL;
1098         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1099 }
1100
1101 /**
1102  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1103  *
1104  * @param dest always set at least to NULL
1105  *
1106  * @returns The number of bytes occupied by the string in the destination
1107  **/
1108
1109 size_t push_utf8_allocate(char **dest, const char *src)
1110 {
1111         size_t src_len = strlen(src)+1;
1112
1113         *dest = NULL;
1114         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1115 }
1116
1117 /**
1118  Copy a string from a ucs2 source to a unix char* destination.
1119  Flags can have:
1120   STR_TERMINATE means the string in src is null terminated.
1121   STR_NOALIGN   means don't try to align.
1122  if STR_TERMINATE is set then src_len is ignored if it is -1.
1123  src_len is the length of the source area in bytes
1124  Return the number of bytes occupied by the string in src.
1125  The resulting string in "dest" is always null terminated.
1126 **/
1127
1128 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1129 {
1130         size_t ret;
1131
1132         if (dest_len == (size_t)-1)
1133                 dest_len = sizeof(pstring);
1134
1135         if (ucs2_align(base_ptr, src, flags)) {
1136                 src = (const void *)((const char *)src + 1);
1137                 if (src_len != (size_t)-1)
1138                         src_len--;
1139         }
1140
1141         if (flags & STR_TERMINATE) {
1142                 /* src_len -1 is the default for null terminated strings. */
1143                 if (src_len != (size_t)-1) {
1144                         size_t len = strnlen_w(src, src_len/2);
1145                         if (len < src_len/2)
1146                                 len++;
1147                         src_len = len*2;
1148                 }
1149         }
1150
1151         /* ucs2 is always a multiple of 2 bytes */
1152         if (src_len != (size_t)-1)
1153                 src_len &= ~1;
1154
1155         ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1156         if (ret == (size_t)-1) {
1157                 return 0;
1158         }
1159
1160         if (src_len == (size_t)-1)
1161                 src_len = ret*2;
1162
1163         if (dest_len)
1164                 dest[MIN(ret, dest_len-1)] = 0;
1165         else
1166                 dest[0] = 0;
1167
1168         return src_len;
1169 }
1170
1171 size_t pull_ucs2_pstring(char *dest, const void *src)
1172 {
1173         return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1174 }
1175
1176 size_t pull_ucs2_fstring(char *dest, const void *src)
1177 {
1178         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1179 }
1180
1181 /**
1182  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1183  *
1184  * @param dest always set at least to NULL
1185  *
1186  * @returns The number of bytes occupied by the string in the destination
1187  **/
1188
1189 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1190 {
1191         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1192         *dest = NULL;
1193         return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1194 }
1195
1196 /**
1197  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1198  *
1199  * @param dest always set at least to NULL
1200  *
1201  * @returns The number of bytes occupied by the string in the destination
1202  **/
1203
1204 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1205 {
1206         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1207         *dest = NULL;
1208         return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1209 }
1210
1211 /**
1212  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1213  *
1214  * @param dest always set at least to NULL
1215  *
1216  * @returns The number of bytes occupied by the string in the destination
1217  **/
1218
1219 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1220 {
1221         size_t src_len = strlen(src)+1;
1222         *dest = NULL;
1223         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1224 }
1225
1226 /**
1227  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1228  *
1229  * @param dest always set at least to NULL
1230  *
1231  * @returns The number of bytes occupied by the string in the destination
1232  **/
1233
1234 size_t pull_utf8_allocate(char **dest, const char *src)
1235 {
1236         size_t src_len = strlen(src)+1;
1237         *dest = NULL;
1238         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1239 }
1240
1241 /**
1242  Copy a string from a char* src to a unicode or ascii
1243  dos codepage destination choosing unicode or ascii based on the
1244  flags in the SMB buffer starting at base_ptr.
1245  Return the number of bytes occupied by the string in the destination.
1246  flags can have:
1247   STR_TERMINATE means include the null termination.
1248   STR_UPPER     means uppercase in the destination.
1249   STR_ASCII     use ascii even with unicode packet.
1250   STR_NOALIGN   means don't do alignment.
1251  dest_len is the maximum length allowed in the destination. If dest_len
1252  is -1 then no maxiumum is used.
1253 **/
1254
1255 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1256 {
1257 #ifdef DEVELOPER
1258         /* We really need to zero fill here, not clobber
1259          * region, as we want to ensure that valgrind thinks
1260          * all of the outgoing buffer has been written to
1261          * so a send() or write() won't trap an error.
1262          * JRA.
1263          */
1264 #if 0
1265         if (dest_len != (size_t)-1)
1266                 clobber_region(function, line, dest, dest_len);
1267 #else
1268         if (dest_len != (size_t)-1)
1269                 memset(dest, '\0', dest_len);
1270 #endif
1271 #endif
1272
1273         if (!(flags & STR_ASCII) && \
1274             ((flags & STR_UNICODE || \
1275               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1276                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1277         }
1278         return push_ascii(dest, src, dest_len, flags);
1279 }
1280
1281
1282 /**
1283  Copy a string from a unicode or ascii source (depending on
1284  the packet flags) to a char* destination.
1285  Flags can have:
1286   STR_TERMINATE means the string in src is null terminated.
1287   STR_UNICODE   means to force as unicode.
1288   STR_ASCII     use ascii even with unicode packet.
1289   STR_NOALIGN   means don't do alignment.
1290  if STR_TERMINATE is set then src_len is ignored is it is -1
1291  src_len is the length of the source area in bytes.
1292  Return the number of bytes occupied by the string in src.
1293  The resulting string in "dest" is always null terminated.
1294 **/
1295
1296 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1297 {
1298 #ifdef DEVELOPER
1299         if (dest_len != (size_t)-1)
1300                 clobber_region(function, line, dest, dest_len);
1301 #endif
1302
1303         if (!(flags & STR_ASCII) && \
1304             ((flags & STR_UNICODE || \
1305               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1306                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1307         }
1308         return pull_ascii(dest, src, dest_len, src_len, flags);
1309 }
1310
1311 size_t align_string(const void *base_ptr, const char *p, int flags)
1312 {
1313         if (!(flags & STR_ASCII) && \
1314             ((flags & STR_UNICODE || \
1315               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1316                 return ucs2_align(base_ptr, p, flags);
1317         }
1318         return 0;
1319 }
1320
1321 /****************************************************************
1322  Calculate the size (in bytes) of the next multibyte character in
1323  our internal character set. Note that p must be pointing to a
1324  valid mb char, not within one.
1325 ****************************************************************/
1326
1327 size_t next_mb_char_size(const char *s)
1328 {
1329         size_t i;
1330
1331         if (!(*s & 0x80))
1332                 return 1; /* ascii. */
1333
1334         conv_silent = True;
1335         for ( i = 1; i <=4; i++ ) {
1336                 smb_ucs2_t uc;
1337                 if (convert_string(CH_UNIX, CH_UCS2, s, i, &uc, 2, False) == 2) {
1338 #if 0 /* JRATEST */
1339                         DEBUG(10,("next_mb_char_size: size %u at string %s\n",
1340                                 (unsigned int)i, s));
1341 #endif
1342                         conv_silent = False;
1343                         return i;
1344                 }
1345         }
1346         /* We're hosed - we don't know how big this is... */
1347         DEBUG(10,("next_mb_char_size: unknown size at string %s\n", s));
1348         conv_silent = False;
1349         return 1;
1350 }