source/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
/* Conversion descriptors, indexed as conv_handles[from][to].  Entries are
   opened by init_iconv() and released by gfree_charcnv(). */
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/* When True the conversion routines skip their DEBUG output on failure;
   init_iconv() raises it while the character tables are rebuilt. */
static BOOL conv_silent;
  50
  51 /**
  52  * Return the name of a charset to give to iconv().
  53  **/
  54 static const char *charset_name(charset_t ch)
  55 {
  56         const char *ret = NULL;
  57
  58         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  59         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  60         else if (ch == CH_UNIX) ret = lp_unix_charset();
  61         else if (ch == CH_DOS) ret = lp_dos_charset();
  62         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  63         else if (ch == CH_UTF8) ret = "UTF8";
  64
  65 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  66         if (ret && !strcmp(ret, "LOCALE")) {
  67                 const char *ln = NULL;
  68
  69 #ifdef HAVE_SETLOCALE
  70                 setlocale(LC_ALL, "");
  71 #endif
  72                 ln = nl_langinfo(CODESET);
  73                 if (ln) {
  74                         /* Check whether the charset name is supported
  75                            by iconv */
  76                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  77                         if (handle == (smb_iconv_t) -1) {
  78                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  79                                 ln = NULL;
  80                         } else {
  81                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  82                                 smb_iconv_close(handle);
  83                         }
  84                 }
  85                 ret = ln;
  86         }
  87 #endif
  88
  89         if (!ret || !*ret) ret = "ASCII";
  90         return ret;
  91 }
  92
  93 void lazy_initialize_conv(void)
  94 {
  95         static int initialized = False;
  96
  97         if (!initialized) {
  98                 initialized = True;
  99                 load_case_tables();
 100                 init_iconv();
 101         }
 102 }
 103
 104 /**
 105  * Destroy global objects allocated by init_iconv()
 106  **/
 107 void gfree_charcnv(void)
 108 {
 109         int c1, c2;
 110
 111         for (c1=0;c1<NUM_CHARSETS;c1++) {
 112                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 113                         if ( conv_handles[c1][c2] ) {
 114                                 smb_iconv_close( conv_handles[c1][c2] );
 115                                 conv_handles[c1][c2] = 0;
 116                         }
 117                 }
 118         }
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         BOOL did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_doschar_table();
 182                 init_valid_table();
 183                 conv_silent = False;
 184         }
 185 }
 186
 187 /**
 188  * Convert string from one encoding to another, making error checking etc
 189  * Slow path version - uses (slow) iconv.
 190  *
 191  * @param src pointer to source string (multibyte or singlebyte)
 192  * @param srclen length of the source string in bytes
 193  * @param dest pointer to destination string (multibyte or singlebyte)
 194  * @param destlen maximal length allowed for string
 195  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 196  * @returns the number of bytes occupied in the destination
 197  *
 198  * Ensure the srclen contains the terminating zero.
 199  *
 200  **/
 201
 202 static size_t convert_string_internal(charset_t from, charset_t to,
 203                       void const *src, size_t srclen,
 204                       void *dest, size_t destlen, BOOL allow_bad_conv)
 205 {
 206         size_t i_len, o_len;
 207         size_t retval;
 208         const char* inbuf = (const char*)src;
 209         char* outbuf = (char*)dest;
 210         smb_iconv_t descriptor;
 211
 212         lazy_initialize_conv();
 213
 214         descriptor = conv_handles[from][to];
 215
 216         if (srclen == (size_t)-1) {
 217                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 218                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 219                 } else {
 220                         srclen = strlen((const char *)src)+1;
 221                 }
 222         }
 223
 224
 225         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 226                 if (!conv_silent)
 227                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 228                 return (size_t)-1;
 229         }
 230
 231         i_len=srclen;
 232         o_len=destlen;
 233
 234  again:
 235
 236         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 237         if(retval==(size_t)-1) {
 238                 const char *reason="unknown error";
 239                 switch(errno) {
 240                         case EINVAL:
 241                                 reason="Incomplete multibyte sequence";
 242                                 if (!conv_silent)
 243                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 244                                 if (allow_bad_conv)
 245                                         goto use_as_is;
 246                                 break;
 247                         case E2BIG:
 248                                 reason="No more room";
 249                                 if (!conv_silent) {
 250                                         if (from == CH_UNIX) {
 251                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 252                                                         charset_name(from), charset_name(to),
 253                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 254                                         } else {
 255                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 256                                                         charset_name(from), charset_name(to),
 257                                                         (unsigned int)srclen, (unsigned int)destlen));
 258                                         }
 259                                 }
 260                                 break;
 261                         case EILSEQ:
 262                                 reason="Illegal multibyte sequence";
 263                                 if (!conv_silent)
 264                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 265                                 if (allow_bad_conv)
 266                                         goto use_as_is;
 267                                 break;
 268                         default:
 269                                 if (!conv_silent)
 270                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 271                                 break;
 272                 }
 273                 /* smb_panic(reason); */
 274         }
 275         return destlen-o_len;
 276
 277  use_as_is:
 278
 279         /*
 280          * Conversion not supported. This is actually an error, but there are so
 281          * many misconfigured iconv systems and smb.conf's out there we can't just
 282          * fail. Do a very bad conversion instead.... JRA.
 283          */
 284
 285         {
 286                 if (o_len == 0 || i_len == 0)
 287                         return destlen - o_len;
 288
 289                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 290                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 291                         /* Can't convert from utf16 any endian to multibyte.
 292                            Replace with the default fail char.
 293                         */
 294                         if (i_len < 2)
 295                                 return destlen - o_len;
 296                         if (i_len >= 2) {
 297                                 *outbuf = lp_failed_convert_char();
 298
 299                                 outbuf++;
 300                                 o_len--;
 301
 302                                 inbuf += 2;
 303                                 i_len -= 2;
 304                         }
 305
 306                         if (o_len == 0 || i_len == 0)
 307                                 return destlen - o_len;
 308
 309                         /* Keep trying with the next char... */
 310                         goto again;
 311
 312                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 313                         /* Can't convert to UTF16LE - just widen by adding the
 314                            default fail char then zero.
 315                         */
 316                         if (o_len < 2)
 317                                 return destlen - o_len;
 318
 319                         outbuf[0] = lp_failed_convert_char();
 320                         outbuf[1] = '\0';
 321
 322                         inbuf++;
 323                         i_len--;
 324
 325                         outbuf += 2;
 326                         o_len -= 2;
 327
 328                         if (o_len == 0 || i_len == 0)
 329                                 return destlen - o_len;
 330
 331                         /* Keep trying with the next char... */
 332                         goto again;
 333
 334                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 335                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 336                         /* Failed multibyte to multibyte. Just copy the default fail char and
 337                                 try again. */
 338                         outbuf[0] = lp_failed_convert_char();
 339
 340                         inbuf++;
 341                         i_len--;
 342
 343                         outbuf++;
 344                         o_len--;
 345
 346                         if (o_len == 0 || i_len == 0)
 347                                 return destlen - o_len;
 348
 349                         /* Keep trying with the next char... */
 350                         goto again;
 351
 352                 } else {
 353                         /* Keep compiler happy.... */
 354                         return destlen - o_len;
 355                 }
 356         }
 357 }
 358
 359 /**
 360  * Convert string from one encoding to another, making error checking etc
 361  * Fast path version - handles ASCII first.
 362  *
 363  * @param src pointer to source string (multibyte or singlebyte)
 364  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 365  * @param dest pointer to destination string (multibyte or singlebyte)
 366  * @param destlen maximal length allowed for string - *NEVER* -1.
 367  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 368  * @returns the number of bytes occupied in the destination
 369  *
 370  * Ensure the srclen contains the terminating zero.
 371  *
 372  * This function has been hand-tuned to provide a fast path.
 373  * Don't change unless you really know what you are doing. JRA.
 374  **/
 375
 376 size_t convert_string(charset_t from, charset_t to,
 377                       void const *src, size_t srclen,
 378                       void *dest, size_t destlen, BOOL allow_bad_conv)
 379 {
 380         /*
 381          * NB. We deliberately don't do a strlen here if srclen == -1.
 382          * This is very expensive over millions of calls and is taken
 383          * care of in the slow path in convert_string_internal. JRA.
 384          */
 385
 386 #ifdef DEVELOPER
 387         SMB_ASSERT(destlen != (size_t)-1);
 388 #endif
 389
 390         if (srclen == 0)
 391                 return 0;
 392
 393         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 394                 const unsigned char *p = (const unsigned char *)src;
 395                 unsigned char *q = (unsigned char *)dest;
 396                 size_t slen = srclen;
 397                 size_t dlen = destlen;
 398                 unsigned char lastp = '\0';
 399                 size_t retval = 0;
 400
 401                 /* If all characters are ascii, fast path here. */
 402                 while (slen && dlen) {
 403                         if ((lastp = *p) <= 0x7f) {
 404                                 *q++ = *p++;
 405                                 if (slen != (size_t)-1) {
 406                                         slen--;
 407                                 }
 408                                 dlen--;
 409                                 retval++;
 410                                 if (!lastp)
 411                                         break;
 412                         } else {
 413 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 414                                 goto general_case;
 415 #else
 416                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 417 #endif
 418                         }
 419                 }
 420                 if (!dlen) {
 421                         /* Even if we fast path we should note if we ran out of room. */
 422                         if (((slen != (size_t)-1) && slen) ||
 423                                         ((slen == (size_t)-1) && lastp)) {
 424                                 errno = E2BIG;
 425                         }
 426                 }
 427                 return retval;
 428         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 429                 const unsigned char *p = (const unsigned char *)src;
 430                 unsigned char *q = (unsigned char *)dest;
 431                 size_t retval = 0;
 432                 size_t slen = srclen;
 433                 size_t dlen = destlen;
 434                 unsigned char lastp = '\0';
 435
 436                 /* If all characters are ascii, fast path here. */
 437                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 438                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 439                                 *q++ = *p;
 440                                 if (slen != (size_t)-1) {
 441                                         slen -= 2;
 442                                 }
 443                                 p += 2;
 444                                 dlen--;
 445                                 retval++;
 446                                 if (!lastp)
 447                                         break;
 448                         } else {
 449 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 450                                 goto general_case;
 451 #else
 452                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 453 #endif
 454                         }
 455                 }
 456                 if (!dlen) {
 457                         /* Even if we fast path we should note if we ran out of room. */
 458                         if (((slen != (size_t)-1) && slen) ||
 459                                         ((slen == (size_t)-1) && lastp)) {
 460                                 errno = E2BIG;
 461                         }
 462                 }
 463                 return retval;
 464         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 465                 const unsigned char *p = (const unsigned char *)src;
 466                 unsigned char *q = (unsigned char *)dest;
 467                 size_t retval = 0;
 468                 size_t slen = srclen;
 469                 size_t dlen = destlen;
 470                 unsigned char lastp = '\0';
 471
 472                 /* If all characters are ascii, fast path here. */
 473                 while (slen && (dlen >= 2)) {
 474                         if ((lastp = *p) <= 0x7F) {
 475                                 *q++ = *p++;
 476                                 *q++ = '\0';
 477                                 if (slen != (size_t)-1) {
 478                                         slen--;
 479                                 }
 480                                 dlen -= 2;
 481                                 retval += 2;
 482                                 if (!lastp)
 483                                         break;
 484                         } else {
 485 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 486                                 goto general_case;
 487 #else
 488                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 489 #endif
 490                         }
 491                 }
 492                 if (!dlen) {
 493                         /* Even if we fast path we should note if we ran out of room. */
 494                         if (((slen != (size_t)-1) && slen) ||
 495                                         ((slen == (size_t)-1) && lastp)) {
 496                                 errno = E2BIG;
 497                         }
 498                 }
 499                 return retval;
 500         }
 501
 502 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 503   general_case:
 504 #endif
 505         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 506 }
 507
 508 /**
 509  * Convert between character sets, allocating a new buffer for the result.
 510  *
 511  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 512  * (this is a bad interface and needs fixing. JRA).
 513  * @param srclen length of source buffer.
 514  * @param dest always set at least to NULL
 515  * @note -1 is not accepted for srclen.
 516  *
 517  * @returns Size in bytes of the converted string; or -1 in case of error.
 518  *
 519  * Ensure the srclen contains the terminating zero.
 520  *
 521  * I hate the goto's in this function. It's embarressing.....
 522  * There has to be a cleaner way to do this. JRA.
 523  **/
 524
 525 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 526                                void const *src, size_t srclen, void *dst, BOOL allow_bad_conv)
 527 {
 528         size_t i_len, o_len, destlen = MAX(srclen, 512);
 529         size_t retval;
 530         const char *inbuf = (const char *)src;
 531         char *outbuf = NULL, *ob = NULL;
 532         smb_iconv_t descriptor;
 533         void **dest = (void **)dst;
 534
 535         *dest = NULL;
 536
 537         if (src == NULL || srclen == (size_t)-1)
 538                 return (size_t)-1;
 539         if (srclen == 0)
 540                 return 0;
 541
 542         lazy_initialize_conv();
 543
 544         descriptor = conv_handles[from][to];
 545
 546         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 547                 if (!conv_silent)
 548                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 549                 return (size_t)-1;
 550         }
 551
 552   convert:
 553
 554         if ((destlen*2) < destlen) {
 555                 /* wrapped ! abort. */
 556                 if (!conv_silent)
 557                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 558                 if (!ctx)
 559                         SAFE_FREE(outbuf);
 560                 return (size_t)-1;
 561         } else {
 562                 destlen = destlen * 2;
 563         }
 564
 565         if (ctx) {
 566                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen);
 567         } else {
 568                 ob = (char *)SMB_REALLOC(ob, destlen);
 569         }
 570
 571         if (!ob) {
 572                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 573                 return (size_t)-1;
 574         }
 575         outbuf = ob;
 576         i_len = srclen;
 577         o_len = destlen;
 578
 579  again:
 580
 581         retval = smb_iconv(descriptor,
 582                            &inbuf, &i_len,
 583                            &outbuf, &o_len);
 584         if(retval == (size_t)-1)                {
 585                 const char *reason="unknown error";
 586                 switch(errno) {
 587                         case EINVAL:
 588                                 reason="Incomplete multibyte sequence";
 589                                 if (!conv_silent)
 590                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 591                                 if (allow_bad_conv)
 592                                         goto use_as_is;
 593                                 break;
 594                         case E2BIG:
 595                                 goto convert;
 596                         case EILSEQ:
 597                                 reason="Illegal multibyte sequence";
 598                                 if (!conv_silent)
 599                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 600                                 if (allow_bad_conv)
 601                                         goto use_as_is;
 602                                 break;
 603                 }
 604                 if (!conv_silent)
 605                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 606                 /* smb_panic(reason); */
 607                 if (ctx) {
 608                         TALLOC_FREE(ob);
 609                 } else {
 610                         SAFE_FREE(ob);
 611                 }
 612                 return (size_t)-1;
 613         }
 614
 615   out:
 616
 617         destlen = destlen - o_len;
 618         if (ctx) {
 619                 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen);
 620         } else {
 621                 ob = (char *)SMB_REALLOC(ob,destlen);
 622         }
 623
 624         if (destlen && !ob) {
 625                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 626                 return (size_t)-1;
 627         }
 628
 629         *dest = ob;
 630         return destlen;
 631
 632  use_as_is:
 633
 634         /*
 635          * Conversion not supported. This is actually an error, but there are so
 636          * many misconfigured iconv systems and smb.conf's out there we can't just
 637          * fail. Do a very bad conversion instead.... JRA.
 638          */
 639
 640         {
 641                 if (o_len == 0 || i_len == 0)
 642                         goto out;
 643
 644                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 645                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 646                         /* Can't convert from utf16 any endian to multibyte.
 647                            Replace with the default fail char.
 648                         */
 649
 650                         if (i_len < 2)
 651                                 goto out;
 652
 653                         if (i_len >= 2) {
 654                                 *outbuf = lp_failed_convert_char();
 655
 656                                 outbuf++;
 657                                 o_len--;
 658
 659                                 inbuf += 2;
 660                                 i_len -= 2;
 661                         }
 662
 663                         if (o_len == 0 || i_len == 0)
 664                                 goto out;
 665
 666                         /* Keep trying with the next char... */
 667                         goto again;
 668
 669                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 670                         /* Can't convert to UTF16LE - just widen by adding the
 671                            default fail char then zero.
 672                         */
 673                         if (o_len < 2)
 674                                 goto out;
 675
 676                         outbuf[0] = lp_failed_convert_char();
 677                         outbuf[1] = '\0';
 678
 679                         inbuf++;
 680                         i_len--;
 681
 682                         outbuf += 2;
 683                         o_len -= 2;
 684
 685                         if (o_len == 0 || i_len == 0)
 686                                 goto out;
 687
 688                         /* Keep trying with the next char... */
 689                         goto again;
 690
 691                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 692                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 693                         /* Failed multibyte to multibyte. Just copy the default fail char and
 694                            try again. */
 695                         outbuf[0] = lp_failed_convert_char();
 696
 697                         inbuf++;
 698                         i_len--;
 699
 700                         outbuf++;
 701                         o_len--;
 702
 703                         if (o_len == 0 || i_len == 0)
 704                                 goto out;
 705
 706                         /* Keep trying with the next char... */
 707                         goto again;
 708
 709                 } else {
 710                         /* Keep compiler happy.... */
 711                         goto out;
 712                 }
 713         }
 714 }
 715
 716 /**
 717  * Convert between character sets, allocating a new buffer using talloc for the result.
 718  *
 719  * @param srclen length of source buffer.
 720  * @param dest always set at least to NULL
 721  * @note -1 is not accepted for srclen.
 722  *
 723  * @returns Size in bytes of the converted string; or -1 in case of error.
 724  **/
 725 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 726                              void const *src, size_t srclen, void *dst,
 727                              BOOL allow_bad_conv)
 728 {
 729         void **dest = (void **)dst;
 730         size_t dest_len;
 731
 732         *dest = NULL;
 733         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
 734         if (dest_len == (size_t)-1)
 735                 return (size_t)-1;
 736         if (*dest == NULL)
 737                 return (size_t)-1;
 738         return dest_len;
 739 }
 740
 741 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 742 {
 743         size_t size;
 744         smb_ucs2_t *buffer;
 745
 746         size = push_ucs2_allocate(&buffer, src);
 747         if (size == (size_t)-1) {
 748                 smb_panic("failed to create UCS2 buffer");
 749         }
 750         if (!strupper_w(buffer) && (dest == src)) {
 751                 free(buffer);
 752                 return srclen;
 753         }
 754
 755         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 756         free(buffer);
 757         return size;
 758 }
 759
 760 /**
 761  strdup() a unix string to upper case.
 762  Max size is pstring.
 763 **/
 764
 765 char *strdup_upper(const char *s)
 766 {
 767         pstring out_buffer;
 768         const unsigned char *p = (const unsigned char *)s;
 769         unsigned char *q = (unsigned char *)out_buffer;
 770
 771         /* this is quite a common operation, so we want it to be
 772            fast. We optimise for the ascii case, knowing that all our
 773            supported multi-byte character sets are ascii-compatible
 774            (ie. they match for the first 128 chars) */
 775
 776         while (1) {
 777                 if (*p & 0x80)
 778                         break;
 779                 *q++ = toupper_ascii(*p);
 780                 if (!*p)
 781                         break;
 782                 p++;
 783                 if (p - ( const unsigned char *)s >= sizeof(pstring))
 784                         break;
 785         }
 786
 787         if (*p) {
 788                 /* MB case. */
 789                 size_t size;
 790                 wpstring buffer;
 791                 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
 792                 if (size == (size_t)-1) {
 793                         return NULL;
 794                 }
 795
 796                 strupper_w(buffer);
 797
 798                 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
 799                 if (size == (size_t)-1) {
 800                         return NULL;
 801                 }
 802         }
 803
 804         return SMB_STRDUP(out_buffer);
 805 }
 806
 807 /**
 808  talloc_strdup() a unix string to upper case.
 809 **/
 810
 811 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 812 {
 813         char *out_buffer = talloc_strdup(ctx,s);
 814         const unsigned char *p = (const unsigned char *)s;
 815         unsigned char *q = (unsigned char *)out_buffer;
 816
 817         if (!q) {
 818                 return NULL;
 819         }
 820
 821         /* this is quite a common operation, so we want it to be
 822            fast. We optimise for the ascii case, knowing that all our
 823            supported multi-byte character sets are ascii-compatible
 824            (ie. they match for the first 128 chars) */
 825
 826         while (1) {
 827                 if (*p & 0x80)
 828                         break;
 829                 *q++ = toupper_ascii(*p);
 830                 if (!*p)
 831                         break;
 832                 p++;
 833         }
 834
 835         if (*p) {
 836                 /* MB case. */
 837                 size_t size;
 838                 smb_ucs2_t *ubuf = NULL;
 839
 840                 /* We're not using the ascii buffer above. */
 841                 TALLOC_FREE(out_buffer);
 842
 843                 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
 844                                 s, strlen(s),
 845                                 (void *)&ubuf,
 846                                 True);
 847                 if (size == (size_t)-1) {
 848                         return NULL;
 849                 }
 850
 851                 strupper_w(ubuf);
 852
 853                 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
 854                                 ubuf, size,
 855                                 (void *)&out_buffer,
 856                                 True);
 857
 858                 /* Don't need the intermediate buffer
 859                  * anymore.
 860                  */
 861
 862                 TALLOC_FREE(ubuf);
 863
 864                 if (size == (size_t)-1) {
 865                         return NULL;
 866                 }
 867         }
 868
 869         return out_buffer;
 870 }
 871
 872 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 873 {
 874         size_t size;
 875         smb_ucs2_t *buffer = NULL;
 876
 877         size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 878                                        (void **)(void *)&buffer, True);
 879         if (size == (size_t)-1 || !buffer) {
 880                 smb_panic("failed to create UCS2 buffer");
 881         }
 882         if (!strlower_w(buffer) && (dest == src)) {
 883                 SAFE_FREE(buffer);
 884                 return srclen;
 885         }
 886         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 887         SAFE_FREE(buffer);
 888         return size;
 889 }
 890
 891 /**
 892  strdup() a unix string to lower case.
 893 **/
 894
 895 char *strdup_lower(const char *s)
 896 {
 897         size_t size;
 898         smb_ucs2_t *buffer = NULL;
 899         char *out_buffer;
 900
 901         size = push_ucs2_allocate(&buffer, s);
 902         if (size == -1 || !buffer) {
 903                 return NULL;
 904         }
 905
 906         strlower_w(buffer);
 907
 908         size = pull_ucs2_allocate(&out_buffer, buffer);
 909         SAFE_FREE(buffer);
 910
 911         if (size == (size_t)-1) {
 912                 return NULL;
 913         }
 914
 915         return out_buffer;
 916 }
 917
 918 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 919 {
 920         if (flags & (STR_NOALIGN|STR_ASCII))
 921                 return 0;
 922         return PTR_DIFF(p, base_ptr) & 1;
 923 }
 924
 925
 926 /**
 927  * Copy a string from a char* unix src to a dos codepage string destination.
 928  *
 929  * @return the number of bytes occupied by the string in the destination.
 930  *
 931  * @param flags can include
 932  * <dl>
 933  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 934  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 935  * </dl>
 936  *
 937  * @param dest_len the maximum length in bytes allowed in the
 938  * destination.  If @p dest_len is -1 then no maximum is used.
 939  **/
 940 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 941 {
 942         size_t src_len = strlen(src);
 943         pstring tmpbuf;
 944
 945         /* treat a pstring as "unlimited" length */
 946         if (dest_len == (size_t)-1)
 947                 dest_len = sizeof(pstring);
 948
 949         if (flags & STR_UPPER) {
 950                 pstrcpy(tmpbuf, src);
 951                 strupper_m(tmpbuf);
 952                 src = tmpbuf;
 953         }
 954
 955         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 956                 src_len++;
 957
 958         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 959 }
 960
 961 size_t push_ascii_fstring(void *dest, const char *src)
 962 {
 963         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 964 }
 965
 966 size_t push_ascii_pstring(void *dest, const char *src)
 967 {
 968         return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
 969 }
 970
 971 /********************************************************************
 972  Push an nstring - ensure null terminated. Written by
 973  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 974 ********************************************************************/
 975
 976 size_t push_ascii_nstring(void *dest, const char *src)
 977 {
 978         size_t i, buffer_len, dest_len;
 979         smb_ucs2_t *buffer;
 980
 981         conv_silent = True;
 982         buffer_len = push_ucs2_allocate(&buffer, src);
 983         if (buffer_len == (size_t)-1) {
 984                 smb_panic("failed to create UCS2 buffer");
 985         }
 986
 987         /* We're using buffer_len below to count ucs2 characters, not bytes. */
 988         buffer_len /= sizeof(smb_ucs2_t);
 989
 990         dest_len = 0;
 991         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
 992                 unsigned char mb[10];
 993                 /* Convert one smb_ucs2_t character at a time. */
 994                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
 995                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
 996                         memcpy((char *)dest + dest_len, mb, mb_len);
 997                         dest_len += mb_len;
 998                 } else {
 999                         errno = E2BIG;
1000                         break;
1001                 }
1002         }
1003         ((char *)dest)[dest_len] = '\0';
1004
1005         SAFE_FREE(buffer);
1006         conv_silent = False;
1007         return dest_len;
1008 }
1009
1010 /**
1011  * Copy a string from a dos codepage source to a unix char* destination.
1012  *
1013  * The resulting string in "dest" is always null terminated.
1014  *
1015  * @param flags can have:
1016  * <dl>
1017  * <dt>STR_TERMINATE</dt>
1018  * <dd>STR_TERMINATE means the string in @p src
1019  * is null terminated, and src_len is ignored.</dd>
1020  * </dl>
1021  *
1022  * @param src_len is the length of the source area in bytes.
1023  * @returns the number of bytes occupied by the string in @p src.
1024  **/
1025 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1026 {
1027         size_t ret;
1028
1029         if (dest_len == (size_t)-1)
1030                 dest_len = sizeof(pstring);
1031
1032         if (flags & STR_TERMINATE) {
1033                 if (src_len == (size_t)-1) {
1034                         src_len = strlen((const char *)src) + 1;
1035                 } else {
1036                         size_t len = strnlen((const char *)src, src_len);
1037                         if (len < src_len)
1038                                 len++;
1039                         src_len = len;
1040                 }
1041         }
1042
1043         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1044         if (ret == (size_t)-1) {
1045                 ret = 0;
1046                 dest_len = 0;
1047         }
1048
1049         if (dest_len && ret) {
1050                 /* Did we already process the terminating zero ? */
1051                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1052                         dest[MIN(ret, dest_len-1)] = 0;
1053                 }
1054         } else  {
1055                 dest[0] = 0;
1056         }
1057
1058         return src_len;
1059 }
1060
1061 /**
1062  * Copy a string from a dos codepage source to a unix char* destination.
1063  Talloc version.
1064  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1065  needs fixing. JRA).
1066  *
1067  * The resulting string in "dest" is always null terminated.
1068  *
1069  * @param flags can have:
1070  * <dl>
1071  * <dt>STR_TERMINATE</dt>
1072  * <dd>STR_TERMINATE means the string in @p src
1073  * is null terminated, and src_len is ignored.</dd>
1074  * </dl>
1075  *
1076  * @param src_len is the length of the source area in bytes.
1077  * @returns the number of bytes occupied by the string in @p src.
1078  **/
1079
1080 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1081                                         char **ppdest,
1082                                         const void *src,
1083                                         size_t src_len,
1084                                         int flags)
1085 {
1086         char *dest = NULL;
1087         size_t dest_len = 0;
1088
1089 #ifdef DEVELOPER
1090         /* Ensure we never use the braindead "malloc" varient. */
1091         if (ctx == NULL) {
1092                 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1093         }
1094 #endif
1095
1096         *ppdest = NULL;
1097
1098         if (flags & STR_TERMINATE) {
1099                 if (src_len == (size_t)-1) {
1100                         src_len = strlen((const char *)src) + 1;
1101                 } else {
1102                         size_t len = strnlen((const char *)src, src_len);
1103                         if (len < src_len)
1104                                 len++;
1105                         src_len = len;
1106                 }
1107                 /* Ensure we don't use an insane length from the client. */
1108                 if (src_len >= 1024*1024) {
1109                         smb_panic("Bad src length in pull_ascii_base_talloc\n");
1110                 }
1111         }
1112
1113         dest_len = convert_string_allocate(ctx,
1114                                 CH_DOS,
1115                                 CH_UNIX,
1116                                 src,
1117                                 src_len,
1118                                 &dest,
1119                                 True);
1120
1121         if (dest_len == (size_t)-1) {
1122                 return 0;
1123         }
1124
1125         if (dest_len && dest) {
1126                 /* Did we already process the terminating zero ? */
1127                 if (dest[dest_len-1] != 0) {
1128                         dest[dest_len-1] = 0;
1129                 }
1130         } else if (dest) {
1131                 dest[0] = 0;
1132         }
1133
1134         *ppdest = dest;
1135         return src_len;
1136 }
1137
1138
1139 size_t pull_ascii_pstring(char *dest, const void *src)
1140 {
1141         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
1142 }
1143
1144 size_t pull_ascii_fstring(char *dest, const void *src)
1145 {
1146         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1147 }
1148
1149 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1150
1151 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1152 {
1153         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1154 }
1155
1156 /**
1157  * Copy a string from a char* src to a unicode destination.
1158  *
1159  * @returns the number of bytes occupied by the string in the destination.
1160  *
1161  * @param flags can have:
1162  *
1163  * <dl>
1164  * <dt>STR_TERMINATE <dd>means include the null termination.
1165  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1166  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1167  * </dl>
1168  *
1169  * @param dest_len is the maximum length allowed in the
1170  * destination. If dest_len is -1 then no maxiumum is used.
1171  **/
1172
1173 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1174 {
1175         size_t len=0;
1176         size_t src_len;
1177         size_t ret;
1178
1179         /* treat a pstring as "unlimited" length */
1180         if (dest_len == (size_t)-1)
1181                 dest_len = sizeof(pstring);
1182
1183         if (flags & STR_TERMINATE)
1184                 src_len = (size_t)-1;
1185         else
1186                 src_len = strlen(src);
1187
1188         if (ucs2_align(base_ptr, dest, flags)) {
1189                 *(char *)dest = 0;
1190                 dest = (void *)((char *)dest + 1);
1191                 if (dest_len)
1192                         dest_len--;
1193                 len++;
1194         }
1195
1196         /* ucs2 is always a multiple of 2 bytes */
1197         dest_len &= ~1;
1198
1199         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1200         if (ret == (size_t)-1) {
1201                 return 0;
1202         }
1203
1204         len += ret;
1205
1206         if (flags & STR_UPPER) {
1207                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1208                 size_t i;
1209
1210                 /* We check for i < (ret / 2) below as the dest string isn't null
1211                    terminated if STR_TERMINATE isn't set. */
1212
1213                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1214                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1215                         if (v != dest_ucs2[i]) {
1216                                 dest_ucs2[i] = v;
1217                         }
1218                 }
1219         }
1220
1221         return len;
1222 }
1223
1224
1225 /**
1226  * Copy a string from a unix char* src to a UCS2 destination,
1227  * allocating a buffer using talloc().
1228  *
1229  * @param dest always set at least to NULL
1230  *
1231  * @returns The number of bytes occupied by the string in the destination
1232  *         or -1 in case of error.
1233  **/
1234 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1235 {
1236         size_t src_len = strlen(src)+1;
1237
1238         *dest = NULL;
1239         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1240 }
1241
1242
1243 /**
1244  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1245  *
1246  * @param dest always set at least to NULL
1247  *
1248  * @returns The number of bytes occupied by the string in the destination
1249  *         or -1 in case of error.
1250  **/
1251
1252 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1253 {
1254         size_t src_len = strlen(src)+1;
1255
1256         *dest = NULL;
1257         return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1258 }
1259
1260 /**
1261  Copy a string from a char* src to a UTF-8 destination.
1262  Return the number of bytes occupied by the string in the destination
1263  Flags can have:
1264   STR_TERMINATE means include the null termination
1265   STR_UPPER     means uppercase in the destination
1266  dest_len is the maximum length allowed in the destination. If dest_len
1267  is -1 then no maxiumum is used.
1268 **/
1269
1270 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1271 {
1272         size_t src_len = strlen(src);
1273         pstring tmpbuf;
1274
1275         /* treat a pstring as "unlimited" length */
1276         if (dest_len == (size_t)-1)
1277                 dest_len = sizeof(pstring);
1278
1279         if (flags & STR_UPPER) {
1280                 pstrcpy(tmpbuf, src);
1281                 strupper_m(tmpbuf);
1282                 src = tmpbuf;
1283         }
1284
1285         if (flags & STR_TERMINATE)
1286                 src_len++;
1287
1288         return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1289 }
1290
1291 size_t push_utf8_fstring(void *dest, const char *src)
1292 {
1293         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1294 }
1295
1296 /**
1297  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1298  *
1299  * @param dest always set at least to NULL
1300  *
1301  * @returns The number of bytes occupied by the string in the destination
1302  **/
1303
1304 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1305 {
1306         size_t src_len = strlen(src)+1;
1307
1308         *dest = NULL;
1309         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1310 }
1311
1312 /**
1313  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1314  *
1315  * @param dest always set at least to NULL
1316  *
1317  * @returns The number of bytes occupied by the string in the destination
1318  **/
1319
1320 size_t push_utf8_allocate(char **dest, const char *src)
1321 {
1322         size_t src_len = strlen(src)+1;
1323
1324         *dest = NULL;
1325         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1326 }
1327
1328 /**
1329  Copy a string from a ucs2 source to a unix char* destination.
1330  Flags can have:
1331   STR_TERMINATE means the string in src is null terminated.
1332   STR_NOALIGN   means don't try to align.
1333  if STR_TERMINATE is set then src_len is ignored if it is -1.
1334  src_len is the length of the source area in bytes
1335  Return the number of bytes occupied by the string in src.
1336  The resulting string in "dest" is always null terminated.
1337 **/
1338
1339 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1340 {
1341         size_t ret;
1342
1343         if (dest_len == (size_t)-1)
1344                 dest_len = sizeof(pstring);
1345
1346         if (ucs2_align(base_ptr, src, flags)) {
1347                 src = (const void *)((const char *)src + 1);
1348                 if (src_len != (size_t)-1)
1349                         src_len--;
1350         }
1351
1352         if (flags & STR_TERMINATE) {
1353                 /* src_len -1 is the default for null terminated strings. */
1354                 if (src_len != (size_t)-1) {
1355                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1356                                                 src_len/2);
1357                         if (len < src_len/2)
1358                                 len++;
1359                         src_len = len*2;
1360                 }
1361         }
1362
1363         /* ucs2 is always a multiple of 2 bytes */
1364         if (src_len != (size_t)-1)
1365                 src_len &= ~1;
1366
1367         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1368         if (ret == (size_t)-1) {
1369                 return 0;
1370         }
1371
1372         if (src_len == (size_t)-1)
1373                 src_len = ret*2;
1374
1375         if (dest_len && ret) {
1376                 /* Did we already process the terminating zero ? */
1377                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1378                         dest[MIN(ret, dest_len-1)] = 0;
1379                 }
1380         } else {
1381                 dest[0] = 0;
1382         }
1383
1384         return src_len;
1385 }
1386
1387 /**
1388  Copy a string from a ucs2 source to a unix char* destination.
1389  Talloc version with a base pointer.
1390  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1391  needs fixing. JRA).
1392  Flags can have:
1393   STR_TERMINATE means the string in src is null terminated.
1394   STR_NOALIGN   means don't try to align.
1395  if STR_TERMINATE is set then src_len is ignored if it is -1.
1396  src_len is the length of the source area in bytes
1397  Return the number of bytes occupied by the string in src.
1398  The resulting string in "dest" is always null terminated.
1399 **/
1400
1401 static size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1402                         const void *base_ptr,
1403                         char **ppdest,
1404                         const void *src,
1405                         size_t src_len,
1406                         int flags)
1407 {
1408         char *dest;
1409         size_t dest_len;
1410
1411         *ppdest = NULL;
1412
1413 #ifdef DEVELOPER
1414         /* Ensure we never use the braindead "malloc" varient. */
1415         if (ctx == NULL) {
1416                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1417         }
1418 #endif
1419
1420         if (ucs2_align(base_ptr, src, flags)) {
1421                 src = (const void *)((const char *)src + 1);
1422                 if (src_len != (size_t)-1)
1423                         src_len--;
1424         }
1425
1426         if (flags & STR_TERMINATE) {
1427                 /* src_len -1 is the default for null terminated strings. */
1428                 if (src_len != (size_t)-1) {
1429                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1430                                                 src_len/2);
1431                         if (len < src_len/2)
1432                                 len++;
1433                         src_len = len*2;
1434                 }
1435                 /* Ensure we don't use an insane length from the client. */
1436                 if (src_len >= 1024*1024) {
1437                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1438                 }
1439         }
1440
1441         /* ucs2 is always a multiple of 2 bytes */
1442         if (src_len != (size_t)-1) {
1443                 src_len &= ~1;
1444         }
1445
1446         dest_len = convert_string_talloc(ctx,
1447                                         CH_UTF16LE,
1448                                         CH_UNIX,
1449                                         src,
1450                                         src_len,
1451                                         (void *)&dest,
1452                                         True);
1453         if (dest_len == (size_t)-1) {
1454                 return 0;
1455         }
1456
1457         if (src_len == (size_t)-1)
1458                 src_len = dest_len*2;
1459
1460         if (dest_len) {
1461                 /* Did we already process the terminating zero ? */
1462                 if (dest[dest_len-1] != 0) {
1463                         size_t size = talloc_get_size(dest);
1464                         /* Have we got space to append the '\0' ? */
1465                         if (size <= dest_len) {
1466                                 /* No, realloc. */
1467                                 dest = TALLOC_REALLOC(ctx, dest,
1468                                                 dest_len+1);
1469                                 if (!dest) {
1470                                         /* talloc fail. */
1471                                         dest_len = (size_t)-1;
1472                                         return 0;
1473                                 }
1474                         }
1475                         /* Yay - space ! */
1476                         dest[dest_len] = '\0';
1477                         dest_len++;
1478                 }
1479         } else if (dest) {
1480                 dest[0] = 0;
1481         }
1482
1483         *ppdest = dest;
1484         return src_len;
1485 }
1486
1487 size_t pull_ucs2_pstring(char *dest, const void *src)
1488 {
1489         return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1490 }
1491
1492 size_t pull_ucs2_fstring(char *dest, const void *src)
1493 {
1494         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1495 }
1496
1497 /**
1498  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1499  *
1500  * @param dest always set at least to NULL
1501  *
1502  * @returns The number of bytes occupied by the string in the destination
1503  **/
1504
1505 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1506 {
1507         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1508         *dest = NULL;
1509         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1510 }
1511
1512 /**
1513  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1514  *
1515  * @param dest always set at least to NULL
1516  *
1517  * @returns The number of bytes occupied by the string in the destination
1518  **/
1519
1520 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1521 {
1522         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1523         *dest = NULL;
1524         return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1525 }
1526
1527 /**
1528  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1529  *
1530  * @param dest always set at least to NULL
1531  *
1532  * @returns The number of bytes occupied by the string in the destination
1533  **/
1534
1535 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1536 {
1537         size_t src_len = strlen(src)+1;
1538         *dest = NULL;
1539         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1540 }
1541
1542 /**
1543  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1544  *
1545  * @param dest always set at least to NULL
1546  *
1547  * @returns The number of bytes occupied by the string in the destination
1548  **/
1549
1550 size_t pull_utf8_allocate(char **dest, const char *src)
1551 {
1552         size_t src_len = strlen(src)+1;
1553         *dest = NULL;
1554         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1555 }
1556
1557 /**
1558  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1559  *
1560  * @param dest always set at least to NULL
1561  *
1562  * @returns The number of bytes occupied by the string in the destination
1563  **/
1564
1565 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1566 {
1567         size_t src_len = strlen(src)+1;
1568         *dest = NULL;
1569         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1570 }
1571
1572 /**
1573  Copy a string from a char* src to a unicode or ascii
1574  dos codepage destination choosing unicode or ascii based on the
1575  flags in the SMB buffer starting at base_ptr.
1576  Return the number of bytes occupied by the string in the destination.
1577  flags can have:
1578   STR_TERMINATE means include the null termination.
1579   STR_UPPER     means uppercase in the destination.
1580   STR_ASCII     use ascii even with unicode packet.
1581   STR_NOALIGN   means don't do alignment.
1582  dest_len is the maximum length allowed in the destination. If dest_len
1583  is -1 then no maxiumum is used.
1584 **/
1585
1586 size_t push_string_fn(const char *function, unsigned int line,
1587                       const void *base_ptr, uint16 flags2,
1588                       void *dest, const char *src,
1589                       size_t dest_len, int flags)
1590 {
1591 #ifdef DEVELOPER
1592         /* We really need to zero fill here, not clobber
1593          * region, as we want to ensure that valgrind thinks
1594          * all of the outgoing buffer has been written to
1595          * so a send() or write() won't trap an error.
1596          * JRA.
1597          */
1598 #if 0
1599         if (dest_len != (size_t)-1)
1600                 clobber_region(function, line, dest, dest_len);
1601 #else
1602         if (dest_len != (size_t)-1)
1603                 memset(dest, '\0', dest_len);
1604 #endif
1605 #endif
1606
1607         if (!(flags & STR_ASCII) && \
1608             ((flags & STR_UNICODE || \
1609               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1610                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1611         }
1612         return push_ascii(dest, src, dest_len, flags);
1613 }
1614
1615
1616 /**
1617  Copy a string from a unicode or ascii source (depending on
1618  the packet flags) to a char* destination.
1619  Flags can have:
1620   STR_TERMINATE means the string in src is null terminated.
1621   STR_UNICODE   means to force as unicode.
1622   STR_ASCII     use ascii even with unicode packet.
1623   STR_NOALIGN   means don't do alignment.
1624  if STR_TERMINATE is set then src_len is ignored is it is -1
1625  src_len is the length of the source area in bytes.
1626  Return the number of bytes occupied by the string in src.
1627  The resulting string in "dest" is always null terminated.
1628 **/
1629
1630 size_t pull_string_fn(const char *function, unsigned int line,
1631                       const void *base_ptr, uint16 smb_flags2, char *dest,
1632                       const void *src, size_t dest_len, size_t src_len,
1633                       int flags)
1634 {
1635 #ifdef DEVELOPER
1636         if (dest_len != (size_t)-1)
1637                 clobber_region(function, line, dest, dest_len);
1638 #endif
1639
1640         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1641                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1642                           "UNICODE defined");
1643         }
1644
1645         if (!(flags & STR_ASCII) && \
1646             ((flags & STR_UNICODE || \
1647               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1648                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1649         }
1650         return pull_ascii(dest, src, dest_len, src_len, flags);
1651 }
1652
1653 /**
1654  Copy a string from a unicode or ascii source (depending on
1655  the packet flags) to a char* destination.
1656  Variant that uses talloc.
1657  Flags can have:
1658   STR_TERMINATE means the string in src is null terminated.
1659   STR_UNICODE   means to force as unicode.
1660   STR_ASCII     use ascii even with unicode packet.
1661   STR_NOALIGN   means don't do alignment.
1662  if STR_TERMINATE is set then src_len is ignored is it is -1
1663  src_len is the length of the source area in bytes.
1664  Return the number of bytes occupied by the string in src.
1665  The resulting string in "dest" is always null terminated.
1666 **/
1667
1668 size_t pull_string_talloc_fn(const char *function,
1669                         unsigned int line,
1670                         TALLOC_CTX *ctx,
1671                         const void *base_ptr,
1672                         uint16 smb_flags2,
1673                         char **ppdest,
1674                         const void *src,
1675                         size_t src_len,
1676                         int flags)
1677 {
1678         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1679                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1680                           "UNICODE defined");
1681         }
1682
1683         if (!(flags & STR_ASCII) && \
1684             ((flags & STR_UNICODE || \
1685               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1686                 return pull_ucs2_base_talloc(ctx,
1687                                         base_ptr,
1688                                         ppdest,
1689                                         src,
1690                                         src_len,
1691                                         flags);
1692         }
1693         return pull_ascii_base_talloc(ctx,
1694                                         ppdest,
1695                                         src,
1696                                         src_len,
1697                                         flags);
1698 }
1699
1700
1701 size_t align_string(const void *base_ptr, const char *p, int flags)
1702 {
1703         if (!(flags & STR_ASCII) && \
1704             ((flags & STR_UNICODE || \
1705               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1706                 return ucs2_align(base_ptr, p, flags);
1707         }
1708         return 0;
1709 }
1710
1711 /*
1712   Return the unicode codepoint for the next multi-byte CH_UNIX character
1713   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1714
1715   Also return the number of bytes consumed (which tells the caller
1716   how many bytes to skip to get to the next CH_UNIX character).
1717
1718   Return INVALID_CODEPOINT if the next character cannot be converted.
1719 */
1720
1721 codepoint_t next_codepoint(const char *str, size_t *size)
1722 {
1723         /* It cannot occupy more than 4 bytes in UTF16 format */
1724         uint8_t buf[4];
1725         smb_iconv_t descriptor;
1726         size_t ilen_orig;
1727         size_t ilen;
1728         size_t olen;
1729         char *outbuf;
1730
1731         if ((str[0] & 0x80) == 0) {
1732                 *size = 1;
1733                 return (codepoint_t)str[0];
1734         }
1735
1736         /* We assume that no multi-byte character can take
1737            more than 5 bytes. This is OK as we only
1738            support codepoints up to 1M */
1739
1740         ilen_orig = strnlen(str, 5);
1741         ilen = ilen_orig;
1742
1743         lazy_initialize_conv();
1744
1745         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1746         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1747                 *size = 1;
1748                 return INVALID_CODEPOINT;
1749         }
1750
1751         /* This looks a little strange, but it is needed to cope
1752            with codepoints above 64k which are encoded as per RFC2781. */
1753         olen = 2;
1754         outbuf = (char *)buf;
1755         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1756         if (olen == 2) {
1757                 /* We failed to convert to a 2 byte character.
1758                    See if we can convert to a 4 UTF16-LE byte char encoding.
1759                 */
1760                 olen = 4;
1761                 outbuf = (char *)buf;
1762                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1763                 if (olen == 4) {
1764                         /* We didn't convert any bytes */
1765                         *size = 1;
1766                         return INVALID_CODEPOINT;
1767                 }
1768                 olen = 4 - olen;
1769         } else {
1770                 olen = 2 - olen;
1771         }
1772
1773         *size = ilen_orig - ilen;
1774
1775         if (olen == 2) {
1776                 /* 2 byte, UTF16-LE encoded value. */
1777                 return (codepoint_t)SVAL(buf, 0);
1778         }
1779         if (olen == 4) {
1780                 /* Decode a 4 byte UTF16-LE character manually.
1781                    See RFC2871 for the encoding machanism.
1782                 */
1783                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1784                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1785
1786                 return (codepoint_t)0x10000 +
1787                                 (w1 << 10) + w2;
1788         }
1789
1790         /* no other length is valid */
1791         return INVALID_CODEPOINT;
1792 }