source/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
   48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; /* Conversion descriptors indexed [from][to]; 0 means not opened, (smb_iconv_t)-1 means the open failed */
   49 static bool conv_silent; /* When true, conversion failures are not reported through DEBUG */
   50 static bool initialized; /* Set by lazy_initialize_conv(), cleared again by gfree_charcnv() */
  51
  52 /**
  53  * Return the name of a charset to give to iconv().
  54  **/
  55 static const char *charset_name(charset_t ch)
  56 {
  57         const char *ret = NULL;
  58
  59         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  60         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  61         else if (ch == CH_UNIX) ret = lp_unix_charset();
  62         else if (ch == CH_DOS) ret = lp_dos_charset();
  63         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  64         else if (ch == CH_UTF8) ret = "UTF8";
  65
  66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  67         if (ret && !strcmp(ret, "LOCALE")) {
  68                 const char *ln = NULL;
  69
  70 #ifdef HAVE_SETLOCALE
  71                 setlocale(LC_ALL, "");
  72 #endif
  73                 ln = nl_langinfo(CODESET);
  74                 if (ln) {
  75                         /* Check whether the charset name is supported
  76                            by iconv */
  77                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  78                         if (handle == (smb_iconv_t) -1) {
  79                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  80                                 ln = NULL;
  81                         } else {
  82                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  83                                 smb_iconv_close(handle);
  84                         }
  85                 }
  86                 ret = ln;
  87         }
  88 #endif
  89
  90         if (!ret || !*ret) ret = "ASCII";
  91         return ret;
  92 }
  93
  94 void lazy_initialize_conv(void)
  95 {
  96         if (!initialized) {
  97                 load_case_tables();
  98                 init_iconv();
  99                 initialized = true;
 100         }
 101 }
 102
 103 /**
 104  * Destroy global objects allocated by init_iconv()
 105  **/
 106 void gfree_charcnv(void)
 107 {
 108         int c1, c2;
 109
 110         for (c1=0;c1<NUM_CHARSETS;c1++) {
 111                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 112                         if ( conv_handles[c1][c2] ) {
 113                                 smb_iconv_close( conv_handles[c1][c2] );
 114                                 conv_handles[c1][c2] = 0;
 115                         }
 116                 }
 117         }
 118         initialized = false;
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         bool did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_valid_table();
 182                 conv_silent = False;
 183         }
 184 }
 185
 186 /**
 187  * Convert string from one encoding to another, making error checking etc
 188  * Slow path version - uses (slow) iconv.
 189  *
 190  * @param src pointer to source string (multibyte or singlebyte)
 191  * @param srclen length of the source string in bytes
 192  * @param dest pointer to destination string (multibyte or singlebyte)
 193  * @param destlen maximal length allowed for string
 194  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 195  * @returns the number of bytes occupied in the destination
 196  *
 197  * Ensure the srclen contains the terminating zero.
 198  *
 199  **/
 200
 201 static size_t convert_string_internal(charset_t from, charset_t to,
 202                       void const *src, size_t srclen,
 203                       void *dest, size_t destlen, bool allow_bad_conv)
 204 {
 205         size_t i_len, o_len;
 206         size_t retval;
 207         const char* inbuf = (const char*)src;
 208         char* outbuf = (char*)dest;
 209         smb_iconv_t descriptor;
 210
 211         lazy_initialize_conv();
 212
 213         descriptor = conv_handles[from][to];
 214
 215         if (srclen == (size_t)-1) {
 216                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 217                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 218                 } else {
 219                         srclen = strlen((const char *)src)+1;
 220                 }
 221         }
 222
 223
 224         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 225                 if (!conv_silent)
 226                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 227                 return (size_t)-1;
 228         }
 229
 230         i_len=srclen;
 231         o_len=destlen;
 232
 233  again:
 234
 235         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 236         if(retval==(size_t)-1) {
 237                 const char *reason="unknown error";
 238                 switch(errno) {
 239                         case EINVAL:
 240                                 reason="Incomplete multibyte sequence";
 241                                 if (!conv_silent)
 242                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 243                                 if (allow_bad_conv)
 244                                         goto use_as_is;
 245                                 break;
 246                         case E2BIG:
 247                                 reason="No more room";
 248                                 if (!conv_silent) {
 249                                         if (from == CH_UNIX) {
 250                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 251                                                         charset_name(from), charset_name(to),
 252                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 253                                         } else {
 254                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 255                                                         charset_name(from), charset_name(to),
 256                                                         (unsigned int)srclen, (unsigned int)destlen));
 257                                         }
 258                                 }
 259                                 break;
 260                         case EILSEQ:
 261                                 reason="Illegal multibyte sequence";
 262                                 if (!conv_silent)
 263                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 264                                 if (allow_bad_conv)
 265                                         goto use_as_is;
 266                                 break;
 267                         default:
 268                                 if (!conv_silent)
 269                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 270                                 break;
 271                 }
 272                 /* smb_panic(reason); */
 273         }
 274         return destlen-o_len;
 275
 276  use_as_is:
 277
 278         /*
 279          * Conversion not supported. This is actually an error, but there are so
 280          * many misconfigured iconv systems and smb.conf's out there we can't just
 281          * fail. Do a very bad conversion instead.... JRA.
 282          */
 283
 284         {
 285                 if (o_len == 0 || i_len == 0)
 286                         return destlen - o_len;
 287
 288                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 289                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 290                         /* Can't convert from utf16 any endian to multibyte.
 291                            Replace with the default fail char.
 292                         */
 293                         if (i_len < 2)
 294                                 return destlen - o_len;
 295                         if (i_len >= 2) {
 296                                 *outbuf = lp_failed_convert_char();
 297
 298                                 outbuf++;
 299                                 o_len--;
 300
 301                                 inbuf += 2;
 302                                 i_len -= 2;
 303                         }
 304
 305                         if (o_len == 0 || i_len == 0)
 306                                 return destlen - o_len;
 307
 308                         /* Keep trying with the next char... */
 309                         goto again;
 310
 311                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 312                         /* Can't convert to UTF16LE - just widen by adding the
 313                            default fail char then zero.
 314                         */
 315                         if (o_len < 2)
 316                                 return destlen - o_len;
 317
 318                         outbuf[0] = lp_failed_convert_char();
 319                         outbuf[1] = '\0';
 320
 321                         inbuf++;
 322                         i_len--;
 323
 324                         outbuf += 2;
 325                         o_len -= 2;
 326
 327                         if (o_len == 0 || i_len == 0)
 328                                 return destlen - o_len;
 329
 330                         /* Keep trying with the next char... */
 331                         goto again;
 332
 333                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 334                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 335                         /* Failed multibyte to multibyte. Just copy the default fail char and
 336                                 try again. */
 337                         outbuf[0] = lp_failed_convert_char();
 338
 339                         inbuf++;
 340                         i_len--;
 341
 342                         outbuf++;
 343                         o_len--;
 344
 345                         if (o_len == 0 || i_len == 0)
 346                                 return destlen - o_len;
 347
 348                         /* Keep trying with the next char... */
 349                         goto again;
 350
 351                 } else {
 352                         /* Keep compiler happy.... */
 353                         return destlen - o_len;
 354                 }
 355         }
 356 }
 357
 358 /**
 359  * Convert string from one encoding to another, making error checking etc
 360  * Fast path version - handles ASCII first.
 361  *
 362  * @param src pointer to source string (multibyte or singlebyte)
 363  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 364  * @param dest pointer to destination string (multibyte or singlebyte)
 365  * @param destlen maximal length allowed for string - *NEVER* -1.
 366  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 367  * @returns the number of bytes occupied in the destination
 368  *
 369  * Ensure the srclen contains the terminating zero.
 370  *
 371  * This function has been hand-tuned to provide a fast path.
 372  * Don't change unless you really know what you are doing. JRA.
 373  **/
 374
 375 size_t convert_string(charset_t from, charset_t to,
 376                       void const *src, size_t srclen,
 377                       void *dest, size_t destlen, bool allow_bad_conv)
 378 {
 379         /*
 380          * NB. We deliberately don't do a strlen here if srclen == -1.
 381          * This is very expensive over millions of calls and is taken
 382          * care of in the slow path in convert_string_internal. JRA.
 383          */
 384
 385 #ifdef DEVELOPER
 386         SMB_ASSERT(destlen != (size_t)-1);
 387 #endif
 388
 389         if (srclen == 0)
 390                 return 0;
 391
 392         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 393                 const unsigned char *p = (const unsigned char *)src;
 394                 unsigned char *q = (unsigned char *)dest;
 395                 size_t slen = srclen;
 396                 size_t dlen = destlen;
 397                 unsigned char lastp = '\0';
 398                 size_t retval = 0;
 399
 400                 /* If all characters are ascii, fast path here. */
 401                 while (slen && dlen) {
 402                         if ((lastp = *p) <= 0x7f) {
 403                                 *q++ = *p++;
 404                                 if (slen != (size_t)-1) {
 405                                         slen--;
 406                                 }
 407                                 dlen--;
 408                                 retval++;
 409                                 if (!lastp)
 410                                         break;
 411                         } else {
 412 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 413                                 goto general_case;
 414 #else
 415                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 416 #endif
 417                         }
 418                 }
 419                 if (!dlen) {
 420                         /* Even if we fast path we should note if we ran out of room. */
 421                         if (((slen != (size_t)-1) && slen) ||
 422                                         ((slen == (size_t)-1) && lastp)) {
 423                                 errno = E2BIG;
 424                         }
 425                 }
 426                 return retval;
 427         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 428                 const unsigned char *p = (const unsigned char *)src;
 429                 unsigned char *q = (unsigned char *)dest;
 430                 size_t retval = 0;
 431                 size_t slen = srclen;
 432                 size_t dlen = destlen;
 433                 unsigned char lastp = '\0';
 434
 435                 /* If all characters are ascii, fast path here. */
 436                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 437                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 438                                 *q++ = *p;
 439                                 if (slen != (size_t)-1) {
 440                                         slen -= 2;
 441                                 }
 442                                 p += 2;
 443                                 dlen--;
 444                                 retval++;
 445                                 if (!lastp)
 446                                         break;
 447                         } else {
 448 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 449                                 goto general_case;
 450 #else
 451                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 452 #endif
 453                         }
 454                 }
 455                 if (!dlen) {
 456                         /* Even if we fast path we should note if we ran out of room. */
 457                         if (((slen != (size_t)-1) && slen) ||
 458                                         ((slen == (size_t)-1) && lastp)) {
 459                                 errno = E2BIG;
 460                         }
 461                 }
 462                 return retval;
 463         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 464                 const unsigned char *p = (const unsigned char *)src;
 465                 unsigned char *q = (unsigned char *)dest;
 466                 size_t retval = 0;
 467                 size_t slen = srclen;
 468                 size_t dlen = destlen;
 469                 unsigned char lastp = '\0';
 470
 471                 /* If all characters are ascii, fast path here. */
 472                 while (slen && (dlen >= 2)) {
 473                         if ((lastp = *p) <= 0x7F) {
 474                                 *q++ = *p++;
 475                                 *q++ = '\0';
 476                                 if (slen != (size_t)-1) {
 477                                         slen--;
 478                                 }
 479                                 dlen -= 2;
 480                                 retval += 2;
 481                                 if (!lastp)
 482                                         break;
 483                         } else {
 484 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 485                                 goto general_case;
 486 #else
 487                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 488 #endif
 489                         }
 490                 }
 491                 if (!dlen) {
 492                         /* Even if we fast path we should note if we ran out of room. */
 493                         if (((slen != (size_t)-1) && slen) ||
 494                                         ((slen == (size_t)-1) && lastp)) {
 495                                 errno = E2BIG;
 496                         }
 497                 }
 498                 return retval;
 499         }
 500
 501 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 502   general_case:
 503 #endif
 504         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 505 }
 506
 507 /**
 508  * Convert between character sets, allocating a new buffer for the result.
 509  *
 510  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 511  * (this is a bad interface and needs fixing. JRA).
 512  * @param srclen length of source buffer.
 513  * @param dest always set at least to NULL
 514  * @param converted_size set to the size of the allocated buffer on return
 515  * true
 516  * @note -1 is not accepted for srclen.
 517  *
 518  * @return True if new buffer was correctly allocated, and string was
 519  * converted.
 520  *
 521  * Ensure the srclen contains the terminating zero.
 522  *
 523  * I hate the goto's in this function. It's embarressing.....
 524  * There has to be a cleaner way to do this. JRA.
 525  **/
 526
 527 bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 528                              void const *src, size_t srclen, void *dst,
 529                              size_t *converted_size, bool allow_bad_conv)
 530 {
 531         size_t i_len, o_len, destlen = (srclen * 3) / 2;
 532         size_t retval;
 533         const char *inbuf = (const char *)src;
 534         char *outbuf = NULL, *ob = NULL;
 535         smb_iconv_t descriptor;
 536         void **dest = (void **)dst;
 537
 538         *dest = NULL;
 539
 540         if (!converted_size) {
 541                 errno = EINVAL;
 542                 return false;
 543         }
 544
 545         if (src == NULL || srclen == (size_t)-1) {
 546                 errno = EINVAL;
 547                 return false;
 548         }
 549         if (srclen == 0) {
 550                 *converted_size = 0;
 551                 return true;
 552         }
 553
 554         lazy_initialize_conv();
 555
 556         descriptor = conv_handles[from][to];
 557
 558         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 559                 if (!conv_silent)
 560                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 561                 errno = EOPNOTSUPP;
 562                 return false;
 563         }
 564
 565   convert:
 566
 567         /* +2 is for ucs2 null termination. */
 568         if ((destlen*2)+2 < destlen) {
 569                 /* wrapped ! abort. */
 570                 if (!conv_silent)
 571                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 572                 if (!ctx)
 573                         SAFE_FREE(outbuf);
 574                 errno = EOPNOTSUPP;
 575                 return false;
 576         } else {
 577                 destlen = destlen * 2;
 578         }
 579
 580         /* +2 is for ucs2 null termination. */
 581         if (ctx) {
 582                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
 583         } else {
 584                 ob = (char *)SMB_REALLOC(ob, destlen + 2);
 585         }
 586
 587         if (!ob) {
 588                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 589                 errno = ENOMEM;
 590                 return false;
 591         }
 592         outbuf = ob;
 593         i_len = srclen;
 594         o_len = destlen;
 595
 596  again:
 597
 598         retval = smb_iconv(descriptor,
 599                            &inbuf, &i_len,
 600                            &outbuf, &o_len);
 601         if(retval == (size_t)-1)                {
 602                 const char *reason="unknown error";
 603                 switch(errno) {
 604                         case EINVAL:
 605                                 reason="Incomplete multibyte sequence";
 606                                 if (!conv_silent)
 607                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 608                                 if (allow_bad_conv)
 609                                         goto use_as_is;
 610                                 break;
 611                         case E2BIG:
 612                                 goto convert;
 613                         case EILSEQ:
 614                                 reason="Illegal multibyte sequence";
 615                                 if (!conv_silent)
 616                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 617                                 if (allow_bad_conv)
 618                                         goto use_as_is;
 619                                 break;
 620                 }
 621                 if (!conv_silent)
 622                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 623                 /* smb_panic(reason); */
 624                 if (ctx) {
 625                         TALLOC_FREE(ob);
 626                 } else {
 627                         SAFE_FREE(ob);
 628                 }
 629                 return false;
 630         }
 631
 632   out:
 633
 634         destlen = destlen - o_len;
 635         /* Don't shrink unless we're reclaiming a lot of
 636          * space. This is in the hot codepath and these
 637          * reallocs *cost*. JRA.
 638          */
 639         if (o_len > 1024) {
 640                 /* We're shrinking here so we know the +2 is safe from wrap. */
 641                 if (ctx) {
 642                         ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
 643                 } else {
 644                         ob = (char *)SMB_REALLOC(ob,destlen + 2);
 645                 }
 646         }
 647
 648         if (destlen && !ob) {
 649                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 650                 errno = ENOMEM;
 651                 return false;
 652         }
 653
 654         *dest = ob;
 655
 656         /* Must ucs2 null terminate in the extra space we allocated. */
 657         ob[destlen] = '\0';
 658         ob[destlen+1] = '\0';
 659
 660         *converted_size = destlen;
 661         return true;
 662
 663  use_as_is:
 664
 665         /*
 666          * Conversion not supported. This is actually an error, but there are so
 667          * many misconfigured iconv systems and smb.conf's out there we can't just
 668          * fail. Do a very bad conversion instead.... JRA.
 669          */
 670
 671         {
 672                 if (o_len == 0 || i_len == 0)
 673                         goto out;
 674
 675                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 676                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 677                         /* Can't convert from utf16 any endian to multibyte.
 678                            Replace with the default fail char.
 679                         */
 680
 681                         if (i_len < 2)
 682                                 goto out;
 683
 684                         if (i_len >= 2) {
 685                                 *outbuf = lp_failed_convert_char();
 686
 687                                 outbuf++;
 688                                 o_len--;
 689
 690                                 inbuf += 2;
 691                                 i_len -= 2;
 692                         }
 693
 694                         if (o_len == 0 || i_len == 0)
 695                                 goto out;
 696
 697                         /* Keep trying with the next char... */
 698                         goto again;
 699
 700                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 701                         /* Can't convert to UTF16LE - just widen by adding the
 702                            default fail char then zero.
 703                         */
 704                         if (o_len < 2)
 705                                 goto out;
 706
 707                         outbuf[0] = lp_failed_convert_char();
 708                         outbuf[1] = '\0';
 709
 710                         inbuf++;
 711                         i_len--;
 712
 713                         outbuf += 2;
 714                         o_len -= 2;
 715
 716                         if (o_len == 0 || i_len == 0)
 717                                 goto out;
 718
 719                         /* Keep trying with the next char... */
 720                         goto again;
 721
 722                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 723                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 724                         /* Failed multibyte to multibyte. Just copy the default fail char and
 725                            try again. */
 726                         outbuf[0] = lp_failed_convert_char();
 727
 728                         inbuf++;
 729                         i_len--;
 730
 731                         outbuf++;
 732                         o_len--;
 733
 734                         if (o_len == 0 || i_len == 0)
 735                                 goto out;
 736
 737                         /* Keep trying with the next char... */
 738                         goto again;
 739
 740                 } else {
 741                         /* Keep compiler happy.... */
 742                         goto out;
 743                 }
 744         }
 745 }
 746
 747 /**
 748  * Convert between character sets, allocating a new buffer using talloc for the result.
 749  *
 750  * @param srclen length of source buffer.
 751  * @param dest always set at least to NULL
 752  * @note -1 is not accepted for srclen.
 753  *
 754  * @returns Size in bytes of the converted string; or -1 in case of error.
 755  **/
 756 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 757                              void const *src, size_t srclen, void *dst,
 758                              bool allow_bad_conv)
 759 {
 760         void **dest = (void **)dst;
 761         size_t dest_len;
 762
 763         *dest = NULL;
 764         if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
 765                 &dest_len, allow_bad_conv))
 766                 return (size_t)-1;
 767         if (*dest == NULL)
 768                 return (size_t)-1;
 769         return dest_len;
 770 }
 771
 772 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 773 {
 774         size_t size;
 775         smb_ucs2_t *buffer;
 776
 777         size = push_ucs2_allocate(&buffer, src);
 778         if (size == (size_t)-1) {
 779                 return (size_t)-1;
 780         }
 781         if (!strupper_w(buffer) && (dest == src)) {
 782                 free(buffer);
 783                 return srclen;
 784         }
 785
 786         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 787         free(buffer);
 788         return size;
 789 }
 790
 791 /**
 792  strdup() a unix string to upper case.
 793 **/
 794
 795 char *strdup_upper(const char *s)
 796 {
 797         char *out_buffer = SMB_STRDUP(s);
 798         const unsigned char *p = (const unsigned char *)s;
 799         unsigned char *q = (unsigned char *)out_buffer;
 800
 801         if (!q) {
 802                 return NULL;
 803         }
 804
 805         /* this is quite a common operation, so we want it to be
 806            fast. We optimise for the ascii case, knowing that all our
 807            supported multi-byte character sets are ascii-compatible
 808            (ie. they match for the first 128 chars) */
 809
 810         while (*p) {
 811                 if (*p & 0x80)
 812                         break;
 813                 *q++ = toupper_ascii_fast(*p);
 814                 p++;
 815         }
 816
 817         if (*p) {
 818                 /* MB case. */
 819                 size_t size, size2;
 820                 smb_ucs2_t *buffer = NULL;
 821
 822                 SAFE_FREE(out_buffer);
 823                 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
 824                         strlen(s) + 1, (void **)(void *)&buffer, &size,
 825                         True)) {
 826                         return NULL;
 827                 }
 828
 829                 strupper_w(buffer);
 830
 831                 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
 832                         size, (void **)(void *)&out_buffer, &size2, True)) {
 833                         TALLOC_FREE(buffer);
 834                         return NULL;
 835                 }
 836
 837                 /* Don't need the intermediate buffer
 838                  * anymore.
 839                  */
 840                 TALLOC_FREE(buffer);
 841         }
 842
 843         return out_buffer;
 844 }
 845
 846 /**
 847  talloc_strdup() a unix string to upper case.
 848 **/
 849
 850 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 851 {
 852         char *out_buffer = talloc_strdup(ctx,s);
 853         const unsigned char *p = (const unsigned char *)s;
 854         unsigned char *q = (unsigned char *)out_buffer;
 855
 856         if (!q) {
 857                 return NULL;
 858         }
 859
 860         /* this is quite a common operation, so we want it to be
 861            fast. We optimise for the ascii case, knowing that all our
 862            supported multi-byte character sets are ascii-compatible
 863            (ie. they match for the first 128 chars) */
 864
 865         while (*p) {
 866                 if (*p & 0x80)
 867                         break;
 868                 *q++ = toupper_ascii_fast(*p);
 869                 p++;
 870         }
 871
 872         if (*p) {
 873                 /* MB case. */
 874                 size_t size;
 875                 smb_ucs2_t *ubuf = NULL;
 876
 877                 /* We're not using the ascii buffer above. */
 878                 TALLOC_FREE(out_buffer);
 879
 880                 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
 881                                 s, strlen(s)+1,
 882                                 (void *)&ubuf,
 883                                 True);
 884                 if (size == (size_t)-1) {
 885                         return NULL;
 886                 }
 887
 888                 strupper_w(ubuf);
 889
 890                 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
 891                                 ubuf, size,
 892                                 (void *)&out_buffer,
 893                                 True);
 894
 895                 /* Don't need the intermediate buffer
 896                  * anymore.
 897                  */
 898
 899                 TALLOC_FREE(ubuf);
 900
 901                 if (size == (size_t)-1) {
 902                         return NULL;
 903                 }
 904         }
 905
 906         return out_buffer;
 907 }
 908
 909 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 910 {
 911         size_t size;
 912         smb_ucs2_t *buffer = NULL;
 913
 914         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 915                 (void **)(void *)&buffer, &size, True)) {
 916                 smb_panic("failed to create UCS2 buffer");
 917         }
 918         if (!strlower_w(buffer) && (dest == src)) {
 919                 SAFE_FREE(buffer);
 920                 return srclen;
 921         }
 922         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 923         SAFE_FREE(buffer);
 924         return size;
 925 }
 926
 927 /**
 928  strdup() a unix string to lower case.
 929 **/
 930
 931 char *strdup_lower(const char *s)
 932 {
 933         size_t size;
 934         smb_ucs2_t *buffer = NULL;
 935         char *out_buffer;
 936
 937         size = push_ucs2_allocate(&buffer, s);
 938         if (size == -1 || !buffer) {
 939                 return NULL;
 940         }
 941
 942         strlower_w(buffer);
 943
 944         size = pull_ucs2_allocate(&out_buffer, buffer);
 945         SAFE_FREE(buffer);
 946
 947         if (size == (size_t)-1) {
 948                 return NULL;
 949         }
 950
 951         return out_buffer;
 952 }
 953
 954 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
 955 {
 956         size_t size;
 957         smb_ucs2_t *buffer = NULL;
 958         char *out_buffer;
 959
 960         size = push_ucs2_talloc(ctx, &buffer, s);
 961         if (size == -1 || !buffer) {
 962                 TALLOC_FREE(buffer);
 963                 return NULL;
 964         }
 965
 966         strlower_w(buffer);
 967
 968         size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
 969         TALLOC_FREE(buffer);
 970
 971         if (size == (size_t)-1) {
 972                 TALLOC_FREE(out_buffer);
 973                 return NULL;
 974         }
 975
 976         return out_buffer;
 977 }
 978
 979
 980 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 981 {
 982         if (flags & (STR_NOALIGN|STR_ASCII))
 983                 return 0;
 984         return PTR_DIFF(p, base_ptr) & 1;
 985 }
 986
 987
 988 /**
 989  * Copy a string from a char* unix src to a dos codepage string destination.
 990  *
 991  * @return the number of bytes occupied by the string in the destination.
 992  *
 993  * @param flags can include
 994  * <dl>
 995  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 996  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 997  * </dl>
 998  *
 999  * @param dest_len the maximum length in bytes allowed in the
1000  * destination.
1001  **/
1002 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1003 {
1004         size_t src_len = strlen(src);
1005         char *tmpbuf = NULL;
1006         size_t ret;
1007
1008         /* No longer allow a length of -1. */
1009         if (dest_len == (size_t)-1) {
1010                 smb_panic("push_ascii - dest_len == -1");
1011         }
1012
1013         if (flags & STR_UPPER) {
1014                 tmpbuf = SMB_STRDUP(src);
1015                 if (!tmpbuf) {
1016                         smb_panic("malloc fail");
1017                 }
1018                 strupper_m(tmpbuf);
1019                 src = tmpbuf;
1020         }
1021
1022         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1023                 src_len++;
1024         }
1025
1026         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1027         if (ret == (size_t)-1 &&
1028                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1029                         && dest_len > 0) {
1030                 ((char *)dest)[0] = '\0';
1031         }
1032         SAFE_FREE(tmpbuf);
1033         return ret;
1034 }
1035
1036 size_t push_ascii_fstring(void *dest, const char *src)
1037 {
1038         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1039 }
1040
1041 /********************************************************************
1042  Push an nstring - ensure null terminated. Written by
1043  moriyama@miraclelinux.com (MORIYAMA Masayuki).
1044 ********************************************************************/
1045
1046 size_t push_ascii_nstring(void *dest, const char *src)
1047 {
1048         size_t i, buffer_len, dest_len;
1049         smb_ucs2_t *buffer;
1050
1051         conv_silent = True;
1052         buffer_len = push_ucs2_allocate(&buffer, src);
1053         if (buffer_len == (size_t)-1) {
1054                 smb_panic("failed to create UCS2 buffer");
1055         }
1056
1057         /* We're using buffer_len below to count ucs2 characters, not bytes. */
1058         buffer_len /= sizeof(smb_ucs2_t);
1059
1060         dest_len = 0;
1061         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1062                 unsigned char mb[10];
1063                 /* Convert one smb_ucs2_t character at a time. */
1064                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1065                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1066                         memcpy((char *)dest + dest_len, mb, mb_len);
1067                         dest_len += mb_len;
1068                 } else {
1069                         errno = E2BIG;
1070                         break;
1071                 }
1072         }
1073         ((char *)dest)[dest_len] = '\0';
1074
1075         SAFE_FREE(buffer);
1076         conv_silent = False;
1077         return dest_len;
1078 }
1079
1080 /********************************************************************
1081  Push and malloc an ascii string. src and dest null terminated.
1082 ********************************************************************/
1083
1084 size_t push_ascii_allocate(char **dest, const char *src)
1085 {
1086         size_t dest_len, src_len = strlen(src)+1;
1087
1088         *dest = NULL;
1089         if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1090                 (void **)dest, &dest_len, True))
1091                 return (size_t)-1;
1092         else
1093                 return dest_len;
1094 }
1095
1096 /**
1097  * Copy a string from a dos codepage source to a unix char* destination.
1098  *
1099  * The resulting string in "dest" is always null terminated.
1100  *
1101  * @param flags can have:
1102  * <dl>
1103  * <dt>STR_TERMINATE</dt>
1104  * <dd>STR_TERMINATE means the string in @p src
1105  * is null terminated, and src_len is ignored.</dd>
1106  * </dl>
1107  *
1108  * @param src_len is the length of the source area in bytes.
1109  * @returns the number of bytes occupied by the string in @p src.
1110  **/
1111 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1112 {
1113         size_t ret;
1114
1115         if (dest_len == (size_t)-1) {
1116                 /* No longer allow dest_len of -1. */
1117                 smb_panic("pull_ascii - invalid dest_len of -1");
1118         }
1119
1120         if (flags & STR_TERMINATE) {
1121                 if (src_len == (size_t)-1) {
1122                         src_len = strlen((const char *)src) + 1;
1123                 } else {
1124                         size_t len = strnlen((const char *)src, src_len);
1125                         if (len < src_len)
1126                                 len++;
1127                         src_len = len;
1128                 }
1129         }
1130
1131         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1132         if (ret == (size_t)-1) {
1133                 ret = 0;
1134                 dest_len = 0;
1135         }
1136
1137         if (dest_len && ret) {
1138                 /* Did we already process the terminating zero ? */
1139                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1140                         dest[MIN(ret, dest_len-1)] = 0;
1141                 }
1142         } else  {
1143                 dest[0] = 0;
1144         }
1145
1146         return src_len;
1147 }
1148
1149 /**
1150  * Copy a string from a dos codepage source to a unix char* destination.
1151  Talloc version.
1152  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1153  needs fixing. JRA).
1154  *
1155  * The resulting string in "dest" is always null terminated.
1156  *
1157  * @param flags can have:
1158  * <dl>
1159  * <dt>STR_TERMINATE</dt>
1160  * <dd>STR_TERMINATE means the string in @p src
1161  * is null terminated, and src_len is ignored.</dd>
1162  * </dl>
1163  *
1164  * @param src_len is the length of the source area in bytes.
1165  * @returns the number of bytes occupied by the string in @p src.
1166  **/
1167
1168 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1169                                         char **ppdest,
1170                                         const void *src,
1171                                         size_t src_len,
1172                                         int flags)
1173 {
1174         char *dest = NULL;
1175         size_t dest_len = 0;
1176
1177 #ifdef DEVELOPER
1178         /* Ensure we never use the braindead "malloc" varient. */
1179         if (ctx == NULL) {
1180                 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1181         }
1182 #endif
1183
1184         *ppdest = NULL;
1185
1186         if (!src_len) {
1187                 return 0;
1188         }
1189
1190         if (flags & STR_TERMINATE) {
1191                 if (src_len == (size_t)-1) {
1192                         src_len = strlen((const char *)src) + 1;
1193                 } else {
1194                         size_t len = strnlen((const char *)src, src_len);
1195                         if (len < src_len)
1196                                 len++;
1197                         src_len = len;
1198                 }
1199                 /* Ensure we don't use an insane length from the client. */
1200                 if (src_len >= 1024*1024) {
1201                         char *msg = talloc_asprintf(ctx,
1202                                         "Bad src length (%u) in "
1203                                         "pull_ascii_base_talloc",
1204                                         (unsigned int)src_len);
1205                         smb_panic(msg);
1206                 }
1207         } else {
1208                 /* Can't have an unlimited length
1209                  * non STR_TERMINATE'd.
1210                  */
1211                 if (src_len == (size_t)-1) {
1212                         errno = EINVAL;
1213                         return 0;
1214                 }
1215         }
1216
1217         /* src_len != -1 here. */
1218
1219         if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1220                 &dest_len, True)) {
1221                 dest_len = 0;
1222         }
1223
1224         if (dest_len && dest) {
1225                 /* Did we already process the terminating zero ? */
1226                 if (dest[dest_len-1] != 0) {
1227                         size_t size = talloc_get_size(dest);
1228                         /* Have we got space to append the '\0' ? */
1229                         if (size <= dest_len) {
1230                                 /* No, realloc. */
1231                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1232                                                 dest_len+1);
1233                                 if (!dest) {
1234                                         /* talloc fail. */
1235                                         dest_len = (size_t)-1;
1236                                         return 0;
1237                                 }
1238                         }
1239                         /* Yay - space ! */
1240                         dest[dest_len] = '\0';
1241                         dest_len++;
1242                 }
1243         } else if (dest) {
1244                 dest[0] = 0;
1245         }
1246
1247         *ppdest = dest;
1248         return src_len;
1249 }
1250
1251 size_t pull_ascii_fstring(char *dest, const void *src)
1252 {
1253         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1254 }
1255
1256 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1257
1258 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1259 {
1260         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1261 }
1262
1263 /**
1264  * Copy a string from a char* src to a unicode destination.
1265  *
1266  * @returns the number of bytes occupied by the string in the destination.
1267  *
1268  * @param flags can have:
1269  *
1270  * <dl>
1271  * <dt>STR_TERMINATE <dd>means include the null termination.
1272  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1273  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1274  * </dl>
1275  *
1276  * @param dest_len is the maximum length allowed in the
1277  * destination.
1278  **/
1279
1280 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1281 {
1282         size_t len=0;
1283         size_t src_len;
1284         size_t ret;
1285
1286         if (dest_len == (size_t)-1) {
1287                 /* No longer allow dest_len of -1. */
1288                 smb_panic("push_ucs2 - invalid dest_len of -1");
1289         }
1290
1291         if (flags & STR_TERMINATE)
1292                 src_len = (size_t)-1;
1293         else
1294                 src_len = strlen(src);
1295
1296         if (ucs2_align(base_ptr, dest, flags)) {
1297                 *(char *)dest = 0;
1298                 dest = (void *)((char *)dest + 1);
1299                 if (dest_len)
1300                         dest_len--;
1301                 len++;
1302         }
1303
1304         /* ucs2 is always a multiple of 2 bytes */
1305         dest_len &= ~1;
1306
1307         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1308         if (ret == (size_t)-1) {
1309                 if ((flags & STR_TERMINATE) &&
1310                                 dest &&
1311                                 dest_len) {
1312                         *(char *)dest = 0;
1313                 }
1314                 return len;
1315         }
1316
1317         len += ret;
1318
1319         if (flags & STR_UPPER) {
1320                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1321                 size_t i;
1322
1323                 /* We check for i < (ret / 2) below as the dest string isn't null
1324                    terminated if STR_TERMINATE isn't set. */
1325
1326                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1327                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1328                         if (v != dest_ucs2[i]) {
1329                                 dest_ucs2[i] = v;
1330                         }
1331                 }
1332         }
1333
1334         return len;
1335 }
1336
1337
1338 /**
1339  * Copy a string from a unix char* src to a UCS2 destination,
1340  * allocating a buffer using talloc().
1341  *
1342  * @param dest always set at least to NULL
1343  *
1344  * @returns The number of bytes occupied by the string in the destination
1345  *         or -1 in case of error.
1346  **/
1347 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1348 {
1349         size_t src_len = strlen(src)+1;
1350
1351         *dest = NULL;
1352         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1353 }
1354
1355
1356 /**
1357  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1358  *
1359  * @param dest always set at least to NULL
1360  *
1361  * @returns The number of bytes occupied by the string in the destination
1362  *         or -1 in case of error.
1363  **/
1364
1365 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1366 {
1367         size_t dest_len, src_len = strlen(src)+1;
1368
1369         *dest = NULL;
1370         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1371                 (void **)dest, &dest_len, True))
1372                 return (size_t)-1;
1373         else
1374                 return dest_len;
1375 }
1376
1377 /**
1378  Copy a string from a char* src to a UTF-8 destination.
1379  Return the number of bytes occupied by the string in the destination
1380  Flags can have:
1381   STR_TERMINATE means include the null termination
1382   STR_UPPER     means uppercase in the destination
1383  dest_len is the maximum length allowed in the destination. If dest_len
1384  is -1 then no maxiumum is used.
1385 **/
1386
1387 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1388 {
1389         size_t src_len = 0;
1390         size_t ret;
1391         char *tmpbuf = NULL;
1392
1393         if (dest_len == (size_t)-1) {
1394                 /* No longer allow dest_len of -1. */
1395                 smb_panic("push_utf8 - invalid dest_len of -1");
1396         }
1397
1398         if (flags & STR_UPPER) {
1399                 tmpbuf = strdup_upper(src);
1400                 if (!tmpbuf) {
1401                         return (size_t)-1;
1402                 }
1403                 src = tmpbuf;
1404                 src_len = strlen(src);
1405         }
1406
1407         src_len = strlen(src);
1408         if (flags & STR_TERMINATE) {
1409                 src_len++;
1410         }
1411
1412         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1413         SAFE_FREE(tmpbuf);
1414         return ret;
1415 }
1416
1417 size_t push_utf8_fstring(void *dest, const char *src)
1418 {
1419         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1420 }
1421
1422 /**
1423  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1424  *
1425  * @param dest always set at least to NULL
1426  *
1427  * @returns The number of bytes occupied by the string in the destination
1428  **/
1429
1430 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1431 {
1432         size_t src_len = strlen(src)+1;
1433
1434         *dest = NULL;
1435         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1436 }
1437
1438 /**
1439  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1440  *
1441  * @param dest always set at least to NULL
1442  *
1443  * @returns The number of bytes occupied by the string in the destination
1444  **/
1445
1446 size_t push_utf8_allocate(char **dest, const char *src)
1447 {
1448         size_t dest_len, src_len = strlen(src)+1;
1449
1450         *dest = NULL;
1451         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1452                 (void **)dest, &dest_len, True))
1453                 return (size_t)-1;
1454         else
1455                 return dest_len;
1456 }
1457
1458 /**
1459  Copy a string from a ucs2 source to a unix char* destination.
1460  Flags can have:
1461   STR_TERMINATE means the string in src is null terminated.
1462   STR_NOALIGN   means don't try to align.
1463  if STR_TERMINATE is set then src_len is ignored if it is -1.
1464  src_len is the length of the source area in bytes
1465  Return the number of bytes occupied by the string in src.
1466  The resulting string in "dest" is always null terminated.
1467 **/
1468
1469 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1470 {
1471         size_t ret;
1472
1473         if (dest_len == (size_t)-1) {
1474                 /* No longer allow dest_len of -1. */
1475                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1476         }
1477
1478         if (!src_len) {
1479                 if (dest && dest_len > 0) {
1480                         dest[0] = '\0';
1481                 }
1482                 return 0;
1483         }
1484
1485         if (ucs2_align(base_ptr, src, flags)) {
1486                 src = (const void *)((const char *)src + 1);
1487                 if (src_len != (size_t)-1)
1488                         src_len--;
1489         }
1490
1491         if (flags & STR_TERMINATE) {
1492                 /* src_len -1 is the default for null terminated strings. */
1493                 if (src_len != (size_t)-1) {
1494                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1495                                                 src_len/2);
1496                         if (len < src_len/2)
1497                                 len++;
1498                         src_len = len*2;
1499                 }
1500         }
1501
1502         /* ucs2 is always a multiple of 2 bytes */
1503         if (src_len != (size_t)-1)
1504                 src_len &= ~1;
1505
1506         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1507         if (ret == (size_t)-1) {
1508                 ret = 0;
1509                 dest_len = 0;
1510         }
1511
1512         if (src_len == (size_t)-1)
1513                 src_len = ret*2;
1514
1515         if (dest_len && ret) {
1516                 /* Did we already process the terminating zero ? */
1517                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1518                         dest[MIN(ret, dest_len-1)] = 0;
1519                 }
1520         } else {
1521                 dest[0] = 0;
1522         }
1523
1524         return src_len;
1525 }
1526
1527 /**
1528  Copy a string from a ucs2 source to a unix char* destination.
1529  Talloc version with a base pointer.
1530  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1531  needs fixing. JRA).
1532  Flags can have:
1533   STR_TERMINATE means the string in src is null terminated.
1534   STR_NOALIGN   means don't try to align.
1535  if STR_TERMINATE is set then src_len is ignored if it is -1.
1536  src_len is the length of the source area in bytes
1537  Return the number of bytes occupied by the string in src.
1538  The resulting string in "dest" is always null terminated.
1539 **/
1540
1541 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1542                         const void *base_ptr,
1543                         char **ppdest,
1544                         const void *src,
1545                         size_t src_len,
1546                         int flags)
1547 {
1548         char *dest;
1549         size_t dest_len;
1550
1551         *ppdest = NULL;
1552
1553 #ifdef DEVELOPER
1554         /* Ensure we never use the braindead "malloc" varient. */
1555         if (ctx == NULL) {
1556                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1557         }
1558 #endif
1559
1560         if (!src_len) {
1561                 return 0;
1562         }
1563
1564         if (ucs2_align(base_ptr, src, flags)) {
1565                 src = (const void *)((const char *)src + 1);
1566                 if (src_len != (size_t)-1)
1567                         src_len--;
1568         }
1569
1570         if (flags & STR_TERMINATE) {
1571                 /* src_len -1 is the default for null terminated strings. */
1572                 if (src_len != (size_t)-1) {
1573                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1574                                                 src_len/2);
1575                         if (len < src_len/2)
1576                                 len++;
1577                         src_len = len*2;
1578                 } else {
1579                         /*
1580                          * src_len == -1 - alloc interface won't take this
1581                          * so we must calculate.
1582                          */
1583                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1584                 }
1585                 /* Ensure we don't use an insane length from the client. */
1586                 if (src_len >= 1024*1024) {
1587                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1588                 }
1589         } else {
1590                 /* Can't have an unlimited length
1591                  * non STR_TERMINATE'd.
1592                  */
1593                 if (src_len == (size_t)-1) {
1594                         errno = EINVAL;
1595                         return 0;
1596                 }
1597         }
1598
1599         /* src_len != -1 here. */
1600
1601         /* ucs2 is always a multiple of 2 bytes */
1602         src_len &= ~1;
1603
1604         dest_len = convert_string_talloc(ctx,
1605                                         CH_UTF16LE,
1606                                         CH_UNIX,
1607                                         src,
1608                                         src_len,
1609                                         (void *)&dest,
1610                                         True);
1611         if (dest_len == (size_t)-1) {
1612                 dest_len = 0;
1613         }
1614
1615         if (dest_len) {
1616                 /* Did we already process the terminating zero ? */
1617                 if (dest[dest_len-1] != 0) {
1618                         size_t size = talloc_get_size(dest);
1619                         /* Have we got space to append the '\0' ? */
1620                         if (size <= dest_len) {
1621                                 /* No, realloc. */
1622                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1623                                                 dest_len+1);
1624                                 if (!dest) {
1625                                         /* talloc fail. */
1626                                         dest_len = (size_t)-1;
1627                                         return 0;
1628                                 }
1629                         }
1630                         /* Yay - space ! */
1631                         dest[dest_len] = '\0';
1632                         dest_len++;
1633                 }
1634         } else if (dest) {
1635                 dest[0] = 0;
1636         }
1637
1638         *ppdest = dest;
1639         return src_len;
1640 }
1641
1642 size_t pull_ucs2_fstring(char *dest, const void *src)
1643 {
1644         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1645 }
1646
1647 /**
1648  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1649  *
1650  * @param dest always set at least to NULL
1651  *
1652  * @returns The number of bytes occupied by the string in the destination
1653  **/
1654
1655 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1656 {
1657         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1658         *dest = NULL;
1659         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1660 }
1661
1662 /**
1663  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1664  *
1665  * @param dest always set at least to NULL
1666  *
1667  * @returns The number of bytes occupied by the string in the destination
1668  **/
1669
1670 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1671 {
1672         size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1673         *dest = NULL;
1674         if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1675                 (void **)dest, &dest_len, True))
1676                 return (size_t)-1;
1677         else
1678                 return dest_len;
1679 }
1680
1681 /**
1682  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1683  *
1684  * @param dest always set at least to NULL
1685  *
1686  * @returns The number of bytes occupied by the string in the destination
1687  **/
1688
1689 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1690 {
1691         size_t src_len = strlen(src)+1;
1692         *dest = NULL;
1693         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1694 }
1695
1696 /**
1697  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1698  *
1699  * @param dest always set at least to NULL
1700  *
1701  * @returns The number of bytes occupied by the string in the destination
1702  **/
1703
1704 size_t pull_utf8_allocate(char **dest, const char *src)
1705 {
1706         size_t dest_len, src_len = strlen(src)+1;
1707         *dest = NULL;
1708         if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1709                 (void **)dest, &dest_len, True))
1710                 return (size_t)-1;
1711         else
1712                 return dest_len;
1713 }
1714
1715 /**
1716  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1717  *
1718  * @param dest always set at least to NULL
1719  *
1720  * @returns The number of bytes occupied by the string in the destination
1721  **/
1722
1723 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1724 {
1725         size_t src_len = strlen(src)+1;
1726         *dest = NULL;
1727         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1728 }
1729
1730 /**
1731  Copy a string from a char* src to a unicode or ascii
1732  dos codepage destination choosing unicode or ascii based on the
1733  flags in the SMB buffer starting at base_ptr.
1734  Return the number of bytes occupied by the string in the destination.
1735  flags can have:
1736   STR_TERMINATE means include the null termination.
1737   STR_UPPER     means uppercase in the destination.
1738   STR_ASCII     use ascii even with unicode packet.
1739   STR_NOALIGN   means don't do alignment.
1740  dest_len is the maximum length allowed in the destination. If dest_len
1741  is -1 then no maxiumum is used.
1742 **/
1743
1744 size_t push_string_fn(const char *function, unsigned int line,
1745                       const void *base_ptr, uint16 flags2,
1746                       void *dest, const char *src,
1747                       size_t dest_len, int flags)
1748 {
1749 #ifdef DEVELOPER
1750         /* We really need to zero fill here, not clobber
1751          * region, as we want to ensure that valgrind thinks
1752          * all of the outgoing buffer has been written to
1753          * so a send() or write() won't trap an error.
1754          * JRA.
1755          */
1756 #if 0
1757         clobber_region(function, line, dest, dest_len);
1758 #else
1759         memset(dest, '\0', dest_len);
1760 #endif
1761 #endif
1762
1763         if (!(flags & STR_ASCII) && \
1764             ((flags & STR_UNICODE || \
1765               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1766                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1767         }
1768         return push_ascii(dest, src, dest_len, flags);
1769 }
1770
1771
1772 /**
1773  Copy a string from a unicode or ascii source (depending on
1774  the packet flags) to a char* destination.
1775  Flags can have:
1776   STR_TERMINATE means the string in src is null terminated.
1777   STR_UNICODE   means to force as unicode.
1778   STR_ASCII     use ascii even with unicode packet.
1779   STR_NOALIGN   means don't do alignment.
1780  if STR_TERMINATE is set then src_len is ignored is it is -1
1781  src_len is the length of the source area in bytes.
1782  Return the number of bytes occupied by the string in src.
1783  The resulting string in "dest" is always null terminated.
1784 **/
1785
1786 size_t pull_string_fn(const char *function,
1787                         unsigned int line,
1788                         const void *base_ptr,
1789                         uint16 smb_flags2,
1790                         char *dest,
1791                         const void *src,
1792                         size_t dest_len,
1793                         size_t src_len,
1794                         int flags)
1795 {
1796 #ifdef DEVELOPER
1797         clobber_region(function, line, dest, dest_len);
1798 #endif
1799
1800         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1801                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1802                           "UNICODE defined");
1803         }
1804
1805         if (!(flags & STR_ASCII) && \
1806             ((flags & STR_UNICODE || \
1807               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1808                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1809         }
1810         return pull_ascii(dest, src, dest_len, src_len, flags);
1811 }
1812
1813 /**
1814  Copy a string from a unicode or ascii source (depending on
1815  the packet flags) to a char* destination.
1816  Variant that uses talloc.
1817  Flags can have:
1818   STR_TERMINATE means the string in src is null terminated.
1819   STR_UNICODE   means to force as unicode.
1820   STR_ASCII     use ascii even with unicode packet.
1821   STR_NOALIGN   means don't do alignment.
1822  if STR_TERMINATE is set then src_len is ignored is it is -1
1823  src_len is the length of the source area in bytes.
1824  Return the number of bytes occupied by the string in src.
1825  The resulting string in "dest" is always null terminated.
1826 **/
1827
1828 size_t pull_string_talloc_fn(const char *function,
1829                         unsigned int line,
1830                         TALLOC_CTX *ctx,
1831                         const void *base_ptr,
1832                         uint16 smb_flags2,
1833                         char **ppdest,
1834                         const void *src,
1835                         size_t src_len,
1836                         int flags)
1837 {
1838         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1839                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1840                           "UNICODE defined");
1841         }
1842
1843         if (!(flags & STR_ASCII) && \
1844             ((flags & STR_UNICODE || \
1845               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1846                 return pull_ucs2_base_talloc(ctx,
1847                                         base_ptr,
1848                                         ppdest,
1849                                         src,
1850                                         src_len,
1851                                         flags);
1852         }
1853         return pull_ascii_base_talloc(ctx,
1854                                         ppdest,
1855                                         src,
1856                                         src_len,
1857                                         flags);
1858 }
1859
1860
1861 size_t align_string(const void *base_ptr, const char *p, int flags)
1862 {
1863         if (!(flags & STR_ASCII) && \
1864             ((flags & STR_UNICODE || \
1865               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1866                 return ucs2_align(base_ptr, p, flags);
1867         }
1868         return 0;
1869 }
1870
1871 /*
1872   Return the unicode codepoint for the next multi-byte CH_UNIX character
1873   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1874
1875   Also return the number of bytes consumed (which tells the caller
1876   how many bytes to skip to get to the next CH_UNIX character).
1877
1878   Return INVALID_CODEPOINT if the next character cannot be converted.
1879 */
1880
1881 codepoint_t next_codepoint(const char *str, size_t *size)
1882 {
1883         /* It cannot occupy more than 4 bytes in UTF16 format */
1884         uint8_t buf[4];
1885         smb_iconv_t descriptor;
1886         size_t ilen_orig;
1887         size_t ilen;
1888         size_t olen;
1889         char *outbuf;
1890
1891         if ((str[0] & 0x80) == 0) {
1892                 *size = 1;
1893                 return (codepoint_t)str[0];
1894         }
1895
1896         /* We assume that no multi-byte character can take
1897            more than 5 bytes. This is OK as we only
1898            support codepoints up to 1M */
1899
1900         ilen_orig = strnlen(str, 5);
1901         ilen = ilen_orig;
1902
1903         lazy_initialize_conv();
1904
1905         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1906         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1907                 *size = 1;
1908                 return INVALID_CODEPOINT;
1909         }
1910
1911         /* This looks a little strange, but it is needed to cope
1912            with codepoints above 64k which are encoded as per RFC2781. */
1913         olen = 2;
1914         outbuf = (char *)buf;
1915         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1916         if (olen == 2) {
1917                 /* We failed to convert to a 2 byte character.
1918                    See if we can convert to a 4 UTF16-LE byte char encoding.
1919                 */
1920                 olen = 4;
1921                 outbuf = (char *)buf;
1922                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1923                 if (olen == 4) {
1924                         /* We didn't convert any bytes */
1925                         *size = 1;
1926                         return INVALID_CODEPOINT;
1927                 }
1928                 olen = 4 - olen;
1929         } else {
1930                 olen = 2 - olen;
1931         }
1932
1933         *size = ilen_orig - ilen;
1934
1935         if (olen == 2) {
1936                 /* 2 byte, UTF16-LE encoded value. */
1937                 return (codepoint_t)SVAL(buf, 0);
1938         }
1939         if (olen == 4) {
1940                 /* Decode a 4 byte UTF16-LE character manually.
1941                    See RFC2871 for the encoding machanism.
1942                 */
1943                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1944                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1945
1946                 return (codepoint_t)0x10000 +
1947                                 (w1 << 10) + w2;
1948         }
1949
1950         /* no other length is valid */
1951         return INVALID_CODEPOINT;
1952 }