source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
   48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; /* iconv descriptors, indexed [from][to]; opened by init_iconv(), closed by gfree_charcnv() */
   49 static bool conv_silent; /* When true the conversion routines skip their DEBUG output on failure. */
   50 static bool initialized; /* Set by lazy_initialize_conv() after first-time setup; cleared by gfree_charcnv(). */
  51
  52 /**
  53  * Return the name of a charset to give to iconv().
  54  **/
  55 static const char *charset_name(charset_t ch)
  56 {
  57         const char *ret = NULL;
  58
  59         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  60         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  61         else if (ch == CH_UNIX) ret = lp_unix_charset();
  62         else if (ch == CH_DOS) ret = lp_dos_charset();
  63         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  64         else if (ch == CH_UTF8) ret = "UTF8";
  65
  66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  67         if (ret && !strcmp(ret, "LOCALE")) {
  68                 const char *ln = NULL;
  69
  70 #ifdef HAVE_SETLOCALE
  71                 setlocale(LC_ALL, "");
  72 #endif
  73                 ln = nl_langinfo(CODESET);
  74                 if (ln) {
  75                         /* Check whether the charset name is supported
  76                            by iconv */
  77                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  78                         if (handle == (smb_iconv_t) -1) {
  79                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  80                                 ln = NULL;
  81                         } else {
  82                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  83                                 smb_iconv_close(handle);
  84                         }
  85                 }
  86                 ret = ln;
  87         }
  88 #endif
  89
  90         if (!ret || !*ret) ret = "ASCII";
  91         return ret;
  92 }
  93
  94 void lazy_initialize_conv(void)
  95 {
  96         if (!initialized) {
  97                 load_case_tables();
  98                 init_iconv();
  99                 initialized = true;
 100         }
 101 }
 102
 103 /**
 104  * Destroy global objects allocated by init_iconv()
 105  **/
 106 void gfree_charcnv(void)
 107 {
 108         int c1, c2;
 109
 110         for (c1=0;c1<NUM_CHARSETS;c1++) {
 111                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 112                         if ( conv_handles[c1][c2] ) {
 113                                 smb_iconv_close( conv_handles[c1][c2] );
 114                                 conv_handles[c1][c2] = 0;
 115                         }
 116                 }
 117         }
 118         initialized = false;
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         bool did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_valid_table();
 182                 conv_silent = False;
 183         }
 184 }
 185
 186 /**
 187  * Convert string from one encoding to another, making error checking etc
 188  * Slow path version - uses (slow) iconv.
 189  *
 190  * @param src pointer to source string (multibyte or singlebyte)
 191  * @param srclen length of the source string in bytes
 192  * @param dest pointer to destination string (multibyte or singlebyte)
 193  * @param destlen maximal length allowed for string
 194  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 195  * @returns the number of bytes occupied in the destination
 196  *
 197  * Ensure the srclen contains the terminating zero.
 198  *
 199  **/
 200
 201 static size_t convert_string_internal(charset_t from, charset_t to,
 202                       void const *src, size_t srclen,
 203                       void *dest, size_t destlen, bool allow_bad_conv)
 204 {
 205         size_t i_len, o_len;
 206         size_t retval;
 207         const char* inbuf = (const char*)src;
 208         char* outbuf = (char*)dest;
 209         smb_iconv_t descriptor;
 210
 211         lazy_initialize_conv();
 212
 213         descriptor = conv_handles[from][to];
 214
 215         if (srclen == (size_t)-1) {
 216                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 217                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 218                 } else {
 219                         srclen = strlen((const char *)src)+1;
 220                 }
 221         }
 222
 223
 224         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 225                 if (!conv_silent)
 226                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 227                 return (size_t)-1;
 228         }
 229
 230         i_len=srclen;
 231         o_len=destlen;
 232
 233  again:
 234
 235         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 236         if(retval==(size_t)-1) {
 237                 const char *reason="unknown error";
 238                 switch(errno) {
 239                         case EINVAL:
 240                                 reason="Incomplete multibyte sequence";
 241                                 if (!conv_silent)
 242                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 243                                 if (allow_bad_conv)
 244                                         goto use_as_is;
 245                                 break;
 246                         case E2BIG:
 247                                 reason="No more room";
 248                                 if (!conv_silent) {
 249                                         if (from == CH_UNIX) {
 250                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 251                                                         charset_name(from), charset_name(to),
 252                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 253                                         } else {
 254                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 255                                                         charset_name(from), charset_name(to),
 256                                                         (unsigned int)srclen, (unsigned int)destlen));
 257                                         }
 258                                 }
 259                                 break;
 260                         case EILSEQ:
 261                                 reason="Illegal multibyte sequence";
 262                                 if (!conv_silent)
 263                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 264                                 if (allow_bad_conv)
 265                                         goto use_as_is;
 266                                 break;
 267                         default:
 268                                 if (!conv_silent)
 269                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 270                                 break;
 271                 }
 272                 /* smb_panic(reason); */
 273         }
 274         return destlen-o_len;
 275
 276  use_as_is:
 277
 278         /*
 279          * Conversion not supported. This is actually an error, but there are so
 280          * many misconfigured iconv systems and smb.conf's out there we can't just
 281          * fail. Do a very bad conversion instead.... JRA.
 282          */
 283
 284         {
 285                 if (o_len == 0 || i_len == 0)
 286                         return destlen - o_len;
 287
 288                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 289                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 290                         /* Can't convert from utf16 any endian to multibyte.
 291                            Replace with the default fail char.
 292                         */
 293                         if (i_len < 2)
 294                                 return destlen - o_len;
 295                         if (i_len >= 2) {
 296                                 *outbuf = lp_failed_convert_char();
 297
 298                                 outbuf++;
 299                                 o_len--;
 300
 301                                 inbuf += 2;
 302                                 i_len -= 2;
 303                         }
 304
 305                         if (o_len == 0 || i_len == 0)
 306                                 return destlen - o_len;
 307
 308                         /* Keep trying with the next char... */
 309                         goto again;
 310
 311                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 312                         /* Can't convert to UTF16LE - just widen by adding the
 313                            default fail char then zero.
 314                         */
 315                         if (o_len < 2)
 316                                 return destlen - o_len;
 317
 318                         outbuf[0] = lp_failed_convert_char();
 319                         outbuf[1] = '\0';
 320
 321                         inbuf++;
 322                         i_len--;
 323
 324                         outbuf += 2;
 325                         o_len -= 2;
 326
 327                         if (o_len == 0 || i_len == 0)
 328                                 return destlen - o_len;
 329
 330                         /* Keep trying with the next char... */
 331                         goto again;
 332
 333                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 334                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 335                         /* Failed multibyte to multibyte. Just copy the default fail char and
 336                                 try again. */
 337                         outbuf[0] = lp_failed_convert_char();
 338
 339                         inbuf++;
 340                         i_len--;
 341
 342                         outbuf++;
 343                         o_len--;
 344
 345                         if (o_len == 0 || i_len == 0)
 346                                 return destlen - o_len;
 347
 348                         /* Keep trying with the next char... */
 349                         goto again;
 350
 351                 } else {
 352                         /* Keep compiler happy.... */
 353                         return destlen - o_len;
 354                 }
 355         }
 356 }
 357
 358 /**
 359  * Convert string from one encoding to another, making error checking etc
 360  * Fast path version - handles ASCII first.
 361  *
 362  * @param src pointer to source string (multibyte or singlebyte)
 363  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 364  * @param dest pointer to destination string (multibyte or singlebyte)
 365  * @param destlen maximal length allowed for string - *NEVER* -1.
 366  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 367  * @returns the number of bytes occupied in the destination
 368  *
 369  * Ensure the srclen contains the terminating zero.
 370  *
 371  * This function has been hand-tuned to provide a fast path.
 372  * Don't change unless you really know what you are doing. JRA.
 373  **/
 374
 375 size_t convert_string(charset_t from, charset_t to,
 376                       void const *src, size_t srclen,
 377                       void *dest, size_t destlen, bool allow_bad_conv)
 378 {
 379         /*
 380          * NB. We deliberately don't do a strlen here if srclen == -1.
 381          * This is very expensive over millions of calls and is taken
 382          * care of in the slow path in convert_string_internal. JRA.
 383          */
 384
 385 #ifdef DEVELOPER
 386         SMB_ASSERT(destlen != (size_t)-1);
 387 #endif
 388
 389         if (srclen == 0)
 390                 return 0;
 391
 392         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 393                 const unsigned char *p = (const unsigned char *)src;
 394                 unsigned char *q = (unsigned char *)dest;
 395                 size_t slen = srclen;
 396                 size_t dlen = destlen;
 397                 unsigned char lastp = '\0';
 398                 size_t retval = 0;
 399
 400                 /* If all characters are ascii, fast path here. */
 401                 while (slen && dlen) {
 402                         if ((lastp = *p) <= 0x7f) {
 403                                 *q++ = *p++;
 404                                 if (slen != (size_t)-1) {
 405                                         slen--;
 406                                 }
 407                                 dlen--;
 408                                 retval++;
 409                                 if (!lastp)
 410                                         break;
 411                         } else {
 412 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 413                                 goto general_case;
 414 #else
 415                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 416 #endif
 417                         }
 418                 }
 419                 if (!dlen) {
 420                         /* Even if we fast path we should note if we ran out of room. */
 421                         if (((slen != (size_t)-1) && slen) ||
 422                                         ((slen == (size_t)-1) && lastp)) {
 423                                 errno = E2BIG;
 424                         }
 425                 }
 426                 return retval;
 427         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 428                 const unsigned char *p = (const unsigned char *)src;
 429                 unsigned char *q = (unsigned char *)dest;
 430                 size_t retval = 0;
 431                 size_t slen = srclen;
 432                 size_t dlen = destlen;
 433                 unsigned char lastp = '\0';
 434
 435                 /* If all characters are ascii, fast path here. */
 436                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 437                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 438                                 *q++ = *p;
 439                                 if (slen != (size_t)-1) {
 440                                         slen -= 2;
 441                                 }
 442                                 p += 2;
 443                                 dlen--;
 444                                 retval++;
 445                                 if (!lastp)
 446                                         break;
 447                         } else {
 448 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 449                                 goto general_case;
 450 #else
 451                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 452 #endif
 453                         }
 454                 }
 455                 if (!dlen) {
 456                         /* Even if we fast path we should note if we ran out of room. */
 457                         if (((slen != (size_t)-1) && slen) ||
 458                                         ((slen == (size_t)-1) && lastp)) {
 459                                 errno = E2BIG;
 460                         }
 461                 }
 462                 return retval;
 463         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 464                 const unsigned char *p = (const unsigned char *)src;
 465                 unsigned char *q = (unsigned char *)dest;
 466                 size_t retval = 0;
 467                 size_t slen = srclen;
 468                 size_t dlen = destlen;
 469                 unsigned char lastp = '\0';
 470
 471                 /* If all characters are ascii, fast path here. */
 472                 while (slen && (dlen >= 2)) {
 473                         if ((lastp = *p) <= 0x7F) {
 474                                 *q++ = *p++;
 475                                 *q++ = '\0';
 476                                 if (slen != (size_t)-1) {
 477                                         slen--;
 478                                 }
 479                                 dlen -= 2;
 480                                 retval += 2;
 481                                 if (!lastp)
 482                                         break;
 483                         } else {
 484 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 485                                 goto general_case;
 486 #else
 487                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 488 #endif
 489                         }
 490                 }
 491                 if (!dlen) {
 492                         /* Even if we fast path we should note if we ran out of room. */
 493                         if (((slen != (size_t)-1) && slen) ||
 494                                         ((slen == (size_t)-1) && lastp)) {
 495                                 errno = E2BIG;
 496                         }
 497                 }
 498                 return retval;
 499         }
 500
 501 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 502   general_case:
 503 #endif
 504         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 505 }
 506
 507 /**
 508  * Convert between character sets, allocating a new buffer for the result.
 509  *
 510  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 511  * (this is a bad interface and needs fixing. JRA).
 512  * @param srclen length of source buffer.
 513  * @param dest always set at least to NULL
 514  * @param converted_size set to the size of the allocated buffer on return
 515  * true
 516  * @note -1 is not accepted for srclen.
 517  *
 518  * @return true if new buffer was correctly allocated, and string was
 519  * converted.
 520  *
 521  * Ensure the srclen contains the terminating zero.
 522  *
 523  * I hate the goto's in this function. It's embarressing.....
 524  * There has to be a cleaner way to do this. JRA.
 525  **/
 526
 527 bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 528                              void const *src, size_t srclen, void *dst,
 529                              size_t *converted_size, bool allow_bad_conv)
 530 {
 531         size_t i_len, o_len, destlen = (srclen * 3) / 2;
 532         size_t retval;
 533         const char *inbuf = (const char *)src;
 534         char *outbuf = NULL, *ob = NULL;
 535         smb_iconv_t descriptor;
 536         void **dest = (void **)dst;
 537
 538         *dest = NULL;
 539
 540         if (!converted_size) {
 541                 errno = EINVAL;
 542                 return false;
 543         }
 544
 545         if (src == NULL || srclen == (size_t)-1) {
 546                 errno = EINVAL;
 547                 return false;
 548         }
 549         if (srclen == 0) {
 550                 *converted_size = 0;
 551                 return true;
 552         }
 553
 554         lazy_initialize_conv();
 555
 556         descriptor = conv_handles[from][to];
 557
 558         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 559                 if (!conv_silent)
 560                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 561                 errno = EOPNOTSUPP;
 562                 return false;
 563         }
 564
 565   convert:
 566
 567         /* +2 is for ucs2 null termination. */
 568         if ((destlen*2)+2 < destlen) {
 569                 /* wrapped ! abort. */
 570                 if (!conv_silent)
 571                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 572                 if (!ctx)
 573                         SAFE_FREE(outbuf);
 574                 errno = EOPNOTSUPP;
 575                 return false;
 576         } else {
 577                 destlen = destlen * 2;
 578         }
 579
 580         /* +2 is for ucs2 null termination. */
 581         if (ctx) {
 582                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
 583         } else {
 584                 ob = (char *)SMB_REALLOC(ob, destlen + 2);
 585         }
 586
 587         if (!ob) {
 588                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 589                 errno = ENOMEM;
 590                 return false;
 591         }
 592         outbuf = ob;
 593         i_len = srclen;
 594         o_len = destlen;
 595
 596  again:
 597
 598         retval = smb_iconv(descriptor,
 599                            &inbuf, &i_len,
 600                            &outbuf, &o_len);
 601         if(retval == (size_t)-1)                {
 602                 const char *reason="unknown error";
 603                 switch(errno) {
 604                         case EINVAL:
 605                                 reason="Incomplete multibyte sequence";
 606                                 if (!conv_silent)
 607                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 608                                 if (allow_bad_conv)
 609                                         goto use_as_is;
 610                                 break;
 611                         case E2BIG:
 612                                 goto convert;
 613                         case EILSEQ:
 614                                 reason="Illegal multibyte sequence";
 615                                 if (!conv_silent)
 616                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 617                                 if (allow_bad_conv)
 618                                         goto use_as_is;
 619                                 break;
 620                 }
 621                 if (!conv_silent)
 622                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 623                 /* smb_panic(reason); */
 624                 if (ctx) {
 625                         TALLOC_FREE(ob);
 626                 } else {
 627                         SAFE_FREE(ob);
 628                 }
 629                 return false;
 630         }
 631
 632   out:
 633
 634         destlen = destlen - o_len;
 635         /* Don't shrink unless we're reclaiming a lot of
 636          * space. This is in the hot codepath and these
 637          * reallocs *cost*. JRA.
 638          */
 639         if (o_len > 1024) {
 640                 /* We're shrinking here so we know the +2 is safe from wrap. */
 641                 if (ctx) {
 642                         ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
 643                 } else {
 644                         ob = (char *)SMB_REALLOC(ob,destlen + 2);
 645                 }
 646         }
 647
 648         if (destlen && !ob) {
 649                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 650                 errno = ENOMEM;
 651                 return false;
 652         }
 653
 654         *dest = ob;
 655
 656         /* Must ucs2 null terminate in the extra space we allocated. */
 657         ob[destlen] = '\0';
 658         ob[destlen+1] = '\0';
 659
 660         *converted_size = destlen;
 661         return true;
 662
 663  use_as_is:
 664
 665         /*
 666          * Conversion not supported. This is actually an error, but there are so
 667          * many misconfigured iconv systems and smb.conf's out there we can't just
 668          * fail. Do a very bad conversion instead.... JRA.
 669          */
 670
 671         {
 672                 if (o_len == 0 || i_len == 0)
 673                         goto out;
 674
 675                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 676                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 677                         /* Can't convert from utf16 any endian to multibyte.
 678                            Replace with the default fail char.
 679                         */
 680
 681                         if (i_len < 2)
 682                                 goto out;
 683
 684                         if (i_len >= 2) {
 685                                 *outbuf = lp_failed_convert_char();
 686
 687                                 outbuf++;
 688                                 o_len--;
 689
 690                                 inbuf += 2;
 691                                 i_len -= 2;
 692                         }
 693
 694                         if (o_len == 0 || i_len == 0)
 695                                 goto out;
 696
 697                         /* Keep trying with the next char... */
 698                         goto again;
 699
 700                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 701                         /* Can't convert to UTF16LE - just widen by adding the
 702                            default fail char then zero.
 703                         */
 704                         if (o_len < 2)
 705                                 goto out;
 706
 707                         outbuf[0] = lp_failed_convert_char();
 708                         outbuf[1] = '\0';
 709
 710                         inbuf++;
 711                         i_len--;
 712
 713                         outbuf += 2;
 714                         o_len -= 2;
 715
 716                         if (o_len == 0 || i_len == 0)
 717                                 goto out;
 718
 719                         /* Keep trying with the next char... */
 720                         goto again;
 721
 722                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 723                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 724                         /* Failed multibyte to multibyte. Just copy the default fail char and
 725                            try again. */
 726                         outbuf[0] = lp_failed_convert_char();
 727
 728                         inbuf++;
 729                         i_len--;
 730
 731                         outbuf++;
 732                         o_len--;
 733
 734                         if (o_len == 0 || i_len == 0)
 735                                 goto out;
 736
 737                         /* Keep trying with the next char... */
 738                         goto again;
 739
 740                 } else {
 741                         /* Keep compiler happy.... */
 742                         goto out;
 743                 }
 744         }
 745 }
 746
 747 /**
 748  * Convert between character sets, allocating a new buffer using talloc for the result.
 749  *
 750  * @param srclen length of source buffer.
 751  * @param dest always set at least to NULL
 752  * @parm converted_size set to the number of bytes occupied by the string in
 753  * the destination on success.
 754  * @note -1 is not accepted for srclen.
 755  *
 756  * @return true if new buffer was correctly allocated, and string was
 757  * converted.
 758  */
 759 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 760                            void const *src, size_t srclen, void *dst,
 761                            size_t *converted_size, bool allow_bad_conv)
 762 {
 763         void **dest = (void **)dst;
 764
 765         *dest = NULL;
 766         return convert_string_allocate(ctx, from, to, src, srclen, dest,
 767                                        converted_size, allow_bad_conv);
 768 }
 769
 770 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 771 {
 772         size_t size;
 773         smb_ucs2_t *buffer;
 774
 775         if (!push_ucs2_allocate(&buffer, src, &size)) {
 776                 return (size_t)-1;
 777         }
 778
 779         if (!strupper_w(buffer) && (dest == src)) {
 780                 free(buffer);
 781                 return srclen;
 782         }
 783
 784         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 785         free(buffer);
 786         return size;
 787 }
 788
 789 /**
 790  strdup() a unix string to upper case.
 791 **/
 792
 793 char *strdup_upper(const char *s)
 794 {
 795         char *out_buffer = SMB_STRDUP(s);
 796         const unsigned char *p = (const unsigned char *)s;
 797         unsigned char *q = (unsigned char *)out_buffer;
 798
 799         if (!q) {
 800                 return NULL;
 801         }
 802
 803         /* this is quite a common operation, so we want it to be
 804            fast. We optimise for the ascii case, knowing that all our
 805            supported multi-byte character sets are ascii-compatible
 806            (ie. they match for the first 128 chars) */
 807
 808         while (*p) {
 809                 if (*p & 0x80)
 810                         break;
 811                 *q++ = toupper_ascii_fast(*p);
 812                 p++;
 813         }
 814
 815         if (*p) {
 816                 /* MB case. */
 817                 size_t converted_size, converted_size2;
 818                 smb_ucs2_t *buffer = NULL;
 819
 820                 SAFE_FREE(out_buffer);
 821                 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
 822                                              strlen(s) + 1,
 823                                              (void **)(void *)&buffer,
 824                                              &converted_size, True))
 825                 {
 826                         return NULL;
 827                 }
 828
 829                 strupper_w(buffer);
 830
 831                 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
 832                                              converted_size,
 833                                              (void **)(void *)&out_buffer,
 834                                              &converted_size2, True))
 835                 {
 836                         TALLOC_FREE(buffer);
 837                         return NULL;
 838                 }
 839
 840                 /* Don't need the intermediate buffer
 841                  * anymore.
 842                  */
 843                 TALLOC_FREE(buffer);
 844         }
 845
 846         return out_buffer;
 847 }
 848
 849 /**
 850  talloc_strdup() a unix string to upper case.
 851 **/
 852
 853 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 854 {
 855         char *out_buffer = talloc_strdup(ctx,s);
 856         const unsigned char *p = (const unsigned char *)s;
 857         unsigned char *q = (unsigned char *)out_buffer;
 858
 859         if (!q) {
 860                 return NULL;
 861         }
 862
 863         /* this is quite a common operation, so we want it to be
 864            fast. We optimise for the ascii case, knowing that all our
 865            supported multi-byte character sets are ascii-compatible
 866            (ie. they match for the first 128 chars) */
 867
 868         while (*p) {
 869                 if (*p & 0x80)
 870                         break;
 871                 *q++ = toupper_ascii_fast(*p);
 872                 p++;
 873         }
 874
 875         if (*p) {
 876                 /* MB case. */
 877                 size_t converted_size, converted_size2;
 878                 smb_ucs2_t *ubuf = NULL;
 879
 880                 /* We're not using the ascii buffer above. */
 881                 TALLOC_FREE(out_buffer);
 882
 883                 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
 884                                            strlen(s)+1, (void *)&ubuf,
 885                                            &converted_size, True))
 886                 {
 887                         return NULL;
 888                 }
 889
 890                 strupper_w(ubuf);
 891
 892                 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
 893                                            converted_size, (void *)&out_buffer,
 894                                            &converted_size2, True))
 895                 {
 896                         TALLOC_FREE(ubuf);
 897                         return NULL;
 898                 }
 899
 900                 /* Don't need the intermediate buffer
 901                  * anymore.
 902                  */
 903                 TALLOC_FREE(ubuf);
 904         }
 905
 906         return out_buffer;
 907 }
 908
 909 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 910 {
 911         size_t size;
 912         smb_ucs2_t *buffer = NULL;
 913
 914         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 915                                      (void **)(void *)&buffer, &size,
 916                                      True))
 917         {
 918                 smb_panic("failed to create UCS2 buffer");
 919         }
 920         if (!strlower_w(buffer) && (dest == src)) {
 921                 SAFE_FREE(buffer);
 922                 return srclen;
 923         }
 924         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 925         SAFE_FREE(buffer);
 926         return size;
 927 }
 928
 929 /**
 930  strdup() a unix string to lower case.
 931 **/
 932
 933 char *strdup_lower(const char *s)
 934 {
 935         size_t converted_size;
 936         smb_ucs2_t *buffer = NULL;
 937         char *out_buffer;
 938
 939         if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
 940                 return NULL;
 941         }
 942
 943         strlower_w(buffer);
 944
 945         if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
 946                 SAFE_FREE(buffer);
 947                 return NULL;
 948         }
 949
 950         SAFE_FREE(buffer);
 951
 952         return out_buffer;
 953 }
 954
 955 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
 956 {
 957         size_t converted_size;
 958         smb_ucs2_t *buffer = NULL;
 959         char *out_buffer;
 960
 961         if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
 962                 return NULL;
 963         }
 964
 965         strlower_w(buffer);
 966
 967         if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
 968                 TALLOC_FREE(buffer);
 969                 return NULL;
 970         }
 971
 972         TALLOC_FREE(buffer);
 973
 974         return out_buffer;
 975 }
 976
 977
 978 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 979 {
 980         if (flags & (STR_NOALIGN|STR_ASCII))
 981                 return 0;
 982         return PTR_DIFF(p, base_ptr) & 1;
 983 }
 984
 985
 986 /**
 987  * Copy a string from a char* unix src to a dos codepage string destination.
 988  *
 989  * @return the number of bytes occupied by the string in the destination.
 990  *
 991  * @param flags can include
 992  * <dl>
 993  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 994  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 995  * </dl>
 996  *
 997  * @param dest_len the maximum length in bytes allowed in the
 998  * destination.
 999  **/
1000 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1001 {
1002         size_t src_len = strlen(src);
1003         char *tmpbuf = NULL;
1004         size_t ret;
1005
1006         /* No longer allow a length of -1. */
1007         if (dest_len == (size_t)-1) {
1008                 smb_panic("push_ascii - dest_len == -1");
1009         }
1010
1011         if (flags & STR_UPPER) {
1012                 tmpbuf = SMB_STRDUP(src);
1013                 if (!tmpbuf) {
1014                         smb_panic("malloc fail");
1015                 }
1016                 strupper_m(tmpbuf);
1017                 src = tmpbuf;
1018         }
1019
1020         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1021                 src_len++;
1022         }
1023
1024         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1025         if (ret == (size_t)-1 &&
1026                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1027                         && dest_len > 0) {
1028                 ((char *)dest)[0] = '\0';
1029         }
1030         SAFE_FREE(tmpbuf);
1031         return ret;
1032 }
1033
1034 size_t push_ascii_fstring(void *dest, const char *src)
1035 {
1036         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1037 }
1038
1039 /********************************************************************
1040  Push an nstring - ensure null terminated. Written by
1041  moriyama@miraclelinux.com (MORIYAMA Masayuki).
1042 ********************************************************************/
1043
1044 size_t push_ascii_nstring(void *dest, const char *src)
1045 {
1046         size_t i, buffer_len, dest_len;
1047         smb_ucs2_t *buffer;
1048
1049         conv_silent = True;
1050         if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1051                 smb_panic("failed to create UCS2 buffer");
1052         }
1053
1054         /* We're using buffer_len below to count ucs2 characters, not bytes. */
1055         buffer_len /= sizeof(smb_ucs2_t);
1056
1057         dest_len = 0;
1058         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1059                 unsigned char mb[10];
1060                 /* Convert one smb_ucs2_t character at a time. */
1061                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1062                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1063                         memcpy((char *)dest + dest_len, mb, mb_len);
1064                         dest_len += mb_len;
1065                 } else {
1066                         errno = E2BIG;
1067                         break;
1068                 }
1069         }
1070         ((char *)dest)[dest_len] = '\0';
1071
1072         SAFE_FREE(buffer);
1073         conv_silent = False;
1074         return dest_len;
1075 }
1076
1077 /********************************************************************
1078  Push and malloc an ascii string. src and dest null terminated.
1079 ********************************************************************/
1080
1081 bool push_ascii_allocate(char **dest, const char *src, size_t *converted_size)
1082 {
1083         size_t src_len = strlen(src)+1;
1084
1085         *dest = NULL;
1086         return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1087                                        (void **)dest, converted_size, True);
1088 }
1089
1090 /**
1091  * Copy a string from a dos codepage source to a unix char* destination.
1092  *
1093  * The resulting string in "dest" is always null terminated.
1094  *
1095  * @param flags can have:
1096  * <dl>
1097  * <dt>STR_TERMINATE</dt>
1098  * <dd>STR_TERMINATE means the string in @p src
1099  * is null terminated, and src_len is ignored.</dd>
1100  * </dl>
1101  *
1102  * @param src_len is the length of the source area in bytes.
1103  * @returns the number of bytes occupied by the string in @p src.
1104  **/
1105 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1106 {
1107         size_t ret;
1108
1109         if (dest_len == (size_t)-1) {
1110                 /* No longer allow dest_len of -1. */
1111                 smb_panic("pull_ascii - invalid dest_len of -1");
1112         }
1113
1114         if (flags & STR_TERMINATE) {
1115                 if (src_len == (size_t)-1) {
1116                         src_len = strlen((const char *)src) + 1;
1117                 } else {
1118                         size_t len = strnlen((const char *)src, src_len);
1119                         if (len < src_len)
1120                                 len++;
1121                         src_len = len;
1122                 }
1123         }
1124
1125         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1126         if (ret == (size_t)-1) {
1127                 ret = 0;
1128                 dest_len = 0;
1129         }
1130
1131         if (dest_len && ret) {
1132                 /* Did we already process the terminating zero ? */
1133                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1134                         dest[MIN(ret, dest_len-1)] = 0;
1135                 }
1136         } else  {
1137                 dest[0] = 0;
1138         }
1139
1140         return src_len;
1141 }
1142
1143 /**
1144  * Copy a string from a dos codepage source to a unix char* destination.
1145  Talloc version.
1146  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1147  needs fixing. JRA).
1148  *
1149  * The resulting string in "dest" is always null terminated.
1150  *
1151  * @param flags can have:
1152  * <dl>
1153  * <dt>STR_TERMINATE</dt>
1154  * <dd>STR_TERMINATE means the string in @p src
1155  * is null terminated, and src_len is ignored.</dd>
1156  * </dl>
1157  *
1158  * @param src_len is the length of the source area in bytes.
1159  * @returns the number of bytes occupied by the string in @p src.
1160  **/
1161
1162 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1163                                         char **ppdest,
1164                                         const void *src,
1165                                         size_t src_len,
1166                                         int flags)
1167 {
1168         char *dest = NULL;
1169         size_t dest_len;
1170
1171 #ifdef DEVELOPER
1172         /* Ensure we never use the braindead "malloc" varient. */
1173         if (ctx == NULL) {
1174                 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1175         }
1176 #endif
1177
1178         *ppdest = NULL;
1179
1180         if (!src_len) {
1181                 return 0;
1182         }
1183
1184         if (flags & STR_TERMINATE) {
1185                 if (src_len == (size_t)-1) {
1186                         src_len = strlen((const char *)src) + 1;
1187                 } else {
1188                         size_t len = strnlen((const char *)src, src_len);
1189                         if (len < src_len)
1190                                 len++;
1191                         src_len = len;
1192                 }
1193                 /* Ensure we don't use an insane length from the client. */
1194                 if (src_len >= 1024*1024) {
1195                         char *msg = talloc_asprintf(ctx,
1196                                         "Bad src length (%u) in "
1197                                         "pull_ascii_base_talloc",
1198                                         (unsigned int)src_len);
1199                         smb_panic(msg);
1200                 }
1201         } else {
1202                 /* Can't have an unlimited length
1203                  * non STR_TERMINATE'd.
1204                  */
1205                 if (src_len == (size_t)-1) {
1206                         errno = EINVAL;
1207                         return 0;
1208                 }
1209         }
1210
1211         /* src_len != -1 here. */
1212
1213         if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1214                                      &dest_len, True)) {
1215                 dest_len = 0;
1216         }
1217
1218         if (dest_len && dest) {
1219                 /* Did we already process the terminating zero ? */
1220                 if (dest[dest_len-1] != 0) {
1221                         size_t size = talloc_get_size(dest);
1222                         /* Have we got space to append the '\0' ? */
1223                         if (size <= dest_len) {
1224                                 /* No, realloc. */
1225                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1226                                                 dest_len+1);
1227                                 if (!dest) {
1228                                         /* talloc fail. */
1229                                         dest_len = (size_t)-1;
1230                                         return 0;
1231                                 }
1232                         }
1233                         /* Yay - space ! */
1234                         dest[dest_len] = '\0';
1235                         dest_len++;
1236                 }
1237         } else if (dest) {
1238                 dest[0] = 0;
1239         }
1240
1241         *ppdest = dest;
1242         return src_len;
1243 }
1244
1245 size_t pull_ascii_fstring(char *dest, const void *src)
1246 {
1247         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1248 }
1249
1250 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1251
1252 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1253 {
1254         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1255 }
1256
1257 /**
1258  * Copy a string from a char* src to a unicode destination.
1259  *
1260  * @returns the number of bytes occupied by the string in the destination.
1261  *
1262  * @param flags can have:
1263  *
1264  * <dl>
1265  * <dt>STR_TERMINATE <dd>means include the null termination.
1266  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1267  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1268  * </dl>
1269  *
1270  * @param dest_len is the maximum length allowed in the
1271  * destination.
1272  **/
1273
1274 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1275 {
1276         size_t len=0;
1277         size_t src_len;
1278         size_t ret;
1279
1280         if (dest_len == (size_t)-1) {
1281                 /* No longer allow dest_len of -1. */
1282                 smb_panic("push_ucs2 - invalid dest_len of -1");
1283         }
1284
1285         if (flags & STR_TERMINATE)
1286                 src_len = (size_t)-1;
1287         else
1288                 src_len = strlen(src);
1289
1290         if (ucs2_align(base_ptr, dest, flags)) {
1291                 *(char *)dest = 0;
1292                 dest = (void *)((char *)dest + 1);
1293                 if (dest_len)
1294                         dest_len--;
1295                 len++;
1296         }
1297
1298         /* ucs2 is always a multiple of 2 bytes */
1299         dest_len &= ~1;
1300
1301         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1302         if (ret == (size_t)-1) {
1303                 if ((flags & STR_TERMINATE) &&
1304                                 dest &&
1305                                 dest_len) {
1306                         *(char *)dest = 0;
1307                 }
1308                 return len;
1309         }
1310
1311         len += ret;
1312
1313         if (flags & STR_UPPER) {
1314                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1315                 size_t i;
1316
1317                 /* We check for i < (ret / 2) below as the dest string isn't null
1318                    terminated if STR_TERMINATE isn't set. */
1319
1320                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1321                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1322                         if (v != dest_ucs2[i]) {
1323                                 dest_ucs2[i] = v;
1324                         }
1325                 }
1326         }
1327
1328         return len;
1329 }
1330
1331
1332 /**
1333  * Copy a string from a unix char* src to a UCS2 destination,
1334  * allocating a buffer using talloc().
1335  *
1336  * @param dest always set at least to NULL
1337  * @parm converted_size set to the number of bytes occupied by the string in
1338  * the destination on success.
1339  *
1340  * @return true if new buffer was correctly allocated, and string was
1341  * converted.
1342  **/
1343 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1344                       size_t *converted_size)
1345 {
1346         size_t src_len = strlen(src)+1;
1347
1348         *dest = NULL;
1349         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1350                                      (void **)dest, converted_size, True);
1351 }
1352
1353
1354 /**
1355  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1356  *
1357  * @param dest always set at least to NULL
1358  * @parm converted_size set to the number of bytes occupied by the string in
1359  * the destination on success.
1360  *
1361  * @return true if new buffer was correctly allocated, and string was
1362  * converted.
1363  **/
1364
1365 bool push_ucs2_allocate(smb_ucs2_t **dest, const char *src,
1366                         size_t *converted_size)
1367 {
1368         size_t src_len = strlen(src)+1;
1369
1370         *dest = NULL;
1371         return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1372                                        (void **)dest, converted_size, True);
1373 }
1374
1375 /**
1376  Copy a string from a char* src to a UTF-8 destination.
1377  Return the number of bytes occupied by the string in the destination
1378  Flags can have:
1379   STR_TERMINATE means include the null termination
1380   STR_UPPER     means uppercase in the destination
1381  dest_len is the maximum length allowed in the destination. If dest_len
1382  is -1 then no maxiumum is used.
1383 **/
1384
1385 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1386 {
1387         size_t src_len = 0;
1388         size_t ret;
1389         char *tmpbuf = NULL;
1390
1391         if (dest_len == (size_t)-1) {
1392                 /* No longer allow dest_len of -1. */
1393                 smb_panic("push_utf8 - invalid dest_len of -1");
1394         }
1395
1396         if (flags & STR_UPPER) {
1397                 tmpbuf = strdup_upper(src);
1398                 if (!tmpbuf) {
1399                         return (size_t)-1;
1400                 }
1401                 src = tmpbuf;
1402                 src_len = strlen(src);
1403         }
1404
1405         src_len = strlen(src);
1406         if (flags & STR_TERMINATE) {
1407                 src_len++;
1408         }
1409
1410         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1411         SAFE_FREE(tmpbuf);
1412         return ret;
1413 }
1414
1415 size_t push_utf8_fstring(void *dest, const char *src)
1416 {
1417         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1418 }
1419
1420 /**
1421  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1422  *
1423  * @param dest always set at least to NULL
1424  * @parm converted_size set to the number of bytes occupied by the string in
1425  * the destination on success.
1426  *
1427  * @return true if new buffer was correctly allocated, and string was
1428  * converted.
1429  **/
1430
1431 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1432                       size_t *converted_size)
1433 {
1434         size_t src_len = strlen(src)+1;
1435
1436         *dest = NULL;
1437         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1438                                      (void**)dest, converted_size, True);
1439 }
1440
1441 /**
1442  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1443  *
1444  * @param dest always set at least to NULL
1445  * @parm converted_size set to the number of bytes occupied by the string in
1446  * the destination on success.
1447  *
1448  * @return true if new buffer was correctly allocated, and string was
1449  * converted.
1450  **/
1451
1452 bool push_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1453 {
1454         size_t src_len = strlen(src)+1;
1455
1456         *dest = NULL;
1457         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1458                                        (void **)dest, converted_size, True);
1459 }
1460
1461 /**
1462  Copy a string from a ucs2 source to a unix char* destination.
1463  Flags can have:
1464   STR_TERMINATE means the string in src is null terminated.
1465   STR_NOALIGN   means don't try to align.
1466  if STR_TERMINATE is set then src_len is ignored if it is -1.
1467  src_len is the length of the source area in bytes
1468  Return the number of bytes occupied by the string in src.
1469  The resulting string in "dest" is always null terminated.
1470 **/
1471
1472 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1473 {
1474         size_t ret;
1475
1476         if (dest_len == (size_t)-1) {
1477                 /* No longer allow dest_len of -1. */
1478                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1479         }
1480
1481         if (!src_len) {
1482                 if (dest && dest_len > 0) {
1483                         dest[0] = '\0';
1484                 }
1485                 return 0;
1486         }
1487
1488         if (ucs2_align(base_ptr, src, flags)) {
1489                 src = (const void *)((const char *)src + 1);
1490                 if (src_len != (size_t)-1)
1491                         src_len--;
1492         }
1493
1494         if (flags & STR_TERMINATE) {
1495                 /* src_len -1 is the default for null terminated strings. */
1496                 if (src_len != (size_t)-1) {
1497                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1498                                                 src_len/2);
1499                         if (len < src_len/2)
1500                                 len++;
1501                         src_len = len*2;
1502                 }
1503         }
1504
1505         /* ucs2 is always a multiple of 2 bytes */
1506         if (src_len != (size_t)-1)
1507                 src_len &= ~1;
1508
1509         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1510         if (ret == (size_t)-1) {
1511                 ret = 0;
1512                 dest_len = 0;
1513         }
1514
1515         if (src_len == (size_t)-1)
1516                 src_len = ret*2;
1517
1518         if (dest_len && ret) {
1519                 /* Did we already process the terminating zero ? */
1520                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1521                         dest[MIN(ret, dest_len-1)] = 0;
1522                 }
1523         } else {
1524                 dest[0] = 0;
1525         }
1526
1527         return src_len;
1528 }
1529
1530 /**
1531  Copy a string from a ucs2 source to a unix char* destination.
1532  Talloc version with a base pointer.
1533  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1534  needs fixing. JRA).
1535  Flags can have:
1536   STR_TERMINATE means the string in src is null terminated.
1537   STR_NOALIGN   means don't try to align.
1538  if STR_TERMINATE is set then src_len is ignored if it is -1.
1539  src_len is the length of the source area in bytes
1540  Return the number of bytes occupied by the string in src.
1541  The resulting string in "dest" is always null terminated.
1542 **/
1543
1544 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1545                         const void *base_ptr,
1546                         char **ppdest,
1547                         const void *src,
1548                         size_t src_len,
1549                         int flags)
1550 {
1551         char *dest;
1552         size_t dest_len;
1553
1554         *ppdest = NULL;
1555
1556 #ifdef DEVELOPER
1557         /* Ensure we never use the braindead "malloc" varient. */
1558         if (ctx == NULL) {
1559                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1560         }
1561 #endif
1562
1563         if (!src_len) {
1564                 return 0;
1565         }
1566
1567         if (ucs2_align(base_ptr, src, flags)) {
1568                 src = (const void *)((const char *)src + 1);
1569                 if (src_len != (size_t)-1)
1570                         src_len--;
1571         }
1572
1573         if (flags & STR_TERMINATE) {
1574                 /* src_len -1 is the default for null terminated strings. */
1575                 if (src_len != (size_t)-1) {
1576                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1577                                                 src_len/2);
1578                         if (len < src_len/2)
1579                                 len++;
1580                         src_len = len*2;
1581                 } else {
1582                         /*
1583                          * src_len == -1 - alloc interface won't take this
1584                          * so we must calculate.
1585                          */
1586                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1587                 }
1588                 /* Ensure we don't use an insane length from the client. */
1589                 if (src_len >= 1024*1024) {
1590                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1591                 }
1592         } else {
1593                 /* Can't have an unlimited length
1594                  * non STR_TERMINATE'd.
1595                  */
1596                 if (src_len == (size_t)-1) {
1597                         errno = EINVAL;
1598                         return 0;
1599                 }
1600         }
1601
1602         /* src_len != -1 here. */
1603
1604         /* ucs2 is always a multiple of 2 bytes */
1605         src_len &= ~1;
1606
1607         if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1608                                    (void *)&dest, &dest_len, True)) {
1609                 dest_len = 0;
1610         }
1611
1612         if (dest_len) {
1613                 /* Did we already process the terminating zero ? */
1614                 if (dest[dest_len-1] != 0) {
1615                         size_t size = talloc_get_size(dest);
1616                         /* Have we got space to append the '\0' ? */
1617                         if (size <= dest_len) {
1618                                 /* No, realloc. */
1619                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1620                                                 dest_len+1);
1621                                 if (!dest) {
1622                                         /* talloc fail. */
1623                                         dest_len = (size_t)-1;
1624                                         return 0;
1625                                 }
1626                         }
1627                         /* Yay - space ! */
1628                         dest[dest_len] = '\0';
1629                         dest_len++;
1630                 }
1631         } else if (dest) {
1632                 dest[0] = 0;
1633         }
1634
1635         *ppdest = dest;
1636         return src_len;
1637 }
1638
1639 size_t pull_ucs2_fstring(char *dest, const void *src)
1640 {
1641         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1642 }
1643
1644 /**
1645  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1646  *
1647  * @param dest always set at least to NULL
1648  * @parm converted_size set to the number of bytes occupied by the string in
1649  * the destination on success.
1650  *
1651  * @return true if new buffer was correctly allocated, and string was
1652  * converted.
1653  **/
1654
1655 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1656                       size_t *converted_size)
1657 {
1658         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1659
1660         *dest = NULL;
1661         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1662                                      (void **)dest, converted_size, True);
1663 }
1664
1665 /**
1666  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1667  *
1668  * @param dest always set at least to NULL
1669  * @parm converted_size set to the number of bytes occupied by the string in
1670  * the destination on success.
1671  * @return true if new buffer was correctly allocated, and string was
1672  * converted.
1673  **/
1674
1675 bool pull_ucs2_allocate(char **dest, const smb_ucs2_t *src,
1676                         size_t *converted_size)
1677 {
1678         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1679
1680         *dest = NULL;
1681         return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1682                                        (void **)dest, converted_size, True);
1683 }
1684
1685 /**
1686  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1687  *
1688  * @param dest always set at least to NULL
1689  * @parm converted_size set to the number of bytes occupied by the string in
1690  * the destination on success.
1691  *
1692  * @return true if new buffer was correctly allocated, and string was
1693  * converted.
1694  **/
1695
1696 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1697                       size_t *converted_size)
1698 {
1699         size_t src_len = strlen(src)+1;
1700
1701         *dest = NULL;
1702         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1703                                      (void **)dest, converted_size, True);
1704 }
1705
1706 /**
1707  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1708  *
1709  * @param dest always set at least to NULL
1710  * @parm converted_size set to the number of bytes occupied by the string in
1711  * the destination on success.
1712  *
1713  * @return true if new buffer was correctly allocated, and string was
1714  * converted.
1715  **/
1716
1717 bool pull_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1718 {
1719         size_t src_len = strlen(src)+1;
1720
1721         *dest = NULL;
1722         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1723                                        (void **)dest, converted_size, True);
1724 }
1725
1726 /**
1727  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1728  *
1729  * @param dest always set at least to NULL
1730  * @parm converted_size set to the number of bytes occupied by the string in
1731  * the destination on success.
1732  *
1733  * @return true if new buffer was correctly allocated, and string was
1734  * converted.
1735  **/
1736
1737 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1738                        size_t *converted_size)
1739 {
1740         size_t src_len = strlen(src)+1;
1741
1742         *dest = NULL;
1743         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1744                                      (void **)dest, converted_size, True);
1745 }
1746
1747 /**
1748  Copy a string from a char* src to a unicode or ascii
1749  dos codepage destination choosing unicode or ascii based on the
1750  flags in the SMB buffer starting at base_ptr.
1751  Return the number of bytes occupied by the string in the destination.
1752  flags can have:
1753   STR_TERMINATE means include the null termination.
1754   STR_UPPER     means uppercase in the destination.
1755   STR_ASCII     use ascii even with unicode packet.
1756   STR_NOALIGN   means don't do alignment.
1757  dest_len is the maximum length allowed in the destination. If dest_len
1758  is -1 then no maxiumum is used.
1759 **/
1760
1761 size_t push_string_fn(const char *function, unsigned int line,
1762                       const void *base_ptr, uint16 flags2,
1763                       void *dest, const char *src,
1764                       size_t dest_len, int flags)
1765 {
1766 #ifdef DEVELOPER
1767         /* We really need to zero fill here, not clobber
1768          * region, as we want to ensure that valgrind thinks
1769          * all of the outgoing buffer has been written to
1770          * so a send() or write() won't trap an error.
1771          * JRA.
1772          */
1773 #if 0
1774         clobber_region(function, line, dest, dest_len);
1775 #else
1776         memset(dest, '\0', dest_len);
1777 #endif
1778 #endif
1779
1780         if (!(flags & STR_ASCII) && \
1781             ((flags & STR_UNICODE || \
1782               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1783                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1784         }
1785         return push_ascii(dest, src, dest_len, flags);
1786 }
1787
1788
1789 /**
1790  Copy a string from a unicode or ascii source (depending on
1791  the packet flags) to a char* destination.
1792  Flags can have:
1793   STR_TERMINATE means the string in src is null terminated.
1794   STR_UNICODE   means to force as unicode.
1795   STR_ASCII     use ascii even with unicode packet.
1796   STR_NOALIGN   means don't do alignment.
1797  if STR_TERMINATE is set then src_len is ignored is it is -1
1798  src_len is the length of the source area in bytes.
1799  Return the number of bytes occupied by the string in src.
1800  The resulting string in "dest" is always null terminated.
1801 **/
1802
1803 size_t pull_string_fn(const char *function,
1804                         unsigned int line,
1805                         const void *base_ptr,
1806                         uint16 smb_flags2,
1807                         char *dest,
1808                         const void *src,
1809                         size_t dest_len,
1810                         size_t src_len,
1811                         int flags)
1812 {
1813 #ifdef DEVELOPER
1814         clobber_region(function, line, dest, dest_len);
1815 #endif
1816
1817         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1818                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1819                           "UNICODE defined");
1820         }
1821
1822         if (!(flags & STR_ASCII) && \
1823             ((flags & STR_UNICODE || \
1824               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1825                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1826         }
1827         return pull_ascii(dest, src, dest_len, src_len, flags);
1828 }
1829
1830 /**
1831  Copy a string from a unicode or ascii source (depending on
1832  the packet flags) to a char* destination.
1833  Variant that uses talloc.
1834  Flags can have:
1835   STR_TERMINATE means the string in src is null terminated.
1836   STR_UNICODE   means to force as unicode.
1837   STR_ASCII     use ascii even with unicode packet.
1838   STR_NOALIGN   means don't do alignment.
1839  if STR_TERMINATE is set then src_len is ignored is it is -1
1840  src_len is the length of the source area in bytes.
1841  Return the number of bytes occupied by the string in src.
1842  The resulting string in "dest" is always null terminated.
1843 **/
1844
1845 size_t pull_string_talloc_fn(const char *function,
1846                         unsigned int line,
1847                         TALLOC_CTX *ctx,
1848                         const void *base_ptr,
1849                         uint16 smb_flags2,
1850                         char **ppdest,
1851                         const void *src,
1852                         size_t src_len,
1853                         int flags)
1854 {
1855         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1856                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1857                           "UNICODE defined");
1858         }
1859
1860         if (!(flags & STR_ASCII) && \
1861             ((flags & STR_UNICODE || \
1862               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1863                 return pull_ucs2_base_talloc(ctx,
1864                                         base_ptr,
1865                                         ppdest,
1866                                         src,
1867                                         src_len,
1868                                         flags);
1869         }
1870         return pull_ascii_base_talloc(ctx,
1871                                         ppdest,
1872                                         src,
1873                                         src_len,
1874                                         flags);
1875 }
1876
1877
1878 size_t align_string(const void *base_ptr, const char *p, int flags)
1879 {
1880         if (!(flags & STR_ASCII) && \
1881             ((flags & STR_UNICODE || \
1882               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1883                 return ucs2_align(base_ptr, p, flags);
1884         }
1885         return 0;
1886 }
1887
1888 /*
1889   Return the unicode codepoint for the next multi-byte CH_UNIX character
1890   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1891
1892   Also return the number of bytes consumed (which tells the caller
1893   how many bytes to skip to get to the next CH_UNIX character).
1894
1895   Return INVALID_CODEPOINT if the next character cannot be converted.
1896 */
1897
1898 codepoint_t next_codepoint(const char *str, size_t *size)
1899 {
1900         /* It cannot occupy more than 4 bytes in UTF16 format */
1901         uint8_t buf[4];
1902         smb_iconv_t descriptor;
1903         size_t ilen_orig;
1904         size_t ilen;
1905         size_t olen;
1906         char *outbuf;
1907
1908         if ((str[0] & 0x80) == 0) {
1909                 *size = 1;
1910                 return (codepoint_t)str[0];
1911         }
1912
1913         /* We assume that no multi-byte character can take
1914            more than 5 bytes. This is OK as we only
1915            support codepoints up to 1M */
1916
1917         ilen_orig = strnlen(str, 5);
1918         ilen = ilen_orig;
1919
1920         lazy_initialize_conv();
1921
1922         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1923         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1924                 *size = 1;
1925                 return INVALID_CODEPOINT;
1926         }
1927
1928         /* This looks a little strange, but it is needed to cope
1929            with codepoints above 64k which are encoded as per RFC2781. */
1930         olen = 2;
1931         outbuf = (char *)buf;
1932         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1933         if (olen == 2) {
1934                 /* We failed to convert to a 2 byte character.
1935                    See if we can convert to a 4 UTF16-LE byte char encoding.
1936                 */
1937                 olen = 4;
1938                 outbuf = (char *)buf;
1939                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1940                 if (olen == 4) {
1941                         /* We didn't convert any bytes */
1942                         *size = 1;
1943                         return INVALID_CODEPOINT;
1944                 }
1945                 olen = 4 - olen;
1946         } else {
1947                 olen = 2 - olen;
1948         }
1949
1950         *size = ilen_orig - ilen;
1951
1952         if (olen == 2) {
1953                 /* 2 byte, UTF16-LE encoded value. */
1954                 return (codepoint_t)SVAL(buf, 0);
1955         }
1956         if (olen == 4) {
1957                 /* Decode a 4 byte UTF16-LE character manually.
1958                    See RFC2871 for the encoding machanism.
1959                 */
1960                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1961                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1962
1963                 return (codepoint_t)0x10000 +
1964                                 (w1 << 10) + w2;
1965         }
1966
1967         /* no other length is valid */
1968         return INVALID_CODEPOINT;
1969 }