source/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
   48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; /* Conversion descriptors, indexed as [from][to]; an entry may be 0 or (smb_iconv_t)-1 when unusable. */
   49 static bool conv_silent; /* When true, the converters in this file skip their DEBUG output on conversion failure. */
  50
  51 /**
  52  * Return the name of a charset to give to iconv().
  53  **/
  54 static const char *charset_name(charset_t ch)
  55 {
  56         const char *ret = NULL;
  57
  58         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  59         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  60         else if (ch == CH_UNIX) ret = lp_unix_charset();
  61         else if (ch == CH_DOS) ret = lp_dos_charset();
  62         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  63         else if (ch == CH_UTF8) ret = "UTF8";
  64
  65 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  66         if (ret && !strcmp(ret, "LOCALE")) {
  67                 const char *ln = NULL;
  68
  69 #ifdef HAVE_SETLOCALE
  70                 setlocale(LC_ALL, "");
  71 #endif
  72                 ln = nl_langinfo(CODESET);
  73                 if (ln) {
  74                         /* Check whether the charset name is supported
  75                            by iconv */
  76                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  77                         if (handle == (smb_iconv_t) -1) {
  78                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  79                                 ln = NULL;
  80                         } else {
  81                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  82                                 smb_iconv_close(handle);
  83                         }
  84                 }
  85                 ret = ln;
  86         }
  87 #endif
  88
  89         if (!ret || !*ret) ret = "ASCII";
  90         return ret;
  91 }
  92
  93 void lazy_initialize_conv(void)
  94 {
  95         static int initialized = False;
  96
  97         if (!initialized) {
  98                 initialized = True;
  99                 load_case_tables();
 100                 init_iconv();
 101         }
 102 }
 103
 104 /**
 105  * Destroy global objects allocated by init_iconv()
 106  **/
 107 void gfree_charcnv(void)
 108 {
 109         int c1, c2;
 110
 111         for (c1=0;c1<NUM_CHARSETS;c1++) {
 112                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 113                         if ( conv_handles[c1][c2] ) {
 114                                 smb_iconv_close( conv_handles[c1][c2] );
 115                                 conv_handles[c1][c2] = 0;
 116                         }
 117                 }
 118         }
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         bool did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_valid_table();
 182                 conv_silent = False;
 183         }
 184 }
 185
 186 /**
 187  * Convert string from one encoding to another, making error checking etc
 188  * Slow path version - uses (slow) iconv.
 189  *
 190  * @param src pointer to source string (multibyte or singlebyte)
 191  * @param srclen length of the source string in bytes
 192  * @param dest pointer to destination string (multibyte or singlebyte)
 193  * @param destlen maximal length allowed for string
 194  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 195  * @returns the number of bytes occupied in the destination
 196  *
 197  * Ensure the srclen contains the terminating zero.
 198  *
 199  **/
 200
 201 static size_t convert_string_internal(charset_t from, charset_t to,
 202                       void const *src, size_t srclen,
 203                       void *dest, size_t destlen, bool allow_bad_conv)
 204 {
 205         size_t i_len, o_len;
 206         size_t retval;
 207         const char* inbuf = (const char*)src;
 208         char* outbuf = (char*)dest;
 209         smb_iconv_t descriptor;
 210
 211         lazy_initialize_conv();
 212
 213         descriptor = conv_handles[from][to];
 214
 215         if (srclen == (size_t)-1) {
 216                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 217                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 218                 } else {
 219                         srclen = strlen((const char *)src)+1;
 220                 }
 221         }
 222
 223
 224         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 225                 if (!conv_silent)
 226                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 227                 return (size_t)-1;
 228         }
 229
 230         i_len=srclen;
 231         o_len=destlen;
 232
 233  again:
 234
 235         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 236         if(retval==(size_t)-1) {
 237                 const char *reason="unknown error";
 238                 switch(errno) {
 239                         case EINVAL:
 240                                 reason="Incomplete multibyte sequence";
 241                                 if (!conv_silent)
 242                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 243                                 if (allow_bad_conv)
 244                                         goto use_as_is;
 245                                 break;
 246                         case E2BIG:
 247                                 reason="No more room";
 248                                 if (!conv_silent) {
 249                                         if (from == CH_UNIX) {
 250                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 251                                                         charset_name(from), charset_name(to),
 252                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 253                                         } else {
 254                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 255                                                         charset_name(from), charset_name(to),
 256                                                         (unsigned int)srclen, (unsigned int)destlen));
 257                                         }
 258                                 }
 259                                 break;
 260                         case EILSEQ:
 261                                 reason="Illegal multibyte sequence";
 262                                 if (!conv_silent)
 263                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 264                                 if (allow_bad_conv)
 265                                         goto use_as_is;
 266                                 break;
 267                         default:
 268                                 if (!conv_silent)
 269                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 270                                 break;
 271                 }
 272                 /* smb_panic(reason); */
 273         }
 274         return destlen-o_len;
 275
 276  use_as_is:
 277
 278         /*
 279          * Conversion not supported. This is actually an error, but there are so
 280          * many misconfigured iconv systems and smb.conf's out there we can't just
 281          * fail. Do a very bad conversion instead.... JRA.
 282          */
 283
 284         {
 285                 if (o_len == 0 || i_len == 0)
 286                         return destlen - o_len;
 287
 288                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 289                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 290                         /* Can't convert from utf16 any endian to multibyte.
 291                            Replace with the default fail char.
 292                         */
 293                         if (i_len < 2)
 294                                 return destlen - o_len;
 295                         if (i_len >= 2) {
 296                                 *outbuf = lp_failed_convert_char();
 297
 298                                 outbuf++;
 299                                 o_len--;
 300
 301                                 inbuf += 2;
 302                                 i_len -= 2;
 303                         }
 304
 305                         if (o_len == 0 || i_len == 0)
 306                                 return destlen - o_len;
 307
 308                         /* Keep trying with the next char... */
 309                         goto again;
 310
 311                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 312                         /* Can't convert to UTF16LE - just widen by adding the
 313                            default fail char then zero.
 314                         */
 315                         if (o_len < 2)
 316                                 return destlen - o_len;
 317
 318                         outbuf[0] = lp_failed_convert_char();
 319                         outbuf[1] = '\0';
 320
 321                         inbuf++;
 322                         i_len--;
 323
 324                         outbuf += 2;
 325                         o_len -= 2;
 326
 327                         if (o_len == 0 || i_len == 0)
 328                                 return destlen - o_len;
 329
 330                         /* Keep trying with the next char... */
 331                         goto again;
 332
 333                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 334                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 335                         /* Failed multibyte to multibyte. Just copy the default fail char and
 336                                 try again. */
 337                         outbuf[0] = lp_failed_convert_char();
 338
 339                         inbuf++;
 340                         i_len--;
 341
 342                         outbuf++;
 343                         o_len--;
 344
 345                         if (o_len == 0 || i_len == 0)
 346                                 return destlen - o_len;
 347
 348                         /* Keep trying with the next char... */
 349                         goto again;
 350
 351                 } else {
 352                         /* Keep compiler happy.... */
 353                         return destlen - o_len;
 354                 }
 355         }
 356 }
 357
 358 /**
 359  * Convert string from one encoding to another, making error checking etc
 360  * Fast path version - handles ASCII first.
 361  *
 362  * @param src pointer to source string (multibyte or singlebyte)
 363  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 364  * @param dest pointer to destination string (multibyte or singlebyte)
 365  * @param destlen maximal length allowed for string - *NEVER* -1.
 366  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 367  * @returns the number of bytes occupied in the destination
 368  *
 369  * Ensure the srclen contains the terminating zero.
 370  *
 371  * This function has been hand-tuned to provide a fast path.
 372  * Don't change unless you really know what you are doing. JRA.
 373  **/
 374
 375 size_t convert_string(charset_t from, charset_t to,
 376                       void const *src, size_t srclen,
 377                       void *dest, size_t destlen, bool allow_bad_conv)
 378 {
 379         /*
 380          * NB. We deliberately don't do a strlen here if srclen == -1.
 381          * This is very expensive over millions of calls and is taken
 382          * care of in the slow path in convert_string_internal. JRA.
 383          */
 384
 385 #ifdef DEVELOPER
 386         SMB_ASSERT(destlen != (size_t)-1);
 387 #endif
 388
 389         if (srclen == 0)
 390                 return 0;
 391
 392         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 393                 const unsigned char *p = (const unsigned char *)src;
 394                 unsigned char *q = (unsigned char *)dest;
 395                 size_t slen = srclen;
 396                 size_t dlen = destlen;
 397                 unsigned char lastp = '\0';
 398                 size_t retval = 0;
 399
 400                 /* If all characters are ascii, fast path here. */
 401                 while (slen && dlen) {
 402                         if ((lastp = *p) <= 0x7f) {
 403                                 *q++ = *p++;
 404                                 if (slen != (size_t)-1) {
 405                                         slen--;
 406                                 }
 407                                 dlen--;
 408                                 retval++;
 409                                 if (!lastp)
 410                                         break;
 411                         } else {
 412 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 413                                 goto general_case;
 414 #else
 415                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 416 #endif
 417                         }
 418                 }
 419                 if (!dlen) {
 420                         /* Even if we fast path we should note if we ran out of room. */
 421                         if (((slen != (size_t)-1) && slen) ||
 422                                         ((slen == (size_t)-1) && lastp)) {
 423                                 errno = E2BIG;
 424                         }
 425                 }
 426                 return retval;
 427         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 428                 const unsigned char *p = (const unsigned char *)src;
 429                 unsigned char *q = (unsigned char *)dest;
 430                 size_t retval = 0;
 431                 size_t slen = srclen;
 432                 size_t dlen = destlen;
 433                 unsigned char lastp = '\0';
 434
 435                 /* If all characters are ascii, fast path here. */
 436                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 437                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 438                                 *q++ = *p;
 439                                 if (slen != (size_t)-1) {
 440                                         slen -= 2;
 441                                 }
 442                                 p += 2;
 443                                 dlen--;
 444                                 retval++;
 445                                 if (!lastp)
 446                                         break;
 447                         } else {
 448 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 449                                 goto general_case;
 450 #else
 451                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 452 #endif
 453                         }
 454                 }
 455                 if (!dlen) {
 456                         /* Even if we fast path we should note if we ran out of room. */
 457                         if (((slen != (size_t)-1) && slen) ||
 458                                         ((slen == (size_t)-1) && lastp)) {
 459                                 errno = E2BIG;
 460                         }
 461                 }
 462                 return retval;
 463         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 464                 const unsigned char *p = (const unsigned char *)src;
 465                 unsigned char *q = (unsigned char *)dest;
 466                 size_t retval = 0;
 467                 size_t slen = srclen;
 468                 size_t dlen = destlen;
 469                 unsigned char lastp = '\0';
 470
 471                 /* If all characters are ascii, fast path here. */
 472                 while (slen && (dlen >= 2)) {
 473                         if ((lastp = *p) <= 0x7F) {
 474                                 *q++ = *p++;
 475                                 *q++ = '\0';
 476                                 if (slen != (size_t)-1) {
 477                                         slen--;
 478                                 }
 479                                 dlen -= 2;
 480                                 retval += 2;
 481                                 if (!lastp)
 482                                         break;
 483                         } else {
 484 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 485                                 goto general_case;
 486 #else
 487                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 488 #endif
 489                         }
 490                 }
 491                 if (!dlen) {
 492                         /* Even if we fast path we should note if we ran out of room. */
 493                         if (((slen != (size_t)-1) && slen) ||
 494                                         ((slen == (size_t)-1) && lastp)) {
 495                                 errno = E2BIG;
 496                         }
 497                 }
 498                 return retval;
 499         }
 500
 501 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 502   general_case:
 503 #endif
 504         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 505 }
 506
 507 /**
 508  * Convert between character sets, allocating a new buffer for the result.
 509  *
 510  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 511  * (this is a bad interface and needs fixing. JRA).
 512  * @param srclen length of source buffer.
 513  * @param dest always set at least to NULL
 514  * @note -1 is not accepted for srclen.
 515  *
 516  * @returns Size in bytes of the converted string; or -1 in case of error.
 517  *
 518  * Ensure the srclen contains the terminating zero.
 519  *
 520  * I hate the goto's in this function. It's embarressing.....
 521  * There has to be a cleaner way to do this. JRA.
 522  **/
 523
 524 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 525                                void const *src, size_t srclen, void *dst, bool allow_bad_conv)
 526 {
 527         size_t i_len, o_len, destlen = (srclen * 3) / 2;
 528         size_t retval;
 529         const char *inbuf = (const char *)src;
 530         char *outbuf = NULL, *ob = NULL;
 531         smb_iconv_t descriptor;
 532         void **dest = (void **)dst;
 533
 534         *dest = NULL;
 535
 536         if (src == NULL || srclen == (size_t)-1)
 537                 return (size_t)-1;
 538         if (srclen == 0)
 539                 return 0;
 540
 541         lazy_initialize_conv();
 542
 543         descriptor = conv_handles[from][to];
 544
 545         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 546                 if (!conv_silent)
 547                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 548                 return (size_t)-1;
 549         }
 550
 551   convert:
 552
 553         /* +2 is for ucs2 null termination. */
 554         if ((destlen*2)+2 < destlen) {
 555                 /* wrapped ! abort. */
 556                 if (!conv_silent)
 557                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 558                 if (!ctx)
 559                         SAFE_FREE(outbuf);
 560                 return (size_t)-1;
 561         } else {
 562                 destlen = destlen * 2;
 563         }
 564
 565         /* +2 is for ucs2 null termination. */
 566         if (ctx) {
 567                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
 568         } else {
 569                 ob = (char *)SMB_REALLOC(ob, destlen + 2);
 570         }
 571
 572         if (!ob) {
 573                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 574                 return (size_t)-1;
 575         }
 576         outbuf = ob;
 577         i_len = srclen;
 578         o_len = destlen;
 579
 580  again:
 581
 582         retval = smb_iconv(descriptor,
 583                            &inbuf, &i_len,
 584                            &outbuf, &o_len);
 585         if(retval == (size_t)-1)                {
 586                 const char *reason="unknown error";
 587                 switch(errno) {
 588                         case EINVAL:
 589                                 reason="Incomplete multibyte sequence";
 590                                 if (!conv_silent)
 591                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 592                                 if (allow_bad_conv)
 593                                         goto use_as_is;
 594                                 break;
 595                         case E2BIG:
 596                                 goto convert;
 597                         case EILSEQ:
 598                                 reason="Illegal multibyte sequence";
 599                                 if (!conv_silent)
 600                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 601                                 if (allow_bad_conv)
 602                                         goto use_as_is;
 603                                 break;
 604                 }
 605                 if (!conv_silent)
 606                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 607                 /* smb_panic(reason); */
 608                 if (ctx) {
 609                         TALLOC_FREE(ob);
 610                 } else {
 611                         SAFE_FREE(ob);
 612                 }
 613                 return (size_t)-1;
 614         }
 615
 616   out:
 617
 618         destlen = destlen - o_len;
 619         /* Don't shrink unless we're reclaiming a lot of
 620          * space. This is in the hot codepath and these
 621          * reallocs *cost*. JRA.
 622          */
 623         if (o_len > 1024) {
 624                 /* We're shrinking here so we know the +2 is safe from wrap. */
 625                 if (ctx) {
 626                         ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
 627                 } else {
 628                         ob = (char *)SMB_REALLOC(ob,destlen + 2);
 629                 }
 630         }
 631
 632         if (destlen && !ob) {
 633                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 634                 return (size_t)-1;
 635         }
 636
 637         *dest = ob;
 638
 639         /* Must ucs2 null terminate in the extra space we allocated. */
 640         ob[destlen] = '\0';
 641         ob[destlen+1] = '\0';
 642
 643         return destlen;
 644
 645  use_as_is:
 646
 647         /*
 648          * Conversion not supported. This is actually an error, but there are so
 649          * many misconfigured iconv systems and smb.conf's out there we can't just
 650          * fail. Do a very bad conversion instead.... JRA.
 651          */
 652
 653         {
 654                 if (o_len == 0 || i_len == 0)
 655                         goto out;
 656
 657                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 658                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 659                         /* Can't convert from utf16 any endian to multibyte.
 660                            Replace with the default fail char.
 661                         */
 662
 663                         if (i_len < 2)
 664                                 goto out;
 665
 666                         if (i_len >= 2) {
 667                                 *outbuf = lp_failed_convert_char();
 668
 669                                 outbuf++;
 670                                 o_len--;
 671
 672                                 inbuf += 2;
 673                                 i_len -= 2;
 674                         }
 675
 676                         if (o_len == 0 || i_len == 0)
 677                                 goto out;
 678
 679                         /* Keep trying with the next char... */
 680                         goto again;
 681
 682                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 683                         /* Can't convert to UTF16LE - just widen by adding the
 684                            default fail char then zero.
 685                         */
 686                         if (o_len < 2)
 687                                 goto out;
 688
 689                         outbuf[0] = lp_failed_convert_char();
 690                         outbuf[1] = '\0';
 691
 692                         inbuf++;
 693                         i_len--;
 694
 695                         outbuf += 2;
 696                         o_len -= 2;
 697
 698                         if (o_len == 0 || i_len == 0)
 699                                 goto out;
 700
 701                         /* Keep trying with the next char... */
 702                         goto again;
 703
 704                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 705                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 706                         /* Failed multibyte to multibyte. Just copy the default fail char and
 707                            try again. */
 708                         outbuf[0] = lp_failed_convert_char();
 709
 710                         inbuf++;
 711                         i_len--;
 712
 713                         outbuf++;
 714                         o_len--;
 715
 716                         if (o_len == 0 || i_len == 0)
 717                                 goto out;
 718
 719                         /* Keep trying with the next char... */
 720                         goto again;
 721
 722                 } else {
 723                         /* Keep compiler happy.... */
 724                         goto out;
 725                 }
 726         }
 727 }
 728
 729 /**
 730  * Convert between character sets, allocating a new buffer using talloc for the result.
 731  *
 732  * @param srclen length of source buffer.
 733  * @param dest always set at least to NULL
 734  * @note -1 is not accepted for srclen.
 735  *
 736  * @returns Size in bytes of the converted string; or -1 in case of error.
 737  **/
 738 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 739                              void const *src, size_t srclen, void *dst,
 740                              bool allow_bad_conv)
 741 {
 742         void **dest = (void **)dst;
 743         size_t dest_len;
 744
 745         *dest = NULL;
 746         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
 747         if (dest_len == (size_t)-1)
 748                 return (size_t)-1;
 749         if (*dest == NULL)
 750                 return (size_t)-1;
 751         return dest_len;
 752 }
 753
 754 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 755 {
 756         size_t size;
 757         smb_ucs2_t *buffer;
 758
 759         size = push_ucs2_allocate(&buffer, src);
 760         if (size == (size_t)-1) {
 761                 return (size_t)-1;
 762         }
 763         if (!strupper_w(buffer) && (dest == src)) {
 764                 free(buffer);
 765                 return srclen;
 766         }
 767
 768         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 769         free(buffer);
 770         return size;
 771 }
 772
 773 /**
 774  strdup() a unix string to upper case.
 775 **/
 776
 777 char *strdup_upper(const char *s)
 778 {
 779         char *out_buffer = SMB_STRDUP(s);
 780         const unsigned char *p = (const unsigned char *)s;
 781         unsigned char *q = (unsigned char *)out_buffer;
 782
 783         if (!q) {
 784                 return NULL;
 785         }
 786
 787         /* this is quite a common operation, so we want it to be
 788            fast. We optimise for the ascii case, knowing that all our
 789            supported multi-byte character sets are ascii-compatible
 790            (ie. they match for the first 128 chars) */
 791
 792         while (*p) {
 793                 if (*p & 0x80)
 794                         break;
 795                 *q++ = toupper_ascii_fast(*p);
 796                 p++;
 797         }
 798
 799         if (*p) {
 800                 /* MB case. */
 801                 size_t size;
 802                 smb_ucs2_t *buffer = NULL;
 803
 804                 SAFE_FREE(out_buffer);
 805                 size = convert_string_allocate(NULL,
 806                                         CH_UNIX,
 807                                         CH_UTF16LE,
 808                                         s,
 809                                         strlen(s) + 1,
 810                                         (void **)(void *)&buffer,
 811                                         True);
 812                 if (size == (size_t)-1) {
 813                         return NULL;
 814                 }
 815
 816                 strupper_w(buffer);
 817
 818                 size = convert_string_allocate(NULL,
 819                                         CH_UTF16LE,
 820                                         CH_UNIX,
 821                                         buffer,
 822                                         size,
 823                                         (void **)(void *)&out_buffer,
 824                                         True);
 825
 826                 /* Don't need the intermediate buffer
 827                  * anymore.
 828                  */
 829
 830                 TALLOC_FREE(buffer);
 831                 if (size == (size_t)-1) {
 832                         return NULL;
 833                 }
 834         }
 835
 836         return out_buffer;
 837 }
 838
 839 /**
 840  talloc_strdup() a unix string to upper case.
 841 **/
 842
 843 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 844 {
 845         char *out_buffer = talloc_strdup(ctx,s);
 846         const unsigned char *p = (const unsigned char *)s;
 847         unsigned char *q = (unsigned char *)out_buffer;
 848
 849         if (!q) {
 850                 return NULL;
 851         }
 852
 853         /* this is quite a common operation, so we want it to be
 854            fast. We optimise for the ascii case, knowing that all our
 855            supported multi-byte character sets are ascii-compatible
 856            (ie. they match for the first 128 chars) */
 857
 858         while (*p) {
 859                 if (*p & 0x80)
 860                         break;
 861                 *q++ = toupper_ascii_fast(*p);
 862                 p++;
 863         }
 864
 865         if (*p) {
 866                 /* MB case. */
 867                 size_t size;
 868                 smb_ucs2_t *ubuf = NULL;
 869
 870                 /* We're not using the ascii buffer above. */
 871                 TALLOC_FREE(out_buffer);
 872
 873                 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
 874                                 s, strlen(s)+1,
 875                                 (void *)&ubuf,
 876                                 True);
 877                 if (size == (size_t)-1) {
 878                         return NULL;
 879                 }
 880
 881                 strupper_w(ubuf);
 882
 883                 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
 884                                 ubuf, size,
 885                                 (void *)&out_buffer,
 886                                 True);
 887
 888                 /* Don't need the intermediate buffer
 889                  * anymore.
 890                  */
 891
 892                 TALLOC_FREE(ubuf);
 893
 894                 if (size == (size_t)-1) {
 895                         return NULL;
 896                 }
 897         }
 898
 899         return out_buffer;
 900 }
 901
 902 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 903 {
 904         size_t size;
 905         smb_ucs2_t *buffer = NULL;
 906
 907         size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 908                                        (void **)(void *)&buffer, True);
 909         if (size == (size_t)-1 || !buffer) {
 910                 smb_panic("failed to create UCS2 buffer");
 911         }
 912         if (!strlower_w(buffer) && (dest == src)) {
 913                 SAFE_FREE(buffer);
 914                 return srclen;
 915         }
 916         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 917         SAFE_FREE(buffer);
 918         return size;
 919 }
 920
 921 /**
 922  strdup() a unix string to lower case.
 923 **/
 924
 925 char *strdup_lower(const char *s)
 926 {
 927         size_t size;
 928         smb_ucs2_t *buffer = NULL;
 929         char *out_buffer;
 930
 931         size = push_ucs2_allocate(&buffer, s);
 932         if (size == -1 || !buffer) {
 933                 return NULL;
 934         }
 935
 936         strlower_w(buffer);
 937
 938         size = pull_ucs2_allocate(&out_buffer, buffer);
 939         SAFE_FREE(buffer);
 940
 941         if (size == (size_t)-1) {
 942                 return NULL;
 943         }
 944
 945         return out_buffer;
 946 }
 947
 948 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
 949 {
 950         size_t size;
 951         smb_ucs2_t *buffer = NULL;
 952         char *out_buffer;
 953
 954         size = push_ucs2_talloc(ctx, &buffer, s);
 955         if (size == -1 || !buffer) {
 956                 TALLOC_FREE(buffer);
 957                 return NULL;
 958         }
 959
 960         strlower_w(buffer);
 961
 962         size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
 963         TALLOC_FREE(buffer);
 964
 965         if (size == (size_t)-1) {
 966                 TALLOC_FREE(out_buffer);
 967                 return NULL;
 968         }
 969
 970         return out_buffer;
 971 }
 972
 973
 974 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 975 {
 976         if (flags & (STR_NOALIGN|STR_ASCII))
 977                 return 0;
 978         return PTR_DIFF(p, base_ptr) & 1;
 979 }
 980
 981
 982 /**
 983  * Copy a string from a char* unix src to a dos codepage string destination.
 984  *
 985  * @return the number of bytes occupied by the string in the destination.
 986  *
 987  * @param flags can include
 988  * <dl>
 989  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 990  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 991  * </dl>
 992  *
 993  * @param dest_len the maximum length in bytes allowed in the
 994  * destination.
 995  **/
 996 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 997 {
 998         size_t src_len = strlen(src);
 999         char *tmpbuf = NULL;
1000         size_t ret;
1001
1002         /* No longer allow a length of -1. */
1003         if (dest_len == (size_t)-1) {
1004                 smb_panic("push_ascii - dest_len == -1");
1005         }
1006
1007         if (flags & STR_UPPER) {
1008                 tmpbuf = SMB_STRDUP(src);
1009                 if (!tmpbuf) {
1010                         smb_panic("malloc fail");
1011                 }
1012                 strupper_m(tmpbuf);
1013                 src = tmpbuf;
1014         }
1015
1016         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1017                 src_len++;
1018         }
1019
1020         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1021         if (ret == (size_t)-1 &&
1022                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1023                         && dest_len > 0) {
1024                 ((char *)dest)[0] = '\0';
1025         }
1026         SAFE_FREE(tmpbuf);
1027         return ret;
1028 }
1029
1030 size_t push_ascii_fstring(void *dest, const char *src)
1031 {
1032         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1033 }
1034
1035 /********************************************************************
1036  Push an nstring - ensure null terminated. Written by
1037  moriyama@miraclelinux.com (MORIYAMA Masayuki).
1038 ********************************************************************/
1039
1040 size_t push_ascii_nstring(void *dest, const char *src)
1041 {
1042         size_t i, buffer_len, dest_len;
1043         smb_ucs2_t *buffer;
1044
1045         conv_silent = True;
1046         buffer_len = push_ucs2_allocate(&buffer, src);
1047         if (buffer_len == (size_t)-1) {
1048                 smb_panic("failed to create UCS2 buffer");
1049         }
1050
1051         /* We're using buffer_len below to count ucs2 characters, not bytes. */
1052         buffer_len /= sizeof(smb_ucs2_t);
1053
1054         dest_len = 0;
1055         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1056                 unsigned char mb[10];
1057                 /* Convert one smb_ucs2_t character at a time. */
1058                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1059                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1060                         memcpy((char *)dest + dest_len, mb, mb_len);
1061                         dest_len += mb_len;
1062                 } else {
1063                         errno = E2BIG;
1064                         break;
1065                 }
1066         }
1067         ((char *)dest)[dest_len] = '\0';
1068
1069         SAFE_FREE(buffer);
1070         conv_silent = False;
1071         return dest_len;
1072 }
1073
1074 /********************************************************************
1075  Push and malloc an ascii string. src and dest null terminated.
1076 ********************************************************************/
1077
1078 size_t push_ascii_allocate(char **dest, const char *src)
1079 {
1080         size_t src_len = strlen(src)+1;
1081
1082         *dest = NULL;
1083         return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len, (void **)dest, True);
1084 }
1085
1086 /**
1087  * Copy a string from a dos codepage source to a unix char* destination.
1088  *
1089  * The resulting string in "dest" is always null terminated.
1090  *
1091  * @param flags can have:
1092  * <dl>
1093  * <dt>STR_TERMINATE</dt>
1094  * <dd>STR_TERMINATE means the string in @p src
1095  * is null terminated, and src_len is ignored.</dd>
1096  * </dl>
1097  *
1098  * @param src_len is the length of the source area in bytes.
1099  * @returns the number of bytes occupied by the string in @p src.
1100  **/
1101 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1102 {
1103         size_t ret;
1104
1105         if (dest_len == (size_t)-1) {
1106                 /* No longer allow dest_len of -1. */
1107                 smb_panic("pull_ascii - invalid dest_len of -1");
1108         }
1109
1110         if (flags & STR_TERMINATE) {
1111                 if (src_len == (size_t)-1) {
1112                         src_len = strlen((const char *)src) + 1;
1113                 } else {
1114                         size_t len = strnlen((const char *)src, src_len);
1115                         if (len < src_len)
1116                                 len++;
1117                         src_len = len;
1118                 }
1119         }
1120
1121         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1122         if (ret == (size_t)-1) {
1123                 ret = 0;
1124                 dest_len = 0;
1125         }
1126
1127         if (dest_len && ret) {
1128                 /* Did we already process the terminating zero ? */
1129                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1130                         dest[MIN(ret, dest_len-1)] = 0;
1131                 }
1132         } else  {
1133                 dest[0] = 0;
1134         }
1135
1136         return src_len;
1137 }
1138
1139 /**
1140  * Copy a string from a dos codepage source to a unix char* destination.
1141  Talloc version.
1142  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1143  needs fixing. JRA).
1144  *
1145  * The resulting string in "dest" is always null terminated.
1146  *
1147  * @param flags can have:
1148  * <dl>
1149  * <dt>STR_TERMINATE</dt>
1150  * <dd>STR_TERMINATE means the string in @p src
1151  * is null terminated, and src_len is ignored.</dd>
1152  * </dl>
1153  *
1154  * @param src_len is the length of the source area in bytes.
1155  * @returns the number of bytes occupied by the string in @p src.
1156  **/
1157
1158 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1159                                         char **ppdest,
1160                                         const void *src,
1161                                         size_t src_len,
1162                                         int flags)
1163 {
1164         char *dest = NULL;
1165         size_t dest_len = 0;
1166
1167 #ifdef DEVELOPER
1168         /* Ensure we never use the braindead "malloc" varient. */
1169         if (ctx == NULL) {
1170                 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1171         }
1172 #endif
1173
1174         *ppdest = NULL;
1175
1176         if (flags & STR_TERMINATE) {
1177                 if (src_len == (size_t)-1) {
1178                         src_len = strlen((const char *)src) + 1;
1179                 } else {
1180                         size_t len = strnlen((const char *)src, src_len);
1181                         if (len < src_len)
1182                                 len++;
1183                         src_len = len;
1184                 }
1185                 /* Ensure we don't use an insane length from the client. */
1186                 if (src_len >= 1024*1024) {
1187                         char *msg = talloc_asprintf(ctx,
1188                                         "Bad src length (%u) in "
1189                                         "pull_ascii_base_talloc",
1190                                         (unsigned int)src_len);
1191                         smb_panic(msg);
1192                 }
1193         }
1194
1195         dest_len = convert_string_allocate(ctx,
1196                                 CH_DOS,
1197                                 CH_UNIX,
1198                                 src,
1199                                 src_len,
1200                                 &dest,
1201                                 True);
1202
1203         if (dest_len == (size_t)-1) {
1204                 dest_len = 0;
1205         }
1206
1207         if (dest_len && dest) {
1208                 /* Did we already process the terminating zero ? */
1209                 if (dest[dest_len-1] != 0) {
1210                         dest[dest_len-1] = 0;
1211                 }
1212         } else if (dest) {
1213                 dest[0] = 0;
1214         }
1215
1216         *ppdest = dest;
1217         return src_len;
1218 }
1219
1220 size_t pull_ascii_fstring(char *dest, const void *src)
1221 {
1222         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1223 }
1224
1225 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1226
1227 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1228 {
1229         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1230 }
1231
1232 /**
1233  * Copy a string from a char* src to a unicode destination.
1234  *
1235  * @returns the number of bytes occupied by the string in the destination.
1236  *
1237  * @param flags can have:
1238  *
1239  * <dl>
1240  * <dt>STR_TERMINATE <dd>means include the null termination.
1241  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1242  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1243  * </dl>
1244  *
1245  * @param dest_len is the maximum length allowed in the
1246  * destination.
1247  **/
1248
1249 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1250 {
1251         size_t len=0;
1252         size_t src_len;
1253         size_t ret;
1254
1255         if (dest_len == (size_t)-1) {
1256                 /* No longer allow dest_len of -1. */
1257                 smb_panic("push_ucs2 - invalid dest_len of -1");
1258         }
1259
1260         if (flags & STR_TERMINATE)
1261                 src_len = (size_t)-1;
1262         else
1263                 src_len = strlen(src);
1264
1265         if (ucs2_align(base_ptr, dest, flags)) {
1266                 *(char *)dest = 0;
1267                 dest = (void *)((char *)dest + 1);
1268                 if (dest_len)
1269                         dest_len--;
1270                 len++;
1271         }
1272
1273         /* ucs2 is always a multiple of 2 bytes */
1274         dest_len &= ~1;
1275
1276         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1277         if (ret == (size_t)-1) {
1278                 if ((flags & STR_TERMINATE) &&
1279                                 dest &&
1280                                 dest_len) {
1281                         *(char *)dest = 0;
1282                 }
1283                 return len;
1284         }
1285
1286         len += ret;
1287
1288         if (flags & STR_UPPER) {
1289                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1290                 size_t i;
1291
1292                 /* We check for i < (ret / 2) below as the dest string isn't null
1293                    terminated if STR_TERMINATE isn't set. */
1294
1295                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1296                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1297                         if (v != dest_ucs2[i]) {
1298                                 dest_ucs2[i] = v;
1299                         }
1300                 }
1301         }
1302
1303         return len;
1304 }
1305
1306
1307 /**
1308  * Copy a string from a unix char* src to a UCS2 destination,
1309  * allocating a buffer using talloc().
1310  *
1311  * @param dest always set at least to NULL
1312  *
1313  * @returns The number of bytes occupied by the string in the destination
1314  *         or -1 in case of error.
1315  **/
1316 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1317 {
1318         size_t src_len = strlen(src)+1;
1319
1320         *dest = NULL;
1321         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1322 }
1323
1324
1325 /**
1326  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1327  *
1328  * @param dest always set at least to NULL
1329  *
1330  * @returns The number of bytes occupied by the string in the destination
1331  *         or -1 in case of error.
1332  **/
1333
1334 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1335 {
1336         size_t src_len = strlen(src)+1;
1337
1338         *dest = NULL;
1339         return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1340 }
1341
1342 /**
1343  Copy a string from a char* src to a UTF-8 destination.
1344  Return the number of bytes occupied by the string in the destination
1345  Flags can have:
1346   STR_TERMINATE means include the null termination
1347   STR_UPPER     means uppercase in the destination
1348  dest_len is the maximum length allowed in the destination. If dest_len
1349  is -1 then no maxiumum is used.
1350 **/
1351
1352 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1353 {
1354         size_t src_len = 0;
1355         size_t ret;
1356         char *tmpbuf = NULL;
1357
1358         if (dest_len == (size_t)-1) {
1359                 /* No longer allow dest_len of -1. */
1360                 smb_panic("push_utf8 - invalid dest_len of -1");
1361         }
1362
1363         if (flags & STR_UPPER) {
1364                 tmpbuf = strdup_upper(src);
1365                 if (!tmpbuf) {
1366                         return (size_t)-1;
1367                 }
1368                 src = tmpbuf;
1369                 src_len = strlen(src);
1370         }
1371
1372         src_len = strlen(src);
1373         if (flags & STR_TERMINATE) {
1374                 src_len++;
1375         }
1376
1377         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1378         SAFE_FREE(tmpbuf);
1379         return ret;
1380 }
1381
1382 size_t push_utf8_fstring(void *dest, const char *src)
1383 {
1384         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1385 }
1386
1387 /**
1388  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1389  *
1390  * @param dest always set at least to NULL
1391  *
1392  * @returns The number of bytes occupied by the string in the destination
1393  **/
1394
1395 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1396 {
1397         size_t src_len = strlen(src)+1;
1398
1399         *dest = NULL;
1400         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1401 }
1402
1403 /**
1404  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1405  *
1406  * @param dest always set at least to NULL
1407  *
1408  * @returns The number of bytes occupied by the string in the destination
1409  **/
1410
1411 size_t push_utf8_allocate(char **dest, const char *src)
1412 {
1413         size_t src_len = strlen(src)+1;
1414
1415         *dest = NULL;
1416         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1417 }
1418
1419 /**
1420  Copy a string from a ucs2 source to a unix char* destination.
1421  Flags can have:
1422   STR_TERMINATE means the string in src is null terminated.
1423   STR_NOALIGN   means don't try to align.
1424  if STR_TERMINATE is set then src_len is ignored if it is -1.
1425  src_len is the length of the source area in bytes
1426  Return the number of bytes occupied by the string in src.
1427  The resulting string in "dest" is always null terminated.
1428 **/
1429
1430 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1431 {
1432         size_t ret;
1433
1434         if (dest_len == (size_t)-1) {
1435                 /* No longer allow dest_len of -1. */
1436                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1437         }
1438
1439         if (!src_len) {
1440                 if (dest && dest_len > 0) {
1441                         dest[0] = '\0';
1442                 }
1443                 return 0;
1444         }
1445
1446         if (ucs2_align(base_ptr, src, flags)) {
1447                 src = (const void *)((const char *)src + 1);
1448                 if (src_len != (size_t)-1)
1449                         src_len--;
1450         }
1451
1452         if (flags & STR_TERMINATE) {
1453                 /* src_len -1 is the default for null terminated strings. */
1454                 if (src_len != (size_t)-1) {
1455                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1456                                                 src_len/2);
1457                         if (len < src_len/2)
1458                                 len++;
1459                         src_len = len*2;
1460                 }
1461         }
1462
1463         /* ucs2 is always a multiple of 2 bytes */
1464         if (src_len != (size_t)-1)
1465                 src_len &= ~1;
1466
1467         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1468         if (ret == (size_t)-1) {
1469                 ret = 0;
1470                 dest_len = 0;
1471         }
1472
1473         if (src_len == (size_t)-1)
1474                 src_len = ret*2;
1475
1476         if (dest_len && ret) {
1477                 /* Did we already process the terminating zero ? */
1478                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1479                         dest[MIN(ret, dest_len-1)] = 0;
1480                 }
1481         } else {
1482                 dest[0] = 0;
1483         }
1484
1485         return src_len;
1486 }
1487
1488 /**
1489  Copy a string from a ucs2 source to a unix char* destination.
1490  Talloc version with a base pointer.
1491  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1492  needs fixing. JRA).
1493  Flags can have:
1494   STR_TERMINATE means the string in src is null terminated.
1495   STR_NOALIGN   means don't try to align.
1496  if STR_TERMINATE is set then src_len is ignored if it is -1.
1497  src_len is the length of the source area in bytes
1498  Return the number of bytes occupied by the string in src.
1499  The resulting string in "dest" is always null terminated.
1500 **/
1501
1502 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1503                         const void *base_ptr,
1504                         char **ppdest,
1505                         const void *src,
1506                         size_t src_len,
1507                         int flags)
1508 {
1509         char *dest;
1510         size_t dest_len;
1511
1512         *ppdest = NULL;
1513
1514 #ifdef DEVELOPER
1515         /* Ensure we never use the braindead "malloc" varient. */
1516         if (ctx == NULL) {
1517                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1518         }
1519 #endif
1520
1521         if (!src_len) {
1522                 return 0;
1523         }
1524
1525         if (ucs2_align(base_ptr, src, flags)) {
1526                 src = (const void *)((const char *)src + 1);
1527                 if (src_len != (size_t)-1)
1528                         src_len--;
1529         }
1530
1531         if (flags & STR_TERMINATE) {
1532                 /* src_len -1 is the default for null terminated strings. */
1533                 if (src_len != (size_t)-1) {
1534                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1535                                                 src_len/2);
1536                         if (len < src_len/2)
1537                                 len++;
1538                         src_len = len*2;
1539                 } else {
1540                         /*
1541                          * src_len == -1 - alloc interface won't take this
1542                          * so we must calculate.
1543                          */
1544                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1545                 }
1546                 /* Ensure we don't use an insane length from the client. */
1547                 if (src_len >= 1024*1024) {
1548                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1549                 }
1550         }
1551
1552         /* ucs2 is always a multiple of 2 bytes */
1553         if (src_len != (size_t)-1) {
1554                 src_len &= ~1;
1555         }
1556
1557         dest_len = convert_string_talloc(ctx,
1558                                         CH_UTF16LE,
1559                                         CH_UNIX,
1560                                         src,
1561                                         src_len,
1562                                         (void *)&dest,
1563                                         True);
1564         if (dest_len == (size_t)-1) {
1565                 dest_len = 0;
1566         }
1567
1568         if (src_len == (size_t)-1)
1569                 src_len = dest_len*2;
1570
1571         if (dest_len) {
1572                 /* Did we already process the terminating zero ? */
1573                 if (dest[dest_len-1] != 0) {
1574                         size_t size = talloc_get_size(dest);
1575                         /* Have we got space to append the '\0' ? */
1576                         if (size <= dest_len) {
1577                                 /* No, realloc. */
1578                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1579                                                 dest_len+1);
1580                                 if (!dest) {
1581                                         /* talloc fail. */
1582                                         dest_len = (size_t)-1;
1583                                         return 0;
1584                                 }
1585                         }
1586                         /* Yay - space ! */
1587                         dest[dest_len] = '\0';
1588                         dest_len++;
1589                 }
1590         } else if (dest) {
1591                 dest[0] = 0;
1592         }
1593
1594         *ppdest = dest;
1595         return src_len;
1596 }
1597
1598 size_t pull_ucs2_fstring(char *dest, const void *src)
1599 {
1600         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1601 }
1602
1603 /**
1604  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1605  *
1606  * @param dest always set at least to NULL
1607  *
1608  * @returns The number of bytes occupied by the string in the destination
1609  **/
1610
1611 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1612 {
1613         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1614         *dest = NULL;
1615         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1616 }
1617
1618 /**
1619  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1620  *
1621  * @param dest always set at least to NULL
1622  *
1623  * @returns The number of bytes occupied by the string in the destination
1624  **/
1625
1626 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1627 {
1628         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1629         *dest = NULL;
1630         return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1631 }
1632
1633 /**
1634  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1635  *
1636  * @param dest always set at least to NULL
1637  *
1638  * @returns The number of bytes occupied by the string in the destination
1639  **/
1640
1641 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1642 {
1643         size_t src_len = strlen(src)+1;
1644         *dest = NULL;
1645         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1646 }
1647
1648 /**
1649  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1650  *
1651  * @param dest always set at least to NULL
1652  *
1653  * @returns The number of bytes occupied by the string in the destination
1654  **/
1655
1656 size_t pull_utf8_allocate(char **dest, const char *src)
1657 {
1658         size_t src_len = strlen(src)+1;
1659         *dest = NULL;
1660         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1661 }
1662
1663 /**
1664  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1665  *
1666  * @param dest always set at least to NULL
1667  *
1668  * @returns The number of bytes occupied by the string in the destination
1669  **/
1670
1671 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1672 {
1673         size_t src_len = strlen(src)+1;
1674         *dest = NULL;
1675         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1676 }
1677
1678 /**
1679  Copy a string from a char* src to a unicode or ascii
1680  dos codepage destination choosing unicode or ascii based on the
1681  flags in the SMB buffer starting at base_ptr.
1682  Return the number of bytes occupied by the string in the destination.
1683  flags can have:
1684   STR_TERMINATE means include the null termination.
1685   STR_UPPER     means uppercase in the destination.
1686   STR_ASCII     use ascii even with unicode packet.
1687   STR_NOALIGN   means don't do alignment.
1688  dest_len is the maximum length allowed in the destination. If dest_len
1689  is -1 then no maxiumum is used.
1690 **/
1691
1692 size_t push_string_fn(const char *function, unsigned int line,
1693                       const void *base_ptr, uint16 flags2,
1694                       void *dest, const char *src,
1695                       size_t dest_len, int flags)
1696 {
1697 #ifdef DEVELOPER
1698         /* We really need to zero fill here, not clobber
1699          * region, as we want to ensure that valgrind thinks
1700          * all of the outgoing buffer has been written to
1701          * so a send() or write() won't trap an error.
1702          * JRA.
1703          */
1704 #if 0
1705         clobber_region(function, line, dest, dest_len);
1706 #else
1707         memset(dest, '\0', dest_len);
1708 #endif
1709 #endif
1710
1711         if (!(flags & STR_ASCII) && \
1712             ((flags & STR_UNICODE || \
1713               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1714                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1715         }
1716         return push_ascii(dest, src, dest_len, flags);
1717 }
1718
1719
1720 /**
1721  Copy a string from a unicode or ascii source (depending on
1722  the packet flags) to a char* destination.
1723  Flags can have:
1724   STR_TERMINATE means the string in src is null terminated.
1725   STR_UNICODE   means to force as unicode.
1726   STR_ASCII     use ascii even with unicode packet.
1727   STR_NOALIGN   means don't do alignment.
1728  if STR_TERMINATE is set then src_len is ignored is it is -1
1729  src_len is the length of the source area in bytes.
1730  Return the number of bytes occupied by the string in src.
1731  The resulting string in "dest" is always null terminated.
1732 **/
1733
1734 size_t pull_string_fn(const char *function,
1735                         unsigned int line,
1736                         const void *base_ptr,
1737                         uint16 smb_flags2,
1738                         char *dest,
1739                         const void *src,
1740                         size_t dest_len,
1741                         size_t src_len,
1742                         int flags)
1743 {
1744 #ifdef DEVELOPER
1745         clobber_region(function, line, dest, dest_len);
1746 #endif
1747
1748         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1749                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1750                           "UNICODE defined");
1751         }
1752
1753         if (!(flags & STR_ASCII) && \
1754             ((flags & STR_UNICODE || \
1755               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1756                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1757         }
1758         return pull_ascii(dest, src, dest_len, src_len, flags);
1759 }
1760
1761 /**
1762  Copy a string from a unicode or ascii source (depending on
1763  the packet flags) to a char* destination.
1764  Variant that uses talloc.
1765  Flags can have:
1766   STR_TERMINATE means the string in src is null terminated.
1767   STR_UNICODE   means to force as unicode.
1768   STR_ASCII     use ascii even with unicode packet.
1769   STR_NOALIGN   means don't do alignment.
1770  if STR_TERMINATE is set then src_len is ignored is it is -1
1771  src_len is the length of the source area in bytes.
1772  Return the number of bytes occupied by the string in src.
1773  The resulting string in "dest" is always null terminated.
1774 **/
1775
1776 size_t pull_string_talloc_fn(const char *function,
1777                         unsigned int line,
1778                         TALLOC_CTX *ctx,
1779                         const void *base_ptr,
1780                         uint16 smb_flags2,
1781                         char **ppdest,
1782                         const void *src,
1783                         size_t src_len,
1784                         int flags)
1785 {
1786         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1787                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1788                           "UNICODE defined");
1789         }
1790
1791         if (!(flags & STR_ASCII) && \
1792             ((flags & STR_UNICODE || \
1793               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1794                 return pull_ucs2_base_talloc(ctx,
1795                                         base_ptr,
1796                                         ppdest,
1797                                         src,
1798                                         src_len,
1799                                         flags);
1800         }
1801         return pull_ascii_base_talloc(ctx,
1802                                         ppdest,
1803                                         src,
1804                                         src_len,
1805                                         flags);
1806 }
1807
1808
1809 size_t align_string(const void *base_ptr, const char *p, int flags)
1810 {
1811         if (!(flags & STR_ASCII) && \
1812             ((flags & STR_UNICODE || \
1813               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1814                 return ucs2_align(base_ptr, p, flags);
1815         }
1816         return 0;
1817 }
1818
1819 /*
1820   Return the unicode codepoint for the next multi-byte CH_UNIX character
1821   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1822
1823   Also return the number of bytes consumed (which tells the caller
1824   how many bytes to skip to get to the next CH_UNIX character).
1825
1826   Return INVALID_CODEPOINT if the next character cannot be converted.
1827 */
1828
1829 codepoint_t next_codepoint(const char *str, size_t *size)
1830 {
1831         /* It cannot occupy more than 4 bytes in UTF16 format */
1832         uint8_t buf[4];
1833         smb_iconv_t descriptor;
1834         size_t ilen_orig;
1835         size_t ilen;
1836         size_t olen;
1837         char *outbuf;
1838
1839         if ((str[0] & 0x80) == 0) {
1840                 *size = 1;
1841                 return (codepoint_t)str[0];
1842         }
1843
1844         /* We assume that no multi-byte character can take
1845            more than 5 bytes. This is OK as we only
1846            support codepoints up to 1M */
1847
1848         ilen_orig = strnlen(str, 5);
1849         ilen = ilen_orig;
1850
1851         lazy_initialize_conv();
1852
1853         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1854         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1855                 *size = 1;
1856                 return INVALID_CODEPOINT;
1857         }
1858
1859         /* This looks a little strange, but it is needed to cope
1860            with codepoints above 64k which are encoded as per RFC2781. */
1861         olen = 2;
1862         outbuf = (char *)buf;
1863         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1864         if (olen == 2) {
1865                 /* We failed to convert to a 2 byte character.
1866                    See if we can convert to a 4 UTF16-LE byte char encoding.
1867                 */
1868                 olen = 4;
1869                 outbuf = (char *)buf;
1870                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1871                 if (olen == 4) {
1872                         /* We didn't convert any bytes */
1873                         *size = 1;
1874                         return INVALID_CODEPOINT;
1875                 }
1876                 olen = 4 - olen;
1877         } else {
1878                 olen = 2 - olen;
1879         }
1880
1881         *size = ilen_orig - ilen;
1882
1883         if (olen == 2) {
1884                 /* 2 byte, UTF16-LE encoded value. */
1885                 return (codepoint_t)SVAL(buf, 0);
1886         }
1887         if (olen == 4) {
1888                 /* Decode a 4 byte UTF16-LE character manually.
1889                    See RFC2871 for the encoding machanism.
1890                 */
1891                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1892                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1893
1894                 return (codepoint_t)0x10000 +
1895                                 (w1 << 10) + w2;
1896         }
1897
1898         /* no other length is valid */
1899         return INVALID_CODEPOINT;
1900 }