source/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
  48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
  50 static bool initialized;
  51
  52 /**
  53  * Return the name of a charset to give to iconv().
  54  **/
  55 static const char *charset_name(charset_t ch)
  56 {
  57         const char *ret = NULL;
  58
  59         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  60         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  61         else if (ch == CH_UNIX) ret = lp_unix_charset();
  62         else if (ch == CH_DOS) ret = lp_dos_charset();
  63         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  64         else if (ch == CH_UTF8) ret = "UTF8";
  65
  66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  67         if (ret && !strcmp(ret, "LOCALE")) {
  68                 const char *ln = NULL;
  69
  70 #ifdef HAVE_SETLOCALE
  71                 setlocale(LC_ALL, "");
  72 #endif
  73                 ln = nl_langinfo(CODESET);
  74                 if (ln) {
  75                         /* Check whether the charset name is supported
  76                            by iconv */
  77                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  78                         if (handle == (smb_iconv_t) -1) {
  79                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  80                                 ln = NULL;
  81                         } else {
  82                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  83                                 smb_iconv_close(handle);
  84                         }
  85                 }
  86                 ret = ln;
  87         }
  88 #endif
  89
  90         if (!ret || !*ret) ret = "ASCII";
  91         return ret;
  92 }
  93
  94 void lazy_initialize_conv(void)
  95 {
  96         if (!initialized) {
  97                 load_case_tables();
  98                 init_iconv();
  99                 initialized = true;
 100         }
 101 }
 102
 103 /**
 104  * Destroy global objects allocated by init_iconv()
 105  **/
 106 void gfree_charcnv(void)
 107 {
 108         int c1, c2;
 109
 110         for (c1=0;c1<NUM_CHARSETS;c1++) {
 111                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 112                         if ( conv_handles[c1][c2] ) {
 113                                 smb_iconv_close( conv_handles[c1][c2] );
 114                                 conv_handles[c1][c2] = 0;
 115                         }
 116                 }
 117         }
 118         initialized = false;
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         bool did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_valid_table();
 182                 conv_silent = False;
 183         }
 184 }
 185
 186 /**
 187  * Convert string from one encoding to another, making error checking etc
 188  * Slow path version - uses (slow) iconv.
 189  *
 190  * @param src pointer to source string (multibyte or singlebyte)
 191  * @param srclen length of the source string in bytes
 192  * @param dest pointer to destination string (multibyte or singlebyte)
 193  * @param destlen maximal length allowed for string
 194  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 195  * @returns the number of bytes occupied in the destination
 196  *
 197  * Ensure the srclen contains the terminating zero.
 198  *
 199  **/
 200
 201 static size_t convert_string_internal(charset_t from, charset_t to,
 202                       void const *src, size_t srclen,
 203                       void *dest, size_t destlen, bool allow_bad_conv)
 204 {
 205         size_t i_len, o_len;
 206         size_t retval;
 207         const char* inbuf = (const char*)src;
 208         char* outbuf = (char*)dest;
 209         smb_iconv_t descriptor;
 210
 211         lazy_initialize_conv();
 212
 213         descriptor = conv_handles[from][to];
 214
 215         if (srclen == (size_t)-1) {
 216                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 217                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 218                 } else {
 219                         srclen = strlen((const char *)src)+1;
 220                 }
 221         }
 222
 223
 224         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 225                 if (!conv_silent)
 226                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 227                 return (size_t)-1;
 228         }
 229
 230         i_len=srclen;
 231         o_len=destlen;
 232
 233  again:
 234
 235         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 236         if(retval==(size_t)-1) {
 237                 const char *reason="unknown error";
 238                 switch(errno) {
 239                         case EINVAL:
 240                                 reason="Incomplete multibyte sequence";
 241                                 if (!conv_silent)
 242                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 243                                 if (allow_bad_conv)
 244                                         goto use_as_is;
 245                                 break;
 246                         case E2BIG:
 247                                 reason="No more room";
 248                                 if (!conv_silent) {
 249                                         if (from == CH_UNIX) {
 250                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 251                                                         charset_name(from), charset_name(to),
 252                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 253                                         } else {
 254                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 255                                                         charset_name(from), charset_name(to),
 256                                                         (unsigned int)srclen, (unsigned int)destlen));
 257                                         }
 258                                 }
 259                                 break;
 260                         case EILSEQ:
 261                                 reason="Illegal multibyte sequence";
 262                                 if (!conv_silent)
 263                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 264                                 if (allow_bad_conv)
 265                                         goto use_as_is;
 266                                 break;
 267                         default:
 268                                 if (!conv_silent)
 269                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 270                                 break;
 271                 }
 272                 /* smb_panic(reason); */
 273         }
 274         return destlen-o_len;
 275
 276  use_as_is:
 277
 278         /*
 279          * Conversion not supported. This is actually an error, but there are so
 280          * many misconfigured iconv systems and smb.conf's out there we can't just
 281          * fail. Do a very bad conversion instead.... JRA.
 282          */
 283
 284         {
 285                 if (o_len == 0 || i_len == 0)
 286                         return destlen - o_len;
 287
 288                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 289                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 290                         /* Can't convert from utf16 any endian to multibyte.
 291                            Replace with the default fail char.
 292                         */
 293                         if (i_len < 2)
 294                                 return destlen - o_len;
 295                         if (i_len >= 2) {
 296                                 *outbuf = lp_failed_convert_char();
 297
 298                                 outbuf++;
 299                                 o_len--;
 300
 301                                 inbuf += 2;
 302                                 i_len -= 2;
 303                         }
 304
 305                         if (o_len == 0 || i_len == 0)
 306                                 return destlen - o_len;
 307
 308                         /* Keep trying with the next char... */
 309                         goto again;
 310
 311                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 312                         /* Can't convert to UTF16LE - just widen by adding the
 313                            default fail char then zero.
 314                         */
 315                         if (o_len < 2)
 316                                 return destlen - o_len;
 317
 318                         outbuf[0] = lp_failed_convert_char();
 319                         outbuf[1] = '\0';
 320
 321                         inbuf++;
 322                         i_len--;
 323
 324                         outbuf += 2;
 325                         o_len -= 2;
 326
 327                         if (o_len == 0 || i_len == 0)
 328                                 return destlen - o_len;
 329
 330                         /* Keep trying with the next char... */
 331                         goto again;
 332
 333                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 334                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 335                         /* Failed multibyte to multibyte. Just copy the default fail char and
 336                                 try again. */
 337                         outbuf[0] = lp_failed_convert_char();
 338
 339                         inbuf++;
 340                         i_len--;
 341
 342                         outbuf++;
 343                         o_len--;
 344
 345                         if (o_len == 0 || i_len == 0)
 346                                 return destlen - o_len;
 347
 348                         /* Keep trying with the next char... */
 349                         goto again;
 350
 351                 } else {
 352                         /* Keep compiler happy.... */
 353                         return destlen - o_len;
 354                 }
 355         }
 356 }
 357
 358 /**
 359  * Convert string from one encoding to another, making error checking etc
 360  * Fast path version - handles ASCII first.
 361  *
 362  * @param src pointer to source string (multibyte or singlebyte)
 363  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 364  * @param dest pointer to destination string (multibyte or singlebyte)
 365  * @param destlen maximal length allowed for string - *NEVER* -1.
 366  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 367  * @returns the number of bytes occupied in the destination
 368  *
 369  * Ensure the srclen contains the terminating zero.
 370  *
 371  * This function has been hand-tuned to provide a fast path.
 372  * Don't change unless you really know what you are doing. JRA.
 373  **/
 374
 375 size_t convert_string(charset_t from, charset_t to,
 376                       void const *src, size_t srclen,
 377                       void *dest, size_t destlen, bool allow_bad_conv)
 378 {
 379         /*
 380          * NB. We deliberately don't do a strlen here if srclen == -1.
 381          * This is very expensive over millions of calls and is taken
 382          * care of in the slow path in convert_string_internal. JRA.
 383          */
 384
 385 #ifdef DEVELOPER
 386         SMB_ASSERT(destlen != (size_t)-1);
 387 #endif
 388
 389         if (srclen == 0)
 390                 return 0;
 391
 392         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 393                 const unsigned char *p = (const unsigned char *)src;
 394                 unsigned char *q = (unsigned char *)dest;
 395                 size_t slen = srclen;
 396                 size_t dlen = destlen;
 397                 unsigned char lastp = '\0';
 398                 size_t retval = 0;
 399
 400                 /* If all characters are ascii, fast path here. */
 401                 while (slen && dlen) {
 402                         if ((lastp = *p) <= 0x7f) {
 403                                 *q++ = *p++;
 404                                 if (slen != (size_t)-1) {
 405                                         slen--;
 406                                 }
 407                                 dlen--;
 408                                 retval++;
 409                                 if (!lastp)
 410                                         break;
 411                         } else {
 412 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 413                                 goto general_case;
 414 #else
 415                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 416 #endif
 417                         }
 418                 }
 419                 if (!dlen) {
 420                         /* Even if we fast path we should note if we ran out of room. */
 421                         if (((slen != (size_t)-1) && slen) ||
 422                                         ((slen == (size_t)-1) && lastp)) {
 423                                 errno = E2BIG;
 424                         }
 425                 }
 426                 return retval;
 427         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 428                 const unsigned char *p = (const unsigned char *)src;
 429                 unsigned char *q = (unsigned char *)dest;
 430                 size_t retval = 0;
 431                 size_t slen = srclen;
 432                 size_t dlen = destlen;
 433                 unsigned char lastp = '\0';
 434
 435                 /* If all characters are ascii, fast path here. */
 436                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 437                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 438                                 *q++ = *p;
 439                                 if (slen != (size_t)-1) {
 440                                         slen -= 2;
 441                                 }
 442                                 p += 2;
 443                                 dlen--;
 444                                 retval++;
 445                                 if (!lastp)
 446                                         break;
 447                         } else {
 448 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 449                                 goto general_case;
 450 #else
 451                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 452 #endif
 453                         }
 454                 }
 455                 if (!dlen) {
 456                         /* Even if we fast path we should note if we ran out of room. */
 457                         if (((slen != (size_t)-1) && slen) ||
 458                                         ((slen == (size_t)-1) && lastp)) {
 459                                 errno = E2BIG;
 460                         }
 461                 }
 462                 return retval;
 463         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 464                 const unsigned char *p = (const unsigned char *)src;
 465                 unsigned char *q = (unsigned char *)dest;
 466                 size_t retval = 0;
 467                 size_t slen = srclen;
 468                 size_t dlen = destlen;
 469                 unsigned char lastp = '\0';
 470
 471                 /* If all characters are ascii, fast path here. */
 472                 while (slen && (dlen >= 2)) {
 473                         if ((lastp = *p) <= 0x7F) {
 474                                 *q++ = *p++;
 475                                 *q++ = '\0';
 476                                 if (slen != (size_t)-1) {
 477                                         slen--;
 478                                 }
 479                                 dlen -= 2;
 480                                 retval += 2;
 481                                 if (!lastp)
 482                                         break;
 483                         } else {
 484 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 485                                 goto general_case;
 486 #else
 487                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 488 #endif
 489                         }
 490                 }
 491                 if (!dlen) {
 492                         /* Even if we fast path we should note if we ran out of room. */
 493                         if (((slen != (size_t)-1) && slen) ||
 494                                         ((slen == (size_t)-1) && lastp)) {
 495                                 errno = E2BIG;
 496                         }
 497                 }
 498                 return retval;
 499         }
 500
 501 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 502   general_case:
 503 #endif
 504         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 505 }
 506
 507 /**
 508  * Convert between character sets, allocating a new buffer for the result.
 509  *
 510  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 511  * (this is a bad interface and needs fixing. JRA).
 512  * @param srclen length of source buffer.
 513  * @param dest always set at least to NULL
 514  * @param converted_size set to the size of the allocated buffer on return
 515  * true
 516  * @note -1 is not accepted for srclen.
 517  *
 518  * @return True if new buffer was correctly allocated, and string was
 519  * converted.
 520  *
 521  * Ensure the srclen contains the terminating zero.
 522  *
 523  * I hate the goto's in this function. It's embarressing.....
 524  * There has to be a cleaner way to do this. JRA.
 525  **/
 526
 527 bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 528                              void const *src, size_t srclen, void *dst,
 529                              size_t *converted_size, bool allow_bad_conv)
 530 {
 531         size_t i_len, o_len, destlen = (srclen * 3) / 2;
 532         size_t retval;
 533         const char *inbuf = (const char *)src;
 534         char *outbuf = NULL, *ob = NULL;
 535         smb_iconv_t descriptor;
 536         void **dest = (void **)dst;
 537
 538         *dest = NULL;
 539
 540         if (!converted_size) {
 541                 errno = EINVAL;
 542                 return false;
 543         }
 544
 545         if (src == NULL || srclen == (size_t)-1) {
 546                 errno = EINVAL;
 547                 return false;
 548         }
 549         if (srclen == 0) {
 550                 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
 551                 if (ob == NULL) {
 552                         errno = ENOMEM;
 553                         return false;
 554                 }
 555                 *dest = ob;
 556                 *converted_size = 0;
 557                 return true;
 558         }
 559
 560         lazy_initialize_conv();
 561
 562         descriptor = conv_handles[from][to];
 563
 564         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 565                 if (!conv_silent)
 566                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 567                 errno = EOPNOTSUPP;
 568                 return false;
 569         }
 570
 571   convert:
 572
 573         /* +2 is for ucs2 null termination. */
 574         if ((destlen*2)+2 < destlen) {
 575                 /* wrapped ! abort. */
 576                 if (!conv_silent)
 577                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 578                 if (!ctx)
 579                         SAFE_FREE(outbuf);
 580                 errno = EOPNOTSUPP;
 581                 return false;
 582         } else {
 583                 destlen = destlen * 2;
 584         }
 585
 586         /* +2 is for ucs2 null termination. */
 587         if (ctx) {
 588                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
 589         } else {
 590                 ob = (char *)SMB_REALLOC(ob, destlen + 2);
 591         }
 592
 593         if (!ob) {
 594                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 595                 errno = ENOMEM;
 596                 return false;
 597         }
 598         outbuf = ob;
 599         i_len = srclen;
 600         o_len = destlen;
 601
 602  again:
 603
 604         retval = smb_iconv(descriptor,
 605                            &inbuf, &i_len,
 606                            &outbuf, &o_len);
 607         if(retval == (size_t)-1)                {
 608                 const char *reason="unknown error";
 609                 switch(errno) {
 610                         case EINVAL:
 611                                 reason="Incomplete multibyte sequence";
 612                                 if (!conv_silent)
 613                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 614                                 if (allow_bad_conv)
 615                                         goto use_as_is;
 616                                 break;
 617                         case E2BIG:
 618                                 goto convert;
 619                         case EILSEQ:
 620                                 reason="Illegal multibyte sequence";
 621                                 if (!conv_silent)
 622                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 623                                 if (allow_bad_conv)
 624                                         goto use_as_is;
 625                                 break;
 626                 }
 627                 if (!conv_silent)
 628                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 629                 /* smb_panic(reason); */
 630                 if (ctx) {
 631                         TALLOC_FREE(ob);
 632                 } else {
 633                         SAFE_FREE(ob);
 634                 }
 635                 return false;
 636         }
 637
 638   out:
 639
 640         destlen = destlen - o_len;
 641         /* Don't shrink unless we're reclaiming a lot of
 642          * space. This is in the hot codepath and these
 643          * reallocs *cost*. JRA.
 644          */
 645         if (o_len > 1024) {
 646                 /* We're shrinking here so we know the +2 is safe from wrap. */
 647                 if (ctx) {
 648                         ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
 649                 } else {
 650                         ob = (char *)SMB_REALLOC(ob,destlen + 2);
 651                 }
 652         }
 653
 654         if (destlen && !ob) {
 655                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 656                 errno = ENOMEM;
 657                 return false;
 658         }
 659
 660         *dest = ob;
 661
 662         /* Must ucs2 null terminate in the extra space we allocated. */
 663         ob[destlen] = '\0';
 664         ob[destlen+1] = '\0';
 665
 666         *converted_size = destlen;
 667         return true;
 668
 669  use_as_is:
 670
 671         /*
 672          * Conversion not supported. This is actually an error, but there are so
 673          * many misconfigured iconv systems and smb.conf's out there we can't just
 674          * fail. Do a very bad conversion instead.... JRA.
 675          */
 676
 677         {
 678                 if (o_len == 0 || i_len == 0)
 679                         goto out;
 680
 681                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 682                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 683                         /* Can't convert from utf16 any endian to multibyte.
 684                            Replace with the default fail char.
 685                         */
 686
 687                         if (i_len < 2)
 688                                 goto out;
 689
 690                         if (i_len >= 2) {
 691                                 *outbuf = lp_failed_convert_char();
 692
 693                                 outbuf++;
 694                                 o_len--;
 695
 696                                 inbuf += 2;
 697                                 i_len -= 2;
 698                         }
 699
 700                         if (o_len == 0 || i_len == 0)
 701                                 goto out;
 702
 703                         /* Keep trying with the next char... */
 704                         goto again;
 705
 706                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 707                         /* Can't convert to UTF16LE - just widen by adding the
 708                            default fail char then zero.
 709                         */
 710                         if (o_len < 2)
 711                                 goto out;
 712
 713                         outbuf[0] = lp_failed_convert_char();
 714                         outbuf[1] = '\0';
 715
 716                         inbuf++;
 717                         i_len--;
 718
 719                         outbuf += 2;
 720                         o_len -= 2;
 721
 722                         if (o_len == 0 || i_len == 0)
 723                                 goto out;
 724
 725                         /* Keep trying with the next char... */
 726                         goto again;
 727
 728                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 729                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 730                         /* Failed multibyte to multibyte. Just copy the default fail char and
 731                            try again. */
 732                         outbuf[0] = lp_failed_convert_char();
 733
 734                         inbuf++;
 735                         i_len--;
 736
 737                         outbuf++;
 738                         o_len--;
 739
 740                         if (o_len == 0 || i_len == 0)
 741                                 goto out;
 742
 743                         /* Keep trying with the next char... */
 744                         goto again;
 745
 746                 } else {
 747                         /* Keep compiler happy.... */
 748                         goto out;
 749                 }
 750         }
 751 }
 752
 753 /**
 754  * Convert between character sets, allocating a new buffer using talloc for the result.
 755  *
 756  * @param srclen length of source buffer.
 757  * @param dest always set at least to NULL
 758  * @note -1 is not accepted for srclen.
 759  *
 760  * @returns Size in bytes of the converted string; or -1 in case of error.
 761  **/
 762 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 763                              void const *src, size_t srclen, void *dst,
 764                              bool allow_bad_conv)
 765 {
 766         void **dest = (void **)dst;
 767         size_t dest_len;
 768
 769         *dest = NULL;
 770         if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
 771                 &dest_len, allow_bad_conv))
 772                 return (size_t)-1;
 773         if (*dest == NULL)
 774                 return (size_t)-1;
 775         return dest_len;
 776 }
 777
 778 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 779 {
 780         size_t size;
 781         smb_ucs2_t *buffer;
 782
 783         size = push_ucs2_allocate(&buffer, src);
 784         if (size == (size_t)-1) {
 785                 return (size_t)-1;
 786         }
 787         if (!strupper_w(buffer) && (dest == src)) {
 788                 free(buffer);
 789                 return srclen;
 790         }
 791
 792         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 793         free(buffer);
 794         return size;
 795 }
 796
 797 /**
 798  strdup() a unix string to upper case.
 799 **/
 800
 801 char *strdup_upper(const char *s)
 802 {
 803         char *out_buffer = SMB_STRDUP(s);
 804         const unsigned char *p = (const unsigned char *)s;
 805         unsigned char *q = (unsigned char *)out_buffer;
 806
 807         if (!q) {
 808                 return NULL;
 809         }
 810
 811         /* this is quite a common operation, so we want it to be
 812            fast. We optimise for the ascii case, knowing that all our
 813            supported multi-byte character sets are ascii-compatible
 814            (ie. they match for the first 128 chars) */
 815
 816         while (*p) {
 817                 if (*p & 0x80)
 818                         break;
 819                 *q++ = toupper_ascii_fast(*p);
 820                 p++;
 821         }
 822
 823         if (*p) {
 824                 /* MB case. */
 825                 size_t size, size2;
 826                 smb_ucs2_t *buffer = NULL;
 827
 828                 SAFE_FREE(out_buffer);
 829                 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
 830                         strlen(s) + 1, (void **)(void *)&buffer, &size,
 831                         True)) {
 832                         return NULL;
 833                 }
 834
 835                 strupper_w(buffer);
 836
 837                 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
 838                         size, (void **)(void *)&out_buffer, &size2, True)) {
 839                         TALLOC_FREE(buffer);
 840                         return NULL;
 841                 }
 842
 843                 /* Don't need the intermediate buffer
 844                  * anymore.
 845                  */
 846                 TALLOC_FREE(buffer);
 847         }
 848
 849         return out_buffer;
 850 }
 851
 852 /**
 853  talloc_strdup() a unix string to upper case.
 854 **/
 855
 856 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 857 {
 858         char *out_buffer = talloc_strdup(ctx,s);
 859         const unsigned char *p = (const unsigned char *)s;
 860         unsigned char *q = (unsigned char *)out_buffer;
 861
 862         if (!q) {
 863                 return NULL;
 864         }
 865
 866         /* this is quite a common operation, so we want it to be
 867            fast. We optimise for the ascii case, knowing that all our
 868            supported multi-byte character sets are ascii-compatible
 869            (ie. they match for the first 128 chars) */
 870
 871         while (*p) {
 872                 if (*p & 0x80)
 873                         break;
 874                 *q++ = toupper_ascii_fast(*p);
 875                 p++;
 876         }
 877
 878         if (*p) {
 879                 /* MB case. */
 880                 size_t size;
 881                 smb_ucs2_t *ubuf = NULL;
 882
 883                 /* We're not using the ascii buffer above. */
 884                 TALLOC_FREE(out_buffer);
 885
 886                 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
 887                                 s, strlen(s)+1,
 888                                 (void *)&ubuf,
 889                                 True);
 890                 if (size == (size_t)-1) {
 891                         return NULL;
 892                 }
 893
 894                 strupper_w(ubuf);
 895
 896                 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
 897                                 ubuf, size,
 898                                 (void *)&out_buffer,
 899                                 True);
 900
 901                 /* Don't need the intermediate buffer
 902                  * anymore.
 903                  */
 904
 905                 TALLOC_FREE(ubuf);
 906
 907                 if (size == (size_t)-1) {
 908                         return NULL;
 909                 }
 910         }
 911
 912         return out_buffer;
 913 }
 914
 915 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 916 {
 917         size_t size;
 918         smb_ucs2_t *buffer = NULL;
 919
 920         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 921                 (void **)(void *)&buffer, &size, True)) {
 922                 smb_panic("failed to create UCS2 buffer");
 923         }
 924         if (!strlower_w(buffer) && (dest == src)) {
 925                 SAFE_FREE(buffer);
 926                 return srclen;
 927         }
 928         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 929         SAFE_FREE(buffer);
 930         return size;
 931 }
 932
 933 /**
 934  strdup() a unix string to lower case.
 935 **/
 936
 937 char *strdup_lower(const char *s)
 938 {
 939         size_t size;
 940         smb_ucs2_t *buffer = NULL;
 941         char *out_buffer;
 942
 943         size = push_ucs2_allocate(&buffer, s);
 944         if (size == -1 || !buffer) {
 945                 return NULL;
 946         }
 947
 948         strlower_w(buffer);
 949
 950         size = pull_ucs2_allocate(&out_buffer, buffer);
 951         SAFE_FREE(buffer);
 952
 953         if (size == (size_t)-1) {
 954                 return NULL;
 955         }
 956
 957         return out_buffer;
 958 }
 959
 960 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
 961 {
 962         size_t size;
 963         smb_ucs2_t *buffer = NULL;
 964         char *out_buffer;
 965
 966         size = push_ucs2_talloc(ctx, &buffer, s);
 967         if (size == -1 || !buffer) {
 968                 TALLOC_FREE(buffer);
 969                 return NULL;
 970         }
 971
 972         strlower_w(buffer);
 973
 974         size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
 975         TALLOC_FREE(buffer);
 976
 977         if (size == (size_t)-1) {
 978                 TALLOC_FREE(out_buffer);
 979                 return NULL;
 980         }
 981
 982         return out_buffer;
 983 }
 984
 985
 986 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 987 {
 988         if (flags & (STR_NOALIGN|STR_ASCII))
 989                 return 0;
 990         return PTR_DIFF(p, base_ptr) & 1;
 991 }
 992
 993
 994 /**
 995  * Copy a string from a char* unix src to a dos codepage string destination.
 996  *
 997  * @return the number of bytes occupied by the string in the destination.
 998  *
 999  * @param flags can include
1000  * <dl>
1001  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1002  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1003  * </dl>
1004  *
1005  * @param dest_len the maximum length in bytes allowed in the
1006  * destination.
1007  **/
1008 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1009 {
1010         size_t src_len = strlen(src);
1011         char *tmpbuf = NULL;
1012         size_t ret;
1013
1014         /* No longer allow a length of -1. */
1015         if (dest_len == (size_t)-1) {
1016                 smb_panic("push_ascii - dest_len == -1");
1017         }
1018
1019         if (flags & STR_UPPER) {
1020                 tmpbuf = SMB_STRDUP(src);
1021                 if (!tmpbuf) {
1022                         smb_panic("malloc fail");
1023                 }
1024                 strupper_m(tmpbuf);
1025                 src = tmpbuf;
1026         }
1027
1028         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1029                 src_len++;
1030         }
1031
1032         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1033         if (ret == (size_t)-1 &&
1034                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1035                         && dest_len > 0) {
1036                 ((char *)dest)[0] = '\0';
1037         }
1038         SAFE_FREE(tmpbuf);
1039         return ret;
1040 }
1041
1042 size_t push_ascii_fstring(void *dest, const char *src)
1043 {
1044         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1045 }
1046
1047 /********************************************************************
1048  Push an nstring - ensure null terminated. Written by
1049  moriyama@miraclelinux.com (MORIYAMA Masayuki).
1050 ********************************************************************/
1051
1052 size_t push_ascii_nstring(void *dest, const char *src)
1053 {
1054         size_t i, buffer_len, dest_len;
1055         smb_ucs2_t *buffer;
1056
1057         conv_silent = True;
1058         buffer_len = push_ucs2_allocate(&buffer, src);
1059         if (buffer_len == (size_t)-1) {
1060                 smb_panic("failed to create UCS2 buffer");
1061         }
1062
1063         /* We're using buffer_len below to count ucs2 characters, not bytes. */
1064         buffer_len /= sizeof(smb_ucs2_t);
1065
1066         dest_len = 0;
1067         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1068                 unsigned char mb[10];
1069                 /* Convert one smb_ucs2_t character at a time. */
1070                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1071                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1072                         memcpy((char *)dest + dest_len, mb, mb_len);
1073                         dest_len += mb_len;
1074                 } else {
1075                         errno = E2BIG;
1076                         break;
1077                 }
1078         }
1079         ((char *)dest)[dest_len] = '\0';
1080
1081         SAFE_FREE(buffer);
1082         conv_silent = False;
1083         return dest_len;
1084 }
1085
1086 /********************************************************************
1087  Push and malloc an ascii string. src and dest null terminated.
1088 ********************************************************************/
1089
1090 size_t push_ascii_allocate(char **dest, const char *src)
1091 {
1092         size_t dest_len, src_len = strlen(src)+1;
1093
1094         *dest = NULL;
1095         if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1096                 (void **)dest, &dest_len, True))
1097                 return (size_t)-1;
1098         else
1099                 return dest_len;
1100 }
1101
1102 /**
1103  * Copy a string from a dos codepage source to a unix char* destination.
1104  *
1105  * The resulting string in "dest" is always null terminated.
1106  *
1107  * @param flags can have:
1108  * <dl>
1109  * <dt>STR_TERMINATE</dt>
1110  * <dd>STR_TERMINATE means the string in @p src
1111  * is null terminated, and src_len is ignored.</dd>
1112  * </dl>
1113  *
1114  * @param src_len is the length of the source area in bytes.
1115  * @returns the number of bytes occupied by the string in @p src.
1116  **/
1117 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1118 {
1119         size_t ret;
1120
1121         if (dest_len == (size_t)-1) {
1122                 /* No longer allow dest_len of -1. */
1123                 smb_panic("pull_ascii - invalid dest_len of -1");
1124         }
1125
1126         if (flags & STR_TERMINATE) {
1127                 if (src_len == (size_t)-1) {
1128                         src_len = strlen((const char *)src) + 1;
1129                 } else {
1130                         size_t len = strnlen((const char *)src, src_len);
1131                         if (len < src_len)
1132                                 len++;
1133                         src_len = len;
1134                 }
1135         }
1136
1137         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1138         if (ret == (size_t)-1) {
1139                 ret = 0;
1140                 dest_len = 0;
1141         }
1142
1143         if (dest_len && ret) {
1144                 /* Did we already process the terminating zero ? */
1145                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1146                         dest[MIN(ret, dest_len-1)] = 0;
1147                 }
1148         } else  {
1149                 dest[0] = 0;
1150         }
1151
1152         return src_len;
1153 }
1154
1155 /**
1156  * Copy a string from a dos codepage source to a unix char* destination.
1157  Talloc version.
1158  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1159  needs fixing. JRA).
1160  *
1161  * The resulting string in "dest" is always null terminated.
1162  *
1163  * @param flags can have:
1164  * <dl>
1165  * <dt>STR_TERMINATE</dt>
1166  * <dd>STR_TERMINATE means the string in @p src
1167  * is null terminated, and src_len is ignored.</dd>
1168  * </dl>
1169  *
1170  * @param src_len is the length of the source area in bytes.
1171  * @returns the number of bytes occupied by the string in @p src.
1172  **/
1173
1174 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1175                                         char **ppdest,
1176                                         const void *src,
1177                                         size_t src_len,
1178                                         int flags)
1179 {
1180         char *dest = NULL;
1181         size_t dest_len = 0;
1182
1183 #ifdef DEVELOPER
1184         /* Ensure we never use the braindead "malloc" varient. */
1185         if (ctx == NULL) {
1186                 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1187         }
1188 #endif
1189
1190         *ppdest = NULL;
1191
1192         if (!src_len) {
1193                 return 0;
1194         }
1195
1196         if (flags & STR_TERMINATE) {
1197                 if (src_len == (size_t)-1) {
1198                         src_len = strlen((const char *)src) + 1;
1199                 } else {
1200                         size_t len = strnlen((const char *)src, src_len);
1201                         if (len < src_len)
1202                                 len++;
1203                         src_len = len;
1204                 }
1205                 /* Ensure we don't use an insane length from the client. */
1206                 if (src_len >= 1024*1024) {
1207                         char *msg = talloc_asprintf(ctx,
1208                                         "Bad src length (%u) in "
1209                                         "pull_ascii_base_talloc",
1210                                         (unsigned int)src_len);
1211                         smb_panic(msg);
1212                 }
1213         } else {
1214                 /* Can't have an unlimited length
1215                  * non STR_TERMINATE'd.
1216                  */
1217                 if (src_len == (size_t)-1) {
1218                         errno = EINVAL;
1219                         return 0;
1220                 }
1221         }
1222
1223         /* src_len != -1 here. */
1224
1225         if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1226                 &dest_len, True)) {
1227                 dest_len = 0;
1228         }
1229
1230         if (dest_len && dest) {
1231                 /* Did we already process the terminating zero ? */
1232                 if (dest[dest_len-1] != 0) {
1233                         size_t size = talloc_get_size(dest);
1234                         /* Have we got space to append the '\0' ? */
1235                         if (size <= dest_len) {
1236                                 /* No, realloc. */
1237                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1238                                                 dest_len+1);
1239                                 if (!dest) {
1240                                         /* talloc fail. */
1241                                         dest_len = (size_t)-1;
1242                                         return 0;
1243                                 }
1244                         }
1245                         /* Yay - space ! */
1246                         dest[dest_len] = '\0';
1247                         dest_len++;
1248                 }
1249         } else if (dest) {
1250                 dest[0] = 0;
1251         }
1252
1253         *ppdest = dest;
1254         return src_len;
1255 }
1256
1257 size_t pull_ascii_fstring(char *dest, const void *src)
1258 {
1259         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1260 }
1261
1262 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1263
1264 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1265 {
1266         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1267 }
1268
1269 /**
1270  * Copy a string from a char* src to a unicode destination.
1271  *
1272  * @returns the number of bytes occupied by the string in the destination.
1273  *
1274  * @param flags can have:
1275  *
1276  * <dl>
1277  * <dt>STR_TERMINATE <dd>means include the null termination.
1278  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1279  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1280  * </dl>
1281  *
1282  * @param dest_len is the maximum length allowed in the
1283  * destination.
1284  **/
1285
1286 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1287 {
1288         size_t len=0;
1289         size_t src_len;
1290         size_t ret;
1291
1292         if (dest_len == (size_t)-1) {
1293                 /* No longer allow dest_len of -1. */
1294                 smb_panic("push_ucs2 - invalid dest_len of -1");
1295         }
1296
1297         if (flags & STR_TERMINATE)
1298                 src_len = (size_t)-1;
1299         else
1300                 src_len = strlen(src);
1301
1302         if (ucs2_align(base_ptr, dest, flags)) {
1303                 *(char *)dest = 0;
1304                 dest = (void *)((char *)dest + 1);
1305                 if (dest_len)
1306                         dest_len--;
1307                 len++;
1308         }
1309
1310         /* ucs2 is always a multiple of 2 bytes */
1311         dest_len &= ~1;
1312
1313         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1314         if (ret == (size_t)-1) {
1315                 if ((flags & STR_TERMINATE) &&
1316                                 dest &&
1317                                 dest_len) {
1318                         *(char *)dest = 0;
1319                 }
1320                 return len;
1321         }
1322
1323         len += ret;
1324
1325         if (flags & STR_UPPER) {
1326                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1327                 size_t i;
1328
1329                 /* We check for i < (ret / 2) below as the dest string isn't null
1330                    terminated if STR_TERMINATE isn't set. */
1331
1332                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1333                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1334                         if (v != dest_ucs2[i]) {
1335                                 dest_ucs2[i] = v;
1336                         }
1337                 }
1338         }
1339
1340         return len;
1341 }
1342
1343
1344 /**
1345  * Copy a string from a unix char* src to a UCS2 destination,
1346  * allocating a buffer using talloc().
1347  *
1348  * @param dest always set at least to NULL
1349  *
1350  * @returns The number of bytes occupied by the string in the destination
1351  *         or -1 in case of error.
1352  **/
1353 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1354 {
1355         size_t src_len = strlen(src)+1;
1356
1357         *dest = NULL;
1358         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1359 }
1360
1361
1362 /**
1363  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1364  *
1365  * @param dest always set at least to NULL
1366  *
1367  * @returns The number of bytes occupied by the string in the destination
1368  *         or -1 in case of error.
1369  **/
1370
1371 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1372 {
1373         size_t dest_len, src_len = strlen(src)+1;
1374
1375         *dest = NULL;
1376         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1377                 (void **)dest, &dest_len, True))
1378                 return (size_t)-1;
1379         else
1380                 return dest_len;
1381 }
1382
1383 /**
1384  Copy a string from a char* src to a UTF-8 destination.
1385  Return the number of bytes occupied by the string in the destination
1386  Flags can have:
1387   STR_TERMINATE means include the null termination
1388   STR_UPPER     means uppercase in the destination
1389  dest_len is the maximum length allowed in the destination. If dest_len
1390  is -1 then no maxiumum is used.
1391 **/
1392
1393 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1394 {
1395         size_t src_len = 0;
1396         size_t ret;
1397         char *tmpbuf = NULL;
1398
1399         if (dest_len == (size_t)-1) {
1400                 /* No longer allow dest_len of -1. */
1401                 smb_panic("push_utf8 - invalid dest_len of -1");
1402         }
1403
1404         if (flags & STR_UPPER) {
1405                 tmpbuf = strdup_upper(src);
1406                 if (!tmpbuf) {
1407                         return (size_t)-1;
1408                 }
1409                 src = tmpbuf;
1410                 src_len = strlen(src);
1411         }
1412
1413         src_len = strlen(src);
1414         if (flags & STR_TERMINATE) {
1415                 src_len++;
1416         }
1417
1418         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1419         SAFE_FREE(tmpbuf);
1420         return ret;
1421 }
1422
1423 size_t push_utf8_fstring(void *dest, const char *src)
1424 {
1425         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1426 }
1427
1428 /**
1429  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1430  *
1431  * @param dest always set at least to NULL
1432  *
1433  * @returns The number of bytes occupied by the string in the destination
1434  **/
1435
1436 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1437 {
1438         size_t src_len = strlen(src)+1;
1439
1440         *dest = NULL;
1441         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1442 }
1443
1444 /**
1445  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1446  *
1447  * @param dest always set at least to NULL
1448  *
1449  * @returns The number of bytes occupied by the string in the destination
1450  **/
1451
1452 size_t push_utf8_allocate(char **dest, const char *src)
1453 {
1454         size_t dest_len, src_len = strlen(src)+1;
1455
1456         *dest = NULL;
1457         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1458                 (void **)dest, &dest_len, True))
1459                 return (size_t)-1;
1460         else
1461                 return dest_len;
1462 }
1463
1464 /**
1465  Copy a string from a ucs2 source to a unix char* destination.
1466  Flags can have:
1467   STR_TERMINATE means the string in src is null terminated.
1468   STR_NOALIGN   means don't try to align.
1469  if STR_TERMINATE is set then src_len is ignored if it is -1.
1470  src_len is the length of the source area in bytes
1471  Return the number of bytes occupied by the string in src.
1472  The resulting string in "dest" is always null terminated.
1473 **/
1474
1475 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1476 {
1477         size_t ret;
1478
1479         if (dest_len == (size_t)-1) {
1480                 /* No longer allow dest_len of -1. */
1481                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1482         }
1483
1484         if (!src_len) {
1485                 if (dest && dest_len > 0) {
1486                         dest[0] = '\0';
1487                 }
1488                 return 0;
1489         }
1490
1491         if (ucs2_align(base_ptr, src, flags)) {
1492                 src = (const void *)((const char *)src + 1);
1493                 if (src_len != (size_t)-1)
1494                         src_len--;
1495         }
1496
1497         if (flags & STR_TERMINATE) {
1498                 /* src_len -1 is the default for null terminated strings. */
1499                 if (src_len != (size_t)-1) {
1500                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1501                                                 src_len/2);
1502                         if (len < src_len/2)
1503                                 len++;
1504                         src_len = len*2;
1505                 }
1506         }
1507
1508         /* ucs2 is always a multiple of 2 bytes */
1509         if (src_len != (size_t)-1)
1510                 src_len &= ~1;
1511
1512         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1513         if (ret == (size_t)-1) {
1514                 ret = 0;
1515                 dest_len = 0;
1516         }
1517
1518         if (src_len == (size_t)-1)
1519                 src_len = ret*2;
1520
1521         if (dest_len && ret) {
1522                 /* Did we already process the terminating zero ? */
1523                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1524                         dest[MIN(ret, dest_len-1)] = 0;
1525                 }
1526         } else {
1527                 dest[0] = 0;
1528         }
1529
1530         return src_len;
1531 }
1532
1533 /**
1534  Copy a string from a ucs2 source to a unix char* destination.
1535  Talloc version with a base pointer.
1536  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1537  needs fixing. JRA).
1538  Flags can have:
1539   STR_TERMINATE means the string in src is null terminated.
1540   STR_NOALIGN   means don't try to align.
1541  if STR_TERMINATE is set then src_len is ignored if it is -1.
1542  src_len is the length of the source area in bytes
1543  Return the number of bytes occupied by the string in src.
1544  The resulting string in "dest" is always null terminated.
1545 **/
1546
1547 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1548                         const void *base_ptr,
1549                         char **ppdest,
1550                         const void *src,
1551                         size_t src_len,
1552                         int flags)
1553 {
1554         char *dest;
1555         size_t dest_len;
1556
1557         *ppdest = NULL;
1558
1559 #ifdef DEVELOPER
1560         /* Ensure we never use the braindead "malloc" varient. */
1561         if (ctx == NULL) {
1562                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1563         }
1564 #endif
1565
1566         if (!src_len) {
1567                 return 0;
1568         }
1569
1570         if (ucs2_align(base_ptr, src, flags)) {
1571                 src = (const void *)((const char *)src + 1);
1572                 if (src_len != (size_t)-1)
1573                         src_len--;
1574         }
1575
1576         if (flags & STR_TERMINATE) {
1577                 /* src_len -1 is the default for null terminated strings. */
1578                 if (src_len != (size_t)-1) {
1579                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1580                                                 src_len/2);
1581                         if (len < src_len/2)
1582                                 len++;
1583                         src_len = len*2;
1584                 } else {
1585                         /*
1586                          * src_len == -1 - alloc interface won't take this
1587                          * so we must calculate.
1588                          */
1589                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1590                 }
1591                 /* Ensure we don't use an insane length from the client. */
1592                 if (src_len >= 1024*1024) {
1593                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1594                 }
1595         } else {
1596                 /* Can't have an unlimited length
1597                  * non STR_TERMINATE'd.
1598                  */
1599                 if (src_len == (size_t)-1) {
1600                         errno = EINVAL;
1601                         return 0;
1602                 }
1603         }
1604
1605         /* src_len != -1 here. */
1606
1607         /* ucs2 is always a multiple of 2 bytes */
1608         src_len &= ~1;
1609
1610         dest_len = convert_string_talloc(ctx,
1611                                         CH_UTF16LE,
1612                                         CH_UNIX,
1613                                         src,
1614                                         src_len,
1615                                         (void *)&dest,
1616                                         True);
1617         if (dest_len == (size_t)-1) {
1618                 dest_len = 0;
1619         }
1620
1621         if (dest_len) {
1622                 /* Did we already process the terminating zero ? */
1623                 if (dest[dest_len-1] != 0) {
1624                         size_t size = talloc_get_size(dest);
1625                         /* Have we got space to append the '\0' ? */
1626                         if (size <= dest_len) {
1627                                 /* No, realloc. */
1628                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1629                                                 dest_len+1);
1630                                 if (!dest) {
1631                                         /* talloc fail. */
1632                                         dest_len = (size_t)-1;
1633                                         return 0;
1634                                 }
1635                         }
1636                         /* Yay - space ! */
1637                         dest[dest_len] = '\0';
1638                         dest_len++;
1639                 }
1640         } else if (dest) {
1641                 dest[0] = 0;
1642         }
1643
1644         *ppdest = dest;
1645         return src_len;
1646 }
1647
1648 size_t pull_ucs2_fstring(char *dest, const void *src)
1649 {
1650         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1651 }
1652
1653 /**
1654  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1655  *
1656  * @param dest always set at least to NULL
1657  *
1658  * @returns The number of bytes occupied by the string in the destination
1659  **/
1660
1661 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1662 {
1663         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1664         *dest = NULL;
1665         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1666 }
1667
1668 /**
1669  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1670  *
1671  * @param dest always set at least to NULL
1672  *
1673  * @returns The number of bytes occupied by the string in the destination
1674  **/
1675
1676 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1677 {
1678         size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1679         *dest = NULL;
1680         if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1681                 (void **)dest, &dest_len, True))
1682                 return (size_t)-1;
1683         else
1684                 return dest_len;
1685 }
1686
1687 /**
1688  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1689  *
1690  * @param dest always set at least to NULL
1691  *
1692  * @returns The number of bytes occupied by the string in the destination
1693  **/
1694
1695 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1696 {
1697         size_t src_len = strlen(src)+1;
1698         *dest = NULL;
1699         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1700 }
1701
1702 /**
1703  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1704  *
1705  * @param dest always set at least to NULL
1706  *
1707  * @returns The number of bytes occupied by the string in the destination
1708  **/
1709
1710 size_t pull_utf8_allocate(char **dest, const char *src)
1711 {
1712         size_t dest_len, src_len = strlen(src)+1;
1713         *dest = NULL;
1714         if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1715                 (void **)dest, &dest_len, True))
1716                 return (size_t)-1;
1717         else
1718                 return dest_len;
1719 }
1720
1721 /**
1722  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1723  *
1724  * @param dest always set at least to NULL
1725  *
1726  * @returns The number of bytes occupied by the string in the destination
1727  **/
1728
1729 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1730 {
1731         size_t src_len = strlen(src)+1;
1732         *dest = NULL;
1733         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1734 }
1735
1736 /**
1737  Copy a string from a char* src to a unicode or ascii
1738  dos codepage destination choosing unicode or ascii based on the
1739  flags in the SMB buffer starting at base_ptr.
1740  Return the number of bytes occupied by the string in the destination.
1741  flags can have:
1742   STR_TERMINATE means include the null termination.
1743   STR_UPPER     means uppercase in the destination.
1744   STR_ASCII     use ascii even with unicode packet.
1745   STR_NOALIGN   means don't do alignment.
1746  dest_len is the maximum length allowed in the destination. If dest_len
1747  is -1 then no maxiumum is used.
1748 **/
1749
1750 size_t push_string_fn(const char *function, unsigned int line,
1751                       const void *base_ptr, uint16 flags2,
1752                       void *dest, const char *src,
1753                       size_t dest_len, int flags)
1754 {
1755 #ifdef DEVELOPER
1756         /* We really need to zero fill here, not clobber
1757          * region, as we want to ensure that valgrind thinks
1758          * all of the outgoing buffer has been written to
1759          * so a send() or write() won't trap an error.
1760          * JRA.
1761          */
1762 #if 0
1763         clobber_region(function, line, dest, dest_len);
1764 #else
1765         memset(dest, '\0', dest_len);
1766 #endif
1767 #endif
1768
1769         if (!(flags & STR_ASCII) && \
1770             ((flags & STR_UNICODE || \
1771               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1772                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1773         }
1774         return push_ascii(dest, src, dest_len, flags);
1775 }
1776
1777
1778 /**
1779  Copy a string from a unicode or ascii source (depending on
1780  the packet flags) to a char* destination.
1781  Flags can have:
1782   STR_TERMINATE means the string in src is null terminated.
1783   STR_UNICODE   means to force as unicode.
1784   STR_ASCII     use ascii even with unicode packet.
1785   STR_NOALIGN   means don't do alignment.
1786  if STR_TERMINATE is set then src_len is ignored is it is -1
1787  src_len is the length of the source area in bytes.
1788  Return the number of bytes occupied by the string in src.
1789  The resulting string in "dest" is always null terminated.
1790 **/
1791
1792 size_t pull_string_fn(const char *function,
1793                         unsigned int line,
1794                         const void *base_ptr,
1795                         uint16 smb_flags2,
1796                         char *dest,
1797                         const void *src,
1798                         size_t dest_len,
1799                         size_t src_len,
1800                         int flags)
1801 {
1802 #ifdef DEVELOPER
1803         clobber_region(function, line, dest, dest_len);
1804 #endif
1805
1806         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1807                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1808                           "UNICODE defined");
1809         }
1810
1811         if (!(flags & STR_ASCII) && \
1812             ((flags & STR_UNICODE || \
1813               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1814                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1815         }
1816         return pull_ascii(dest, src, dest_len, src_len, flags);
1817 }
1818
1819 /**
1820  Copy a string from a unicode or ascii source (depending on
1821  the packet flags) to a char* destination.
1822  Variant that uses talloc.
1823  Flags can have:
1824   STR_TERMINATE means the string in src is null terminated.
1825   STR_UNICODE   means to force as unicode.
1826   STR_ASCII     use ascii even with unicode packet.
1827   STR_NOALIGN   means don't do alignment.
1828  if STR_TERMINATE is set then src_len is ignored is it is -1
1829  src_len is the length of the source area in bytes.
1830  Return the number of bytes occupied by the string in src.
1831  The resulting string in "dest" is always null terminated.
1832 **/
1833
1834 size_t pull_string_talloc_fn(const char *function,
1835                         unsigned int line,
1836                         TALLOC_CTX *ctx,
1837                         const void *base_ptr,
1838                         uint16 smb_flags2,
1839                         char **ppdest,
1840                         const void *src,
1841                         size_t src_len,
1842                         int flags)
1843 {
1844         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1845                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1846                           "UNICODE defined");
1847         }
1848
1849         if (!(flags & STR_ASCII) && \
1850             ((flags & STR_UNICODE || \
1851               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1852                 return pull_ucs2_base_talloc(ctx,
1853                                         base_ptr,
1854                                         ppdest,
1855                                         src,
1856                                         src_len,
1857                                         flags);
1858         }
1859         return pull_ascii_base_talloc(ctx,
1860                                         ppdest,
1861                                         src,
1862                                         src_len,
1863                                         flags);
1864 }
1865
1866
1867 size_t align_string(const void *base_ptr, const char *p, int flags)
1868 {
1869         if (!(flags & STR_ASCII) && \
1870             ((flags & STR_UNICODE || \
1871               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1872                 return ucs2_align(base_ptr, p, flags);
1873         }
1874         return 0;
1875 }
1876
1877 /*
1878   Return the unicode codepoint for the next multi-byte CH_UNIX character
1879   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1880
1881   Also return the number of bytes consumed (which tells the caller
1882   how many bytes to skip to get to the next CH_UNIX character).
1883
1884   Return INVALID_CODEPOINT if the next character cannot be converted.
1885 */
1886
1887 codepoint_t next_codepoint(const char *str, size_t *size)
1888 {
1889         /* It cannot occupy more than 4 bytes in UTF16 format */
1890         uint8_t buf[4];
1891         smb_iconv_t descriptor;
1892         size_t ilen_orig;
1893         size_t ilen;
1894         size_t olen;
1895         char *outbuf;
1896
1897         if ((str[0] & 0x80) == 0) {
1898                 *size = 1;
1899                 return (codepoint_t)str[0];
1900         }
1901
1902         /* We assume that no multi-byte character can take
1903            more than 5 bytes. This is OK as we only
1904            support codepoints up to 1M */
1905
1906         ilen_orig = strnlen(str, 5);
1907         ilen = ilen_orig;
1908
1909         lazy_initialize_conv();
1910
1911         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1912         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1913                 *size = 1;
1914                 return INVALID_CODEPOINT;
1915         }
1916
1917         /* This looks a little strange, but it is needed to cope
1918            with codepoints above 64k which are encoded as per RFC2781. */
1919         olen = 2;
1920         outbuf = (char *)buf;
1921         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1922         if (olen == 2) {
1923                 /* We failed to convert to a 2 byte character.
1924                    See if we can convert to a 4 UTF16-LE byte char encoding.
1925                 */
1926                 olen = 4;
1927                 outbuf = (char *)buf;
1928                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1929                 if (olen == 4) {
1930                         /* We didn't convert any bytes */
1931                         *size = 1;
1932                         return INVALID_CODEPOINT;
1933                 }
1934                 olen = 4 - olen;
1935         } else {
1936                 olen = 2 - olen;
1937         }
1938
1939         *size = ilen_orig - ilen;
1940
1941         if (olen == 2) {
1942                 /* 2 byte, UTF16-LE encoded value. */
1943                 return (codepoint_t)SVAL(buf, 0);
1944         }
1945         if (olen == 4) {
1946                 /* Decode a 4 byte UTF16-LE character manually.
1947                    See RFC2871 for the encoding machanism.
1948                 */
1949                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1950                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1951
1952                 return (codepoint_t)0x10000 +
1953                                 (w1 << 10) + w2;
1954         }
1955
1956         /* no other length is valid */
1957         return INVALID_CODEPOINT;
1958 }