source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
  48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
  50 static bool initialized;
  51
  52 /**
  53  * Return the name of a charset to give to iconv().
  54  **/
  55 static const char *charset_name(charset_t ch)
  56 {
  57         const char *ret = NULL;
  58
  59         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  60         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  61         else if (ch == CH_UNIX) ret = lp_unix_charset();
  62         else if (ch == CH_DOS) ret = lp_dos_charset();
  63         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  64         else if (ch == CH_UTF8) ret = "UTF8";
  65
  66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  67         if (ret && !strcmp(ret, "LOCALE")) {
  68                 const char *ln = NULL;
  69
  70 #ifdef HAVE_SETLOCALE
  71                 setlocale(LC_ALL, "");
  72 #endif
  73                 ln = nl_langinfo(CODESET);
  74                 if (ln) {
  75                         /* Check whether the charset name is supported
  76                            by iconv */
  77                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  78                         if (handle == (smb_iconv_t) -1) {
  79                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  80                                 ln = NULL;
  81                         } else {
  82                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  83                                 smb_iconv_close(handle);
  84                         }
  85                 }
  86                 ret = ln;
  87         }
  88 #endif
  89
  90         if (!ret || !*ret) ret = "ASCII";
  91         return ret;
  92 }
  93
  94 void lazy_initialize_conv(void)
  95 {
  96         if (!initialized) {
  97                 load_case_tables();
  98                 init_iconv();
  99                 initialized = true;
 100         }
 101 }
 102
 103 /**
 104  * Destroy global objects allocated by init_iconv()
 105  **/
 106 void gfree_charcnv(void)
 107 {
 108         int c1, c2;
 109
 110         for (c1=0;c1<NUM_CHARSETS;c1++) {
 111                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 112                         if ( conv_handles[c1][c2] ) {
 113                                 smb_iconv_close( conv_handles[c1][c2] );
 114                                 conv_handles[c1][c2] = 0;
 115                         }
 116                 }
 117         }
 118         initialized = false;
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         bool did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_valid_table();
 182                 conv_silent = False;
 183         }
 184 }
 185
 186 /**
 187  * Convert string from one encoding to another, making error checking etc
 188  * Slow path version - uses (slow) iconv.
 189  *
 190  * @param src pointer to source string (multibyte or singlebyte)
 191  * @param srclen length of the source string in bytes
 192  * @param dest pointer to destination string (multibyte or singlebyte)
 193  * @param destlen maximal length allowed for string
 194  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 195  * @returns the number of bytes occupied in the destination
 196  *
 197  * Ensure the srclen contains the terminating zero.
 198  *
 199  **/
 200
 201 static size_t convert_string_internal(charset_t from, charset_t to,
 202                       void const *src, size_t srclen,
 203                       void *dest, size_t destlen, bool allow_bad_conv)
 204 {
 205         size_t i_len, o_len;
 206         size_t retval;
 207         const char* inbuf = (const char*)src;
 208         char* outbuf = (char*)dest;
 209         smb_iconv_t descriptor;
 210
 211         lazy_initialize_conv();
 212
 213         descriptor = conv_handles[from][to];
 214
 215         if (srclen == (size_t)-1) {
 216                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 217                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 218                 } else {
 219                         srclen = strlen((const char *)src)+1;
 220                 }
 221         }
 222
 223
 224         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 225                 if (!conv_silent)
 226                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 227                 return (size_t)-1;
 228         }
 229
 230         i_len=srclen;
 231         o_len=destlen;
 232
 233  again:
 234
 235         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 236         if(retval==(size_t)-1) {
 237                 const char *reason="unknown error";
 238                 switch(errno) {
 239                         case EINVAL:
 240                                 reason="Incomplete multibyte sequence";
 241                                 if (!conv_silent)
 242                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 243                                 if (allow_bad_conv)
 244                                         goto use_as_is;
 245                                 return (size_t)-1;
 246                         case E2BIG:
 247                                 reason="No more room";
 248                                 if (!conv_silent) {
 249                                         if (from == CH_UNIX) {
 250                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 251                                                         charset_name(from), charset_name(to),
 252                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 253                                         } else {
 254                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 255                                                         charset_name(from), charset_name(to),
 256                                                         (unsigned int)srclen, (unsigned int)destlen));
 257                                         }
 258                                 }
 259                                 break;
 260                         case EILSEQ:
 261                                 reason="Illegal multibyte sequence";
 262                                 if (!conv_silent)
 263                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 264                                 if (allow_bad_conv)
 265                                         goto use_as_is;
 266
 267                                 return (size_t)-1;
 268                         default:
 269                                 if (!conv_silent)
 270                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 271                                 return (size_t)-1;
 272                 }
 273                 /* smb_panic(reason); */
 274         }
 275         return destlen-o_len;
 276
 277  use_as_is:
 278
 279         /*
 280          * Conversion not supported. This is actually an error, but there are so
 281          * many misconfigured iconv systems and smb.conf's out there we can't just
 282          * fail. Do a very bad conversion instead.... JRA.
 283          */
 284
 285         {
 286                 if (o_len == 0 || i_len == 0)
 287                         return destlen - o_len;
 288
 289                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 290                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 291                         /* Can't convert from utf16 any endian to multibyte.
 292                            Replace with the default fail char.
 293                         */
 294                         if (i_len < 2)
 295                                 return destlen - o_len;
 296                         if (i_len >= 2) {
 297                                 *outbuf = lp_failed_convert_char();
 298
 299                                 outbuf++;
 300                                 o_len--;
 301
 302                                 inbuf += 2;
 303                                 i_len -= 2;
 304                         }
 305
 306                         if (o_len == 0 || i_len == 0)
 307                                 return destlen - o_len;
 308
 309                         /* Keep trying with the next char... */
 310                         goto again;
 311
 312                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 313                         /* Can't convert to UTF16LE - just widen by adding the
 314                            default fail char then zero.
 315                         */
 316                         if (o_len < 2)
 317                                 return destlen - o_len;
 318
 319                         outbuf[0] = lp_failed_convert_char();
 320                         outbuf[1] = '\0';
 321
 322                         inbuf++;
 323                         i_len--;
 324
 325                         outbuf += 2;
 326                         o_len -= 2;
 327
 328                         if (o_len == 0 || i_len == 0)
 329                                 return destlen - o_len;
 330
 331                         /* Keep trying with the next char... */
 332                         goto again;
 333
 334                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 335                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 336                         /* Failed multibyte to multibyte. Just copy the default fail char and
 337                                 try again. */
 338                         outbuf[0] = lp_failed_convert_char();
 339
 340                         inbuf++;
 341                         i_len--;
 342
 343                         outbuf++;
 344                         o_len--;
 345
 346                         if (o_len == 0 || i_len == 0)
 347                                 return destlen - o_len;
 348
 349                         /* Keep trying with the next char... */
 350                         goto again;
 351
 352                 } else {
 353                         /* Keep compiler happy.... */
 354                         return destlen - o_len;
 355                 }
 356         }
 357 }
 358
 359 /**
 360  * Convert string from one encoding to another, making error checking etc
 361  * Fast path version - handles ASCII first.
 362  *
 363  * @param src pointer to source string (multibyte or singlebyte)
 364  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 365  * @param dest pointer to destination string (multibyte or singlebyte)
 366  * @param destlen maximal length allowed for string - *NEVER* -1.
 367  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 368  * @returns the number of bytes occupied in the destination
 369  *
 370  * Ensure the srclen contains the terminating zero.
 371  *
 372  * This function has been hand-tuned to provide a fast path.
 373  * Don't change unless you really know what you are doing. JRA.
 374  **/
 375
 376 size_t convert_string(charset_t from, charset_t to,
 377                       void const *src, size_t srclen,
 378                       void *dest, size_t destlen, bool allow_bad_conv)
 379 {
 380         /*
 381          * NB. We deliberately don't do a strlen here if srclen == -1.
 382          * This is very expensive over millions of calls and is taken
 383          * care of in the slow path in convert_string_internal. JRA.
 384          */
 385
 386 #ifdef DEVELOPER
 387         SMB_ASSERT(destlen != (size_t)-1);
 388 #endif
 389
 390         if (srclen == 0)
 391                 return 0;
 392
 393         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 394                 const unsigned char *p = (const unsigned char *)src;
 395                 unsigned char *q = (unsigned char *)dest;
 396                 size_t slen = srclen;
 397                 size_t dlen = destlen;
 398                 unsigned char lastp = '\0';
 399                 size_t retval = 0;
 400
 401                 /* If all characters are ascii, fast path here. */
 402                 while (slen && dlen) {
 403                         if ((lastp = *p) <= 0x7f) {
 404                                 *q++ = *p++;
 405                                 if (slen != (size_t)-1) {
 406                                         slen--;
 407                                 }
 408                                 dlen--;
 409                                 retval++;
 410                                 if (!lastp)
 411                                         break;
 412                         } else {
 413 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 414                                 goto general_case;
 415 #else
 416                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 417                                 if (ret == (size_t)-1) {
 418                                         return ret;
 419                                 }
 420                                 return retval + ret;
 421 #endif
 422                         }
 423                 }
 424                 if (!dlen) {
 425                         /* Even if we fast path we should note if we ran out of room. */
 426                         if (((slen != (size_t)-1) && slen) ||
 427                                         ((slen == (size_t)-1) && lastp)) {
 428                                 errno = E2BIG;
 429                         }
 430                 }
 431                 return retval;
 432         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 433                 const unsigned char *p = (const unsigned char *)src;
 434                 unsigned char *q = (unsigned char *)dest;
 435                 size_t retval = 0;
 436                 size_t slen = srclen;
 437                 size_t dlen = destlen;
 438                 unsigned char lastp = '\0';
 439
 440                 /* If all characters are ascii, fast path here. */
 441                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 442                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 443                                 *q++ = *p;
 444                                 if (slen != (size_t)-1) {
 445                                         slen -= 2;
 446                                 }
 447                                 p += 2;
 448                                 dlen--;
 449                                 retval++;
 450                                 if (!lastp)
 451                                         break;
 452                         } else {
 453 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 454                                 goto general_case;
 455 #else
 456                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 457                                 if (ret == (size_t)-1) {
 458                                         return ret;
 459                                 }
 460                                 return retval + ret;
 461 #endif
 462                         }
 463                 }
 464                 if (!dlen) {
 465                         /* Even if we fast path we should note if we ran out of room. */
 466                         if (((slen != (size_t)-1) && slen) ||
 467                                         ((slen == (size_t)-1) && lastp)) {
 468                                 errno = E2BIG;
 469                         }
 470                 }
 471                 return retval;
 472         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 473                 const unsigned char *p = (const unsigned char *)src;
 474                 unsigned char *q = (unsigned char *)dest;
 475                 size_t retval = 0;
 476                 size_t slen = srclen;
 477                 size_t dlen = destlen;
 478                 unsigned char lastp = '\0';
 479
 480                 /* If all characters are ascii, fast path here. */
 481                 while (slen && (dlen >= 2)) {
 482                         if ((lastp = *p) <= 0x7F) {
 483                                 *q++ = *p++;
 484                                 *q++ = '\0';
 485                                 if (slen != (size_t)-1) {
 486                                         slen--;
 487                                 }
 488                                 dlen -= 2;
 489                                 retval += 2;
 490                                 if (!lastp)
 491                                         break;
 492                         } else {
 493 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 494                                 goto general_case;
 495 #else
 496                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 497                                 if (ret == (size_t)-1) {
 498                                         return ret;
 499                                 }
 500                                 return retval + ret;
 501 #endif
 502                         }
 503                 }
 504                 if (!dlen) {
 505                         /* Even if we fast path we should note if we ran out of room. */
 506                         if (((slen != (size_t)-1) && slen) ||
 507                                         ((slen == (size_t)-1) && lastp)) {
 508                                 errno = E2BIG;
 509                         }
 510                 }
 511                 return retval;
 512         }
 513
 514 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 515   general_case:
 516 #endif
 517         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 518 }
 519
 520 /**
 521  * Convert between character sets, allocating a new buffer using talloc for the result.
 522  *
 523  * @param srclen length of source buffer.
 524  * @param dest always set at least to NULL
 525  * @parm converted_size set to the number of bytes occupied by the string in
 526  * the destination on success.
 527  * @note -1 is not accepted for srclen.
 528  *
 529  * @return true if new buffer was correctly allocated, and string was
 530  * converted.
 531  *
 532  * Ensure the srclen contains the terminating zero.
 533  *
 534  * I hate the goto's in this function. It's embarressing.....
 535  * There has to be a cleaner way to do this. JRA.
 536  */
 537 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 538                            void const *src, size_t srclen, void *dst,
 539                            size_t *converted_size, bool allow_bad_conv)
 540
 541 {
 542         size_t i_len, o_len, destlen = (srclen * 3) / 2;
 543         size_t retval;
 544         const char *inbuf = (const char *)src;
 545         char *outbuf = NULL, *ob = NULL;
 546         smb_iconv_t descriptor;
 547         void **dest = (void **)dst;
 548
 549         *dest = NULL;
 550
 551         if (!converted_size) {
 552                 errno = EINVAL;
 553                 return false;
 554         }
 555
 556         if (src == NULL || srclen == (size_t)-1) {
 557                 errno = EINVAL;
 558                 return false;
 559         }
 560         if (srclen == 0) {
 561                 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
 562                 if (ob == NULL) {
 563                         errno = ENOMEM;
 564                         return false;
 565                 }
 566                 *dest = ob;
 567                 *converted_size = 0;
 568                 return true;
 569         }
 570
 571         lazy_initialize_conv();
 572
 573         descriptor = conv_handles[from][to];
 574
 575         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 576                 if (!conv_silent)
 577                         DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
 578                 errno = EOPNOTSUPP;
 579                 return false;
 580         }
 581
 582   convert:
 583
 584         /* +2 is for ucs2 null termination. */
 585         if ((destlen*2)+2 < destlen) {
 586                 /* wrapped ! abort. */
 587                 if (!conv_silent)
 588                         DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
 589                 if (!ctx)
 590                         SAFE_FREE(outbuf);
 591                 errno = EOPNOTSUPP;
 592                 return false;
 593         } else {
 594                 destlen = destlen * 2;
 595         }
 596
 597         /* +2 is for ucs2 null termination. */
 598         ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
 599
 600         if (!ob) {
 601                 DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
 602                 errno = ENOMEM;
 603                 return false;
 604         }
 605         outbuf = ob;
 606         i_len = srclen;
 607         o_len = destlen;
 608
 609  again:
 610
 611         retval = smb_iconv(descriptor,
 612                            &inbuf, &i_len,
 613                            &outbuf, &o_len);
 614         if(retval == (size_t)-1)                {
 615                 const char *reason="unknown error";
 616                 switch(errno) {
 617                         case EINVAL:
 618                                 reason="Incomplete multibyte sequence";
 619                                 if (!conv_silent)
 620                                         DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
 621                                 if (allow_bad_conv)
 622                                         goto use_as_is;
 623                                 break;
 624                         case E2BIG:
 625                                 goto convert;
 626                         case EILSEQ:
 627                                 reason="Illegal multibyte sequence";
 628                                 if (!conv_silent)
 629                                         DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
 630                                 if (allow_bad_conv)
 631                                         goto use_as_is;
 632                                 break;
 633                 }
 634                 if (!conv_silent)
 635                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 636                 /* smb_panic(reason); */
 637                 TALLOC_FREE(ob);
 638                 return false;
 639         }
 640
 641   out:
 642
 643         destlen = destlen - o_len;
 644         /* Don't shrink unless we're reclaiming a lot of
 645          * space. This is in the hot codepath and these
 646          * reallocs *cost*. JRA.
 647          */
 648         if (o_len > 1024) {
 649                 /* We're shrinking here so we know the +2 is safe from wrap. */
 650                 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
 651         }
 652
 653         if (destlen && !ob) {
 654                 DEBUG(0, ("convert_string_talloc: out of memory!\n"));
 655                 errno = ENOMEM;
 656                 return false;
 657         }
 658
 659         *dest = ob;
 660
 661         /* Must ucs2 null terminate in the extra space we allocated. */
 662         ob[destlen] = '\0';
 663         ob[destlen+1] = '\0';
 664
 665         *converted_size = destlen;
 666         return true;
 667
 668  use_as_is:
 669
 670         /*
 671          * Conversion not supported. This is actually an error, but there are so
 672          * many misconfigured iconv systems and smb.conf's out there we can't just
 673          * fail. Do a very bad conversion instead.... JRA.
 674          */
 675
 676         {
 677                 if (o_len == 0 || i_len == 0)
 678                         goto out;
 679
 680                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 681                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 682                         /* Can't convert from utf16 any endian to multibyte.
 683                            Replace with the default fail char.
 684                         */
 685
 686                         if (i_len < 2)
 687                                 goto out;
 688
 689                         if (i_len >= 2) {
 690                                 *outbuf = lp_failed_convert_char();
 691
 692                                 outbuf++;
 693                                 o_len--;
 694
 695                                 inbuf += 2;
 696                                 i_len -= 2;
 697                         }
 698
 699                         if (o_len == 0 || i_len == 0)
 700                                 goto out;
 701
 702                         /* Keep trying with the next char... */
 703                         goto again;
 704
 705                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 706                         /* Can't convert to UTF16LE - just widen by adding the
 707                            default fail char then zero.
 708                         */
 709                         if (o_len < 2)
 710                                 goto out;
 711
 712                         outbuf[0] = lp_failed_convert_char();
 713                         outbuf[1] = '\0';
 714
 715                         inbuf++;
 716                         i_len--;
 717
 718                         outbuf += 2;
 719                         o_len -= 2;
 720
 721                         if (o_len == 0 || i_len == 0)
 722                                 goto out;
 723
 724                         /* Keep trying with the next char... */
 725                         goto again;
 726
 727                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 728                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 729                         /* Failed multibyte to multibyte. Just copy the default fail char and
 730                            try again. */
 731                         outbuf[0] = lp_failed_convert_char();
 732
 733                         inbuf++;
 734                         i_len--;
 735
 736                         outbuf++;
 737                         o_len--;
 738
 739                         if (o_len == 0 || i_len == 0)
 740                                 goto out;
 741
 742                         /* Keep trying with the next char... */
 743                         goto again;
 744
 745                 } else {
 746                         /* Keep compiler happy.... */
 747                         goto out;
 748                 }
 749         }
 750 }
 751
 752 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 753 {
 754         size_t size;
 755         smb_ucs2_t *buffer;
 756
 757         if (!push_ucs2_talloc(NULL, &buffer, src, &size)) {
 758                 return (size_t)-1;
 759         }
 760
 761         if (!strupper_w(buffer) && (dest == src)) {
 762                 TALLOC_FREE(buffer);
 763                 return srclen;
 764         }
 765
 766         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 767         TALLOC_FREE(buffer);
 768         return size;
 769 }
 770
 771 /**
 772  talloc_strdup() a unix string to upper case.
 773 **/
 774
 775 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 776 {
 777         char *out_buffer = talloc_strdup(ctx,s);
 778         const unsigned char *p = (const unsigned char *)s;
 779         unsigned char *q = (unsigned char *)out_buffer;
 780
 781         if (!q) {
 782                 return NULL;
 783         }
 784
 785         /* this is quite a common operation, so we want it to be
 786            fast. We optimise for the ascii case, knowing that all our
 787            supported multi-byte character sets are ascii-compatible
 788            (ie. they match for the first 128 chars) */
 789
 790         while (*p) {
 791                 if (*p & 0x80)
 792                         break;
 793                 *q++ = toupper_ascii_fast(*p);
 794                 p++;
 795         }
 796
 797         if (*p) {
 798                 /* MB case. */
 799                 size_t converted_size, converted_size2;
 800                 smb_ucs2_t *ubuf = NULL;
 801
 802                 /* We're not using the ascii buffer above. */
 803                 TALLOC_FREE(out_buffer);
 804
 805                 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
 806                                            strlen(s)+1, (void *)&ubuf,
 807                                            &converted_size, True))
 808                 {
 809                         return NULL;
 810                 }
 811
 812                 strupper_w(ubuf);
 813
 814                 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
 815                                            converted_size, (void *)&out_buffer,
 816                                            &converted_size2, True))
 817                 {
 818                         TALLOC_FREE(ubuf);
 819                         return NULL;
 820                 }
 821
 822                 /* Don't need the intermediate buffer
 823                  * anymore.
 824                  */
 825                 TALLOC_FREE(ubuf);
 826         }
 827
 828         return out_buffer;
 829 }
 830
 831 char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
 832         return talloc_strdup_upper(ctx, s);
 833 }
 834
 835
 836 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 837 {
 838         size_t size;
 839         smb_ucs2_t *buffer = NULL;
 840
 841         if (!convert_string_talloc(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 842                                    (void **)(void *)&buffer, &size,
 843                                    True))
 844         {
 845                 smb_panic("failed to create UCS2 buffer");
 846         }
 847         if (!strlower_w(buffer) && (dest == src)) {
 848                 TALLOC_FREE(buffer);
 849                 return srclen;
 850         }
 851         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 852         TALLOC_FREE(buffer);
 853         return size;
 854 }
 855
 856
 857 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
 858 {
 859         size_t converted_size;
 860         smb_ucs2_t *buffer = NULL;
 861         char *out_buffer;
 862
 863         if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
 864                 return NULL;
 865         }
 866
 867         strlower_w(buffer);
 868
 869         if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
 870                 TALLOC_FREE(buffer);
 871                 return NULL;
 872         }
 873
 874         TALLOC_FREE(buffer);
 875
 876         return out_buffer;
 877 }
 878
 879 char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
 880         return talloc_strdup_lower(ctx, s);
 881 }
 882
 883 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 884 {
 885         if (flags & (STR_NOALIGN|STR_ASCII))
 886                 return 0;
 887         return PTR_DIFF(p, base_ptr) & 1;
 888 }
 889
 890
 891 /**
 892  * Copy a string from a char* unix src to a dos codepage string destination.
 893  *
 894  * @return the number of bytes occupied by the string in the destination.
 895  *
 896  * @param flags can include
 897  * <dl>
 898  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 899  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 900  * </dl>
 901  *
 902  * @param dest_len the maximum length in bytes allowed in the
 903  * destination.
 904  **/
 905 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 906 {
 907         size_t src_len = strlen(src);
 908         char *tmpbuf = NULL;
 909         size_t ret;
 910
 911         /* No longer allow a length of -1. */
 912         if (dest_len == (size_t)-1) {
 913                 smb_panic("push_ascii - dest_len == -1");
 914         }
 915
 916         if (flags & STR_UPPER) {
 917                 tmpbuf = SMB_STRDUP(src);
 918                 if (!tmpbuf) {
 919                         smb_panic("malloc fail");
 920                 }
 921                 strupper_m(tmpbuf);
 922                 src = tmpbuf;
 923         }
 924
 925         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 926                 src_len++;
 927         }
 928
 929         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 930         if (ret == (size_t)-1 &&
 931                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 932                         && dest_len > 0) {
 933                 ((char *)dest)[0] = '\0';
 934         }
 935         SAFE_FREE(tmpbuf);
 936         return ret;
 937 }
 938
 939 size_t push_ascii_fstring(void *dest, const char *src)
 940 {
 941         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 942 }
 943
 944 /********************************************************************
 945  Push an nstring - ensure null terminated. Written by
 946  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 947 ********************************************************************/
 948
 949 size_t push_ascii_nstring(void *dest, const char *src)
 950 {
 951         size_t i, buffer_len, dest_len;
 952         smb_ucs2_t *buffer;
 953
 954         conv_silent = True;
 955         if (!push_ucs2_talloc(NULL, &buffer, src, &buffer_len)) {
 956                 smb_panic("failed to create UCS2 buffer");
 957         }
 958
 959         /* We're using buffer_len below to count ucs2 characters, not bytes. */
 960         buffer_len /= sizeof(smb_ucs2_t);
 961
 962         dest_len = 0;
 963         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
 964                 unsigned char mb[10];
 965                 /* Convert one smb_ucs2_t character at a time. */
 966                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
 967                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
 968                         memcpy((char *)dest + dest_len, mb, mb_len);
 969                         dest_len += mb_len;
 970                 } else {
 971                         errno = E2BIG;
 972                         break;
 973                 }
 974         }
 975         ((char *)dest)[dest_len] = '\0';
 976
 977         conv_silent = False;
 978         TALLOC_FREE(buffer);
 979         return dest_len;
 980 }
 981
 982 /********************************************************************
 983  Push and malloc an ascii string. src and dest null terminated.
 984 ********************************************************************/
 985
 986 bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
 987 {
 988         size_t src_len = strlen(src)+1;
 989
 990         *dest = NULL;
 991         return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
 992                                      (void **)dest, converted_size, True);
 993 }
 994
 995 /**
 996  * Copy a string from a dos codepage source to a unix char* destination.
 997  *
 998  * The resulting string in "dest" is always null terminated.
 999  *
1000  * @param flags can have:
1001  * <dl>
1002  * <dt>STR_TERMINATE</dt>
1003  * <dd>STR_TERMINATE means the string in @p src
1004  * is null terminated, and src_len is ignored.</dd>
1005  * </dl>
1006  *
1007  * @param src_len is the length of the source area in bytes.
1008  * @returns the number of bytes occupied by the string in @p src.
1009  **/
1010 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1011 {
1012         size_t ret;
1013
1014         if (dest_len == (size_t)-1) {
1015                 /* No longer allow dest_len of -1. */
1016                 smb_panic("pull_ascii - invalid dest_len of -1");
1017         }
1018
1019         if (flags & STR_TERMINATE) {
1020                 if (src_len == (size_t)-1) {
1021                         src_len = strlen((const char *)src) + 1;
1022                 } else {
1023                         size_t len = strnlen((const char *)src, src_len);
1024                         if (len < src_len)
1025                                 len++;
1026                         src_len = len;
1027                 }
1028         }
1029
1030         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1031         if (ret == (size_t)-1) {
1032                 ret = 0;
1033                 dest_len = 0;
1034         }
1035
1036         if (dest_len && ret) {
1037                 /* Did we already process the terminating zero ? */
1038                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1039                         dest[MIN(ret, dest_len-1)] = 0;
1040                 }
1041         } else  {
1042                 dest[0] = 0;
1043         }
1044
1045         return src_len;
1046 }
1047
1048 /**
1049  * Copy a string from a dos codepage source to a unix char* destination.
1050  Talloc version.
1051  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1052  needs fixing. JRA).
1053  *
1054  * The resulting string in "dest" is always null terminated.
1055  *
1056  * @param flags can have:
1057  * <dl>
1058  * <dt>STR_TERMINATE</dt>
1059  * <dd>STR_TERMINATE means the string in @p src
1060  * is null terminated, and src_len is ignored.</dd>
1061  * </dl>
1062  *
1063  * @param src_len is the length of the source area in bytes.
1064  * @returns the number of bytes occupied by the string in @p src.
1065  **/
1066
1067 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1068                                      char **ppdest,
1069                                      const void *src,
1070                                      size_t src_len,
1071                                      int flags)
1072 {
1073         char *dest = NULL;
1074         size_t dest_len;
1075
1076         *ppdest = NULL;
1077
1078         if (!src_len) {
1079                 return 0;
1080         }
1081
1082         if (flags & STR_TERMINATE) {
1083                 if (src_len == (size_t)-1) {
1084                         src_len = strlen((const char *)src) + 1;
1085                 } else {
1086                         size_t len = strnlen((const char *)src, src_len);
1087                         if (len < src_len)
1088                                 len++;
1089                         src_len = len;
1090                 }
1091                 /* Ensure we don't use an insane length from the client. */
1092                 if (src_len >= 1024*1024) {
1093                         char *msg = talloc_asprintf(ctx,
1094                                         "Bad src length (%u) in "
1095                                         "pull_ascii_base_talloc",
1096                                         (unsigned int)src_len);
1097                         smb_panic(msg);
1098                 }
1099         } else {
1100                 /* Can't have an unlimited length
1101                  * non STR_TERMINATE'd.
1102                  */
1103                 if (src_len == (size_t)-1) {
1104                         errno = EINVAL;
1105                         return 0;
1106                 }
1107         }
1108
1109         /* src_len != -1 here. */
1110
1111         if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1112                                      &dest_len, True)) {
1113                 dest_len = 0;
1114         }
1115
1116         if (dest_len && dest) {
1117                 /* Did we already process the terminating zero ? */
1118                 if (dest[dest_len-1] != 0) {
1119                         size_t size = talloc_get_size(dest);
1120                         /* Have we got space to append the '\0' ? */
1121                         if (size <= dest_len) {
1122                                 /* No, realloc. */
1123                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1124                                                 dest_len+1);
1125                                 if (!dest) {
1126                                         /* talloc fail. */
1127                                         dest_len = (size_t)-1;
1128                                         return 0;
1129                                 }
1130                         }
1131                         /* Yay - space ! */
1132                         dest[dest_len] = '\0';
1133                         dest_len++;
1134                 }
1135         } else if (dest) {
1136                 dest[0] = 0;
1137         }
1138
1139         *ppdest = dest;
1140         return src_len;
1141 }
1142
1143 size_t pull_ascii_fstring(char *dest, const void *src)
1144 {
1145         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1146 }
1147
1148 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1149
1150 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1151 {
1152         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1153 }
1154
1155 /**
1156  * Copy a string from a char* src to a unicode destination.
1157  *
1158  * @returns the number of bytes occupied by the string in the destination.
1159  *
1160  * @param flags can have:
1161  *
1162  * <dl>
1163  * <dt>STR_TERMINATE <dd>means include the null termination.
1164  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1165  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1166  * </dl>
1167  *
1168  * @param dest_len is the maximum length allowed in the
1169  * destination.
1170  **/
1171
1172 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1173 {
1174         size_t len=0;
1175         size_t src_len;
1176         size_t ret;
1177
1178         if (dest_len == (size_t)-1) {
1179                 /* No longer allow dest_len of -1. */
1180                 smb_panic("push_ucs2 - invalid dest_len of -1");
1181         }
1182
1183         if (flags & STR_TERMINATE)
1184                 src_len = (size_t)-1;
1185         else
1186                 src_len = strlen(src);
1187
1188         if (ucs2_align(base_ptr, dest, flags)) {
1189                 *(char *)dest = 0;
1190                 dest = (void *)((char *)dest + 1);
1191                 if (dest_len)
1192                         dest_len--;
1193                 len++;
1194         }
1195
1196         /* ucs2 is always a multiple of 2 bytes */
1197         dest_len &= ~1;
1198
1199         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1200         if (ret == (size_t)-1) {
1201                 if ((flags & STR_TERMINATE) &&
1202                                 dest &&
1203                                 dest_len) {
1204                         *(char *)dest = 0;
1205                 }
1206                 return len;
1207         }
1208
1209         len += ret;
1210
1211         if (flags & STR_UPPER) {
1212                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1213                 size_t i;
1214
1215                 /* We check for i < (ret / 2) below as the dest string isn't null
1216                    terminated if STR_TERMINATE isn't set. */
1217
1218                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1219                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1220                         if (v != dest_ucs2[i]) {
1221                                 dest_ucs2[i] = v;
1222                         }
1223                 }
1224         }
1225
1226         return len;
1227 }
1228
1229
1230 /**
1231  * Copy a string from a unix char* src to a UCS2 destination,
1232  * allocating a buffer using talloc().
1233  *
1234  * @param dest always set at least to NULL
1235  * @parm converted_size set to the number of bytes occupied by the string in
1236  * the destination on success.
1237  *
1238  * @return true if new buffer was correctly allocated, and string was
1239  * converted.
1240  **/
1241 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1242                       size_t *converted_size)
1243 {
1244         size_t src_len = strlen(src)+1;
1245
1246         *dest = NULL;
1247         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1248                                      (void **)dest, converted_size, True);
1249 }
1250
1251
1252 /**
1253  Copy a string from a char* src to a UTF-8 destination.
1254  Return the number of bytes occupied by the string in the destination
1255  Flags can have:
1256   STR_TERMINATE means include the null termination
1257   STR_UPPER     means uppercase in the destination
1258  dest_len is the maximum length allowed in the destination. If dest_len
1259  is -1 then no maxiumum is used.
1260 **/
1261
1262 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1263 {
1264         size_t src_len = 0;
1265         size_t ret;
1266         char *tmpbuf = NULL;
1267
1268         if (dest_len == (size_t)-1) {
1269                 /* No longer allow dest_len of -1. */
1270                 smb_panic("push_utf8 - invalid dest_len of -1");
1271         }
1272
1273         if (flags & STR_UPPER) {
1274                 tmpbuf = strupper_talloc(NULL, src);
1275                 if (!tmpbuf) {
1276                         return (size_t)-1;
1277                 }
1278                 src = tmpbuf;
1279                 src_len = strlen(src);
1280         }
1281
1282         src_len = strlen(src);
1283         if (flags & STR_TERMINATE) {
1284                 src_len++;
1285         }
1286
1287         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1288         TALLOC_FREE(tmpbuf);
1289         return ret;
1290 }
1291
1292 size_t push_utf8_fstring(void *dest, const char *src)
1293 {
1294         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1295 }
1296
1297 /**
1298  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1299  *
1300  * @param dest always set at least to NULL
1301  * @parm converted_size set to the number of bytes occupied by the string in
1302  * the destination on success.
1303  *
1304  * @return true if new buffer was correctly allocated, and string was
1305  * converted.
1306  **/
1307
1308 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1309                       size_t *converted_size)
1310 {
1311         size_t src_len = strlen(src)+1;
1312
1313         *dest = NULL;
1314         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1315                                      (void**)dest, converted_size, True);
1316 }
1317
1318 /**
1319  Copy a string from a ucs2 source to a unix char* destination.
1320  Flags can have:
1321   STR_TERMINATE means the string in src is null terminated.
1322   STR_NOALIGN   means don't try to align.
1323  if STR_TERMINATE is set then src_len is ignored if it is -1.
1324  src_len is the length of the source area in bytes
1325  Return the number of bytes occupied by the string in src.
1326  The resulting string in "dest" is always null terminated.
1327 **/
1328
1329 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1330 {
1331         size_t ret;
1332
1333         if (dest_len == (size_t)-1) {
1334                 /* No longer allow dest_len of -1. */
1335                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1336         }
1337
1338         if (!src_len) {
1339                 if (dest && dest_len > 0) {
1340                         dest[0] = '\0';
1341                 }
1342                 return 0;
1343         }
1344
1345         if (ucs2_align(base_ptr, src, flags)) {
1346                 src = (const void *)((const char *)src + 1);
1347                 if (src_len != (size_t)-1)
1348                         src_len--;
1349         }
1350
1351         if (flags & STR_TERMINATE) {
1352                 /* src_len -1 is the default for null terminated strings. */
1353                 if (src_len != (size_t)-1) {
1354                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1355                                                 src_len/2);
1356                         if (len < src_len/2)
1357                                 len++;
1358                         src_len = len*2;
1359                 }
1360         }
1361
1362         /* ucs2 is always a multiple of 2 bytes */
1363         if (src_len != (size_t)-1)
1364                 src_len &= ~1;
1365
1366         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1367         if (ret == (size_t)-1) {
1368                 ret = 0;
1369                 dest_len = 0;
1370         }
1371
1372         if (src_len == (size_t)-1)
1373                 src_len = ret*2;
1374
1375         if (dest_len && ret) {
1376                 /* Did we already process the terminating zero ? */
1377                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1378                         dest[MIN(ret, dest_len-1)] = 0;
1379                 }
1380         } else {
1381                 dest[0] = 0;
1382         }
1383
1384         return src_len;
1385 }
1386
1387 /**
1388  Copy a string from a ucs2 source to a unix char* destination.
1389  Talloc version with a base pointer.
1390  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1391  needs fixing. JRA).
1392  Flags can have:
1393   STR_TERMINATE means the string in src is null terminated.
1394   STR_NOALIGN   means don't try to align.
1395  if STR_TERMINATE is set then src_len is ignored if it is -1.
1396  src_len is the length of the source area in bytes
1397  Return the number of bytes occupied by the string in src.
1398  The resulting string in "dest" is always null terminated.
1399 **/
1400
1401 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1402                         const void *base_ptr,
1403                         char **ppdest,
1404                         const void *src,
1405                         size_t src_len,
1406                         int flags)
1407 {
1408         char *dest;
1409         size_t dest_len;
1410
1411         *ppdest = NULL;
1412
1413 #ifdef DEVELOPER
1414         /* Ensure we never use the braindead "malloc" varient. */
1415         if (ctx == NULL) {
1416                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1417         }
1418 #endif
1419
1420         if (!src_len) {
1421                 return 0;
1422         }
1423
1424         if (ucs2_align(base_ptr, src, flags)) {
1425                 src = (const void *)((const char *)src + 1);
1426                 if (src_len != (size_t)-1)
1427                         src_len--;
1428         }
1429
1430         if (flags & STR_TERMINATE) {
1431                 /* src_len -1 is the default for null terminated strings. */
1432                 if (src_len != (size_t)-1) {
1433                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1434                                                 src_len/2);
1435                         if (len < src_len/2)
1436                                 len++;
1437                         src_len = len*2;
1438                 } else {
1439                         /*
1440                          * src_len == -1 - alloc interface won't take this
1441                          * so we must calculate.
1442                          */
1443                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1444                 }
1445                 /* Ensure we don't use an insane length from the client. */
1446                 if (src_len >= 1024*1024) {
1447                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1448                 }
1449         } else {
1450                 /* Can't have an unlimited length
1451                  * non STR_TERMINATE'd.
1452                  */
1453                 if (src_len == (size_t)-1) {
1454                         errno = EINVAL;
1455                         return 0;
1456                 }
1457         }
1458
1459         /* src_len != -1 here. */
1460
1461         /* ucs2 is always a multiple of 2 bytes */
1462         src_len &= ~1;
1463
1464         if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1465                                    (void *)&dest, &dest_len, True)) {
1466                 dest_len = 0;
1467         }
1468
1469         if (dest_len) {
1470                 /* Did we already process the terminating zero ? */
1471                 if (dest[dest_len-1] != 0) {
1472                         size_t size = talloc_get_size(dest);
1473                         /* Have we got space to append the '\0' ? */
1474                         if (size <= dest_len) {
1475                                 /* No, realloc. */
1476                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1477                                                 dest_len+1);
1478                                 if (!dest) {
1479                                         /* talloc fail. */
1480                                         dest_len = (size_t)-1;
1481                                         return 0;
1482                                 }
1483                         }
1484                         /* Yay - space ! */
1485                         dest[dest_len] = '\0';
1486                         dest_len++;
1487                 }
1488         } else if (dest) {
1489                 dest[0] = 0;
1490         }
1491
1492         *ppdest = dest;
1493         return src_len;
1494 }
1495
1496 size_t pull_ucs2_fstring(char *dest, const void *src)
1497 {
1498         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1499 }
1500
1501 /**
1502  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1503  *
1504  * @param dest always set at least to NULL
1505  * @parm converted_size set to the number of bytes occupied by the string in
1506  * the destination on success.
1507  *
1508  * @return true if new buffer was correctly allocated, and string was
1509  * converted.
1510  **/
1511
1512 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1513                       size_t *converted_size)
1514 {
1515         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1516
1517         *dest = NULL;
1518         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1519                                      (void **)dest, converted_size, True);
1520 }
1521
1522 /**
1523  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1524  *
1525  * @param dest always set at least to NULL
1526  * @parm converted_size set to the number of bytes occupied by the string in
1527  * the destination on success.
1528  *
1529  * @return true if new buffer was correctly allocated, and string was
1530  * converted.
1531  **/
1532
1533 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1534                       size_t *converted_size)
1535 {
1536         size_t src_len = strlen(src)+1;
1537
1538         *dest = NULL;
1539         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1540                                      (void **)dest, converted_size, True);
1541 }
1542
1543
1544 /**
1545  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1546  *
1547  * @param dest always set at least to NULL
1548  * @parm converted_size set to the number of bytes occupied by the string in
1549  * the destination on success.
1550  *
1551  * @return true if new buffer was correctly allocated, and string was
1552  * converted.
1553  **/
1554
1555 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1556                        size_t *converted_size)
1557 {
1558         size_t src_len = strlen(src)+1;
1559
1560         *dest = NULL;
1561         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1562                                      (void **)dest, converted_size, True);
1563 }
1564
1565 /**
1566  Copy a string from a char* src to a unicode or ascii
1567  dos codepage destination choosing unicode or ascii based on the
1568  flags supplied
1569  Return the number of bytes occupied by the string in the destination.
1570  flags can have:
1571   STR_TERMINATE means include the null termination.
1572   STR_UPPER     means uppercase in the destination.
1573   STR_ASCII     use ascii even with unicode packet.
1574   STR_NOALIGN   means don't do alignment.
1575  dest_len is the maximum length allowed in the destination. If dest_len
1576  is -1 then no maxiumum is used.
1577 **/
1578
1579 size_t push_string_check_fn(const char *function, unsigned int line,
1580                             void *dest, const char *src,
1581                             size_t dest_len, int flags)
1582 {
1583 #ifdef DEVELOPER
1584         /* We really need to zero fill here, not clobber
1585          * region, as we want to ensure that valgrind thinks
1586          * all of the outgoing buffer has been written to
1587          * so a send() or write() won't trap an error.
1588          * JRA.
1589          */
1590 #if 0
1591         clobber_region(function, line, dest, dest_len);
1592 #else
1593         memset(dest, '\0', dest_len);
1594 #endif
1595 #endif
1596
1597         if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1598                 return push_ucs2(NULL, dest, src, dest_len, flags);
1599         }
1600         return push_ascii(dest, src, dest_len, flags);
1601 }
1602
1603
1604 /**
1605  Copy a string from a char* src to a unicode or ascii
1606  dos codepage destination choosing unicode or ascii based on the
1607  flags in the SMB buffer starting at base_ptr.
1608  Return the number of bytes occupied by the string in the destination.
1609  flags can have:
1610   STR_TERMINATE means include the null termination.
1611   STR_UPPER     means uppercase in the destination.
1612   STR_ASCII     use ascii even with unicode packet.
1613   STR_NOALIGN   means don't do alignment.
1614  dest_len is the maximum length allowed in the destination. If dest_len
1615  is -1 then no maxiumum is used.
1616 **/
1617
1618 size_t push_string_base(const char *function, unsigned int line,
1619                         const char *base, uint16 flags2,
1620                         void *dest, const char *src,
1621                         size_t dest_len, int flags)
1622 {
1623 #ifdef DEVELOPER
1624         /* We really need to zero fill here, not clobber
1625          * region, as we want to ensure that valgrind thinks
1626          * all of the outgoing buffer has been written to
1627          * so a send() or write() won't trap an error.
1628          * JRA.
1629          */
1630 #if 0
1631         clobber_region(function, line, dest, dest_len);
1632 #else
1633         memset(dest, '\0', dest_len);
1634 #endif
1635 #endif
1636
1637         if (!(flags & STR_ASCII) && \
1638             ((flags & STR_UNICODE || \
1639               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1640                 return push_ucs2(base, dest, src, dest_len, flags);
1641         }
1642         return push_ascii(dest, src, dest_len, flags);
1643 }
1644
1645 /**
1646  Copy a string from a char* src to a unicode or ascii
1647  dos codepage destination choosing unicode or ascii based on the
1648  flags supplied
1649  Return the number of bytes occupied by the string in the destination.
1650  flags can have:
1651   STR_TERMINATE means include the null termination.
1652   STR_UPPER     means uppercase in the destination.
1653   STR_ASCII     use ascii even with unicode packet.
1654   STR_NOALIGN   means don't do alignment.
1655  dest_len is the maximum length allowed in the destination. If dest_len
1656  is -1 then no maxiumum is used.
1657 **/
1658
1659 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
1660 {
1661         size_t ret;
1662 #ifdef DEVELOPER
1663         /* We really need to zero fill here, not clobber
1664          * region, as we want to ensure that valgrind thinks
1665          * all of the outgoing buffer has been written to
1666          * so a send() or write() won't trap an error.
1667          * JRA.
1668          */
1669         memset(dest, '\0', dest_len);
1670 #endif
1671
1672         if (!(flags & STR_ASCII) && \
1673             (flags & STR_UNICODE)) {
1674                 ret = push_ucs2(NULL, dest, src, dest_len, flags);
1675         } else {
1676                 ret = push_ascii(dest, src, dest_len, flags);
1677         }
1678         if (ret == (size_t)-1) {
1679                 return -1;
1680         }
1681         return ret;
1682 }
1683
1684 /**
1685  Copy a string from a unicode or ascii source (depending on
1686  the packet flags) to a char* destination.
1687  Flags can have:
1688   STR_TERMINATE means the string in src is null terminated.
1689   STR_UNICODE   means to force as unicode.
1690   STR_ASCII     use ascii even with unicode packet.
1691   STR_NOALIGN   means don't do alignment.
1692  if STR_TERMINATE is set then src_len is ignored is it is -1
1693  src_len is the length of the source area in bytes.
1694  Return the number of bytes occupied by the string in src.
1695  The resulting string in "dest" is always null terminated.
1696 **/
1697
1698 size_t pull_string_fn(const char *function,
1699                         unsigned int line,
1700                         const void *base_ptr,
1701                         uint16 smb_flags2,
1702                         char *dest,
1703                         const void *src,
1704                         size_t dest_len,
1705                         size_t src_len,
1706                         int flags)
1707 {
1708 #ifdef DEVELOPER
1709         clobber_region(function, line, dest, dest_len);
1710 #endif
1711
1712         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1713                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1714                           "UNICODE defined");
1715         }
1716
1717         if (!(flags & STR_ASCII) && \
1718             ((flags & STR_UNICODE || \
1719               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1720                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1721         }
1722         return pull_ascii(dest, src, dest_len, src_len, flags);
1723 }
1724
1725 /**
1726  Copy a string from a unicode or ascii source (depending on
1727  the packet flags) to a char* destination.
1728  Variant that uses talloc.
1729  Flags can have:
1730   STR_TERMINATE means the string in src is null terminated.
1731   STR_UNICODE   means to force as unicode.
1732   STR_ASCII     use ascii even with unicode packet.
1733   STR_NOALIGN   means don't do alignment.
1734  if STR_TERMINATE is set then src_len is ignored is it is -1
1735  src_len is the length of the source area in bytes.
1736  Return the number of bytes occupied by the string in src.
1737  The resulting string in "dest" is always null terminated.
1738 **/
1739
1740 size_t pull_string_talloc_fn(const char *function,
1741                         unsigned int line,
1742                         TALLOC_CTX *ctx,
1743                         const void *base_ptr,
1744                         uint16 smb_flags2,
1745                         char **ppdest,
1746                         const void *src,
1747                         size_t src_len,
1748                         int flags)
1749 {
1750         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1751                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1752                           "UNICODE defined");
1753         }
1754
1755         if (!(flags & STR_ASCII) && \
1756             ((flags & STR_UNICODE || \
1757               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1758                 return pull_ucs2_base_talloc(ctx,
1759                                         base_ptr,
1760                                         ppdest,
1761                                         src,
1762                                         src_len,
1763                                         flags);
1764         }
1765         return pull_ascii_base_talloc(ctx,
1766                                         ppdest,
1767                                         src,
1768                                         src_len,
1769                                         flags);
1770 }
1771
1772
1773 size_t align_string(const void *base_ptr, const char *p, int flags)
1774 {
1775         if (!(flags & STR_ASCII) && \
1776             ((flags & STR_UNICODE || \
1777               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1778                 return ucs2_align(base_ptr, p, flags);
1779         }
1780         return 0;
1781 }
1782
1783 /*
1784   Return the unicode codepoint for the next multi-byte CH_UNIX character
1785   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1786
1787   Also return the number of bytes consumed (which tells the caller
1788   how many bytes to skip to get to the next CH_UNIX character).
1789
1790   Return INVALID_CODEPOINT if the next character cannot be converted.
1791 */
1792
1793 codepoint_t next_codepoint(const char *str, size_t *size)
1794 {
1795         /* It cannot occupy more than 4 bytes in UTF16 format */
1796         uint8_t buf[4];
1797         smb_iconv_t descriptor;
1798         size_t ilen_orig;
1799         size_t ilen;
1800         size_t olen;
1801         char *outbuf;
1802
1803         if ((str[0] & 0x80) == 0) {
1804                 *size = 1;
1805                 return (codepoint_t)str[0];
1806         }
1807
1808         /* We assume that no multi-byte character can take
1809            more than 5 bytes. This is OK as we only
1810            support codepoints up to 1M */
1811
1812         ilen_orig = strnlen(str, 5);
1813         ilen = ilen_orig;
1814
1815         lazy_initialize_conv();
1816
1817         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1818         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1819                 *size = 1;
1820                 return INVALID_CODEPOINT;
1821         }
1822
1823         /* This looks a little strange, but it is needed to cope
1824            with codepoints above 64k which are encoded as per RFC2781. */
1825         olen = 2;
1826         outbuf = (char *)buf;
1827         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1828         if (olen == 2) {
1829                 /* We failed to convert to a 2 byte character.
1830                    See if we can convert to a 4 UTF16-LE byte char encoding.
1831                 */
1832                 olen = 4;
1833                 outbuf = (char *)buf;
1834                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1835                 if (olen == 4) {
1836                         /* We didn't convert any bytes */
1837                         *size = 1;
1838                         return INVALID_CODEPOINT;
1839                 }
1840                 olen = 4 - olen;
1841         } else {
1842                 olen = 2 - olen;
1843         }
1844
1845         *size = ilen_orig - ilen;
1846
1847         if (olen == 2) {
1848                 /* 2 byte, UTF16-LE encoded value. */
1849                 return (codepoint_t)SVAL(buf, 0);
1850         }
1851         if (olen == 4) {
1852                 /* Decode a 4 byte UTF16-LE character manually.
1853                    See RFC2871 for the encoding machanism.
1854                 */
1855                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1856                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1857
1858                 return (codepoint_t)0x10000 +
1859                                 (w1 << 10) + w2;
1860         }
1861
1862         /* no other length is valid */
1863         return INVALID_CODEPOINT;
1864 }
1865
1866 /*
1867   push a single codepoint into a CH_UNIX string the target string must
1868   be able to hold the full character, which is guaranteed if it is at
1869   least 5 bytes in size. The caller may pass less than 5 bytes if they
1870   are sure the character will fit (for example, you can assume that
1871   uppercase/lowercase of a character will not add more than 1 byte)
1872
1873   return the number of bytes occupied by the CH_UNIX character, or
1874   -1 on failure
1875 */
1876 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1877 {
1878         smb_iconv_t descriptor;
1879         uint8_t buf[4];
1880         size_t ilen, olen;
1881         const char *inbuf;
1882
1883         if (c < 128) {
1884                 *str = c;
1885                 return 1;
1886         }
1887
1888         lazy_initialize_conv();
1889
1890         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1891         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1892                 return -1;
1893         }
1894
1895         if (c < 0x10000) {
1896                 ilen = 2;
1897                 olen = 5;
1898                 inbuf = (char *)buf;
1899                 SSVAL(buf, 0, c);
1900                 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1901                 if (ilen != 0) {
1902                         return -1;
1903                 }
1904                 return 5 - olen;
1905         }
1906
1907         c -= 0x10000;
1908
1909         buf[0] = (c>>10) & 0xFF;
1910         buf[1] = (c>>18) | 0xd8;
1911         buf[2] = c & 0xFF;
1912         buf[3] = ((c>>8) & 0x3) | 0xdc;
1913
1914         ilen = 4;
1915         olen = 5;
1916         inbuf = (char *)buf;
1917
1918         smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1919         if (ilen != 0) {
1920                 return -1;
1921         }
1922         return 5 - olen;
1923 }
1924
1925