source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  /*
   * Character substituted for anything that cannot be converted.
   * Hard-wired for now; it could become a parameter if someone
   * complains.... JRA.
   */
  char lp_failed_convert_char(void)
  {
          const char replacement = '_';

          return replacement;
  }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
  48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
  50 static bool initialized;
  51
  52 /**
  53  * Return the name of a charset to give to iconv().
  54  **/
  55 static const char *charset_name(charset_t ch)
  56 {
  57         const char *ret;
  58
  59         switch (ch) {
  60         case CH_UTF16LE:
  61                 ret = "UTF-16LE";
  62                 break;
  63         case CH_UTF16BE:
  64                 ret = "UTF-16BE";
  65                 break;
  66         case CH_UNIX:
  67                 ret = lp_unix_charset();
  68                 break;
  69         case CH_DOS:
  70                 ret = lp_dos_charset();
  71                 break;
  72         case CH_DISPLAY:
  73                 ret = lp_display_charset();
  74                 break;
  75         case CH_UTF8:
  76                 ret = "UTF8";
  77                 break;
  78         default:
  79                 ret = NULL;
  80         }
  81
  82 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  83         if (ret && !strcmp(ret, "LOCALE")) {
  84                 const char *ln = NULL;
  85
  86 #ifdef HAVE_SETLOCALE
  87                 setlocale(LC_ALL, "");
  88 #endif
  89                 ln = nl_langinfo(CODESET);
  90                 if (ln) {
  91                         /* Check whether the charset name is supported
  92                            by iconv */
  93                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  94                         if (handle == (smb_iconv_t) -1) {
  95                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  96                                 ln = NULL;
  97                         } else {
  98                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  99                                 smb_iconv_close(handle);
 100                         }
 101                 }
 102                 ret = ln;
 103         }
 104 #endif
 105
 106         if (!ret || !*ret) ret = "ASCII";
 107         return ret;
 108 }
 109
 110 void lazy_initialize_conv(void)
 111 {
 112         if (!initialized) {
 113                 load_case_tables();
 114                 init_iconv();
 115                 initialized = true;
 116         }
 117 }
 118
 119 /**
 120  * Destroy global objects allocated by init_iconv()
 121  **/
 122 void gfree_charcnv(void)
 123 {
 124         int c1, c2;
 125
 126         for (c1=0;c1<NUM_CHARSETS;c1++) {
 127                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 128                         if ( conv_handles[c1][c2] ) {
 129                                 smb_iconv_close( conv_handles[c1][c2] );
 130                                 conv_handles[c1][c2] = 0;
 131                         }
 132                 }
 133         }
 134         initialized = false;
 135 }
 136
 137 /**
 138  * Initialize iconv conversion descriptors.
 139  *
 140  * This is called the first time it is needed, and also called again
 141  * every time the configuration is reloaded, because the charset or
 142  * codepage might have changed.
 143  **/
 144 void init_iconv(void)
 145 {
 146         int c1, c2;
 147         bool did_reload = False;
 148
 149         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 150            first */
 151         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 152                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 153
 154         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 155                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 156
 157         for (c1=0;c1<NUM_CHARSETS;c1++) {
 158                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 159                         const char *n1 = charset_name((charset_t)c1);
 160                         const char *n2 = charset_name((charset_t)c2);
 161                         if (conv_handles[c1][c2] &&
 162                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 163                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 164                                 continue;
 165
 166                         did_reload = True;
 167
 168                         if (conv_handles[c1][c2])
 169                                 smb_iconv_close(conv_handles[c1][c2]);
 170
 171                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 172                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 173                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 174                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 175                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 176                                         n1 = "ASCII";
 177                                 }
 178                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 179                                         n2 = "ASCII";
 180                                 }
 181                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 182                                         n1, n2 ));
 183                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 184                                 if (!conv_handles[c1][c2]) {
 185                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 186                                         smb_panic("init_iconv: conv_handle initialization failed");
 187                                 }
 188                         }
 189                 }
 190         }
 191
 192         if (did_reload) {
 193                 /* XXX: Does this really get called every time the dos
 194                  * codepage changes? */
 195                 /* XXX: Is the did_reload test too strict? */
 196                 conv_silent = True;
 197                 init_valid_table();
 198                 conv_silent = False;
 199         }
 200 }
 201
 202 /**
 203  * Convert string from one encoding to another, making error checking etc
 204  * Slow path version - uses (slow) iconv.
 205  *
 206  * @param src pointer to source string (multibyte or singlebyte)
 207  * @param srclen length of the source string in bytes
 208  * @param dest pointer to destination string (multibyte or singlebyte)
 209  * @param destlen maximal length allowed for string
 210  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 211  * @returns the number of bytes occupied in the destination
 212  *
 213  * Ensure the srclen contains the terminating zero.
 214  *
 215  **/
 216
 217 static size_t convert_string_internal(charset_t from, charset_t to,
 218                       void const *src, size_t srclen,
 219                       void *dest, size_t destlen, bool allow_bad_conv)
 220 {
 221         size_t i_len, o_len;
 222         size_t retval;
 223         const char* inbuf = (const char*)src;
 224         char* outbuf = (char*)dest;
 225         smb_iconv_t descriptor;
 226
 227         lazy_initialize_conv();
 228
 229         descriptor = conv_handles[from][to];
 230
 231         if (srclen == (size_t)-1) {
 232                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 233                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 234                 } else {
 235                         srclen = strlen((const char *)src)+1;
 236                 }
 237         }
 238
 239
 240         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 241                 if (!conv_silent)
 242                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 243                 return (size_t)-1;
 244         }
 245
 246         i_len=srclen;
 247         o_len=destlen;
 248
 249  again:
 250
 251         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 252         if(retval==(size_t)-1) {
 253                 const char *reason="unknown error";
 254                 switch(errno) {
 255                         case EINVAL:
 256                                 reason="Incomplete multibyte sequence";
 257                                 if (!conv_silent)
 258                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 259                                 if (allow_bad_conv)
 260                                         goto use_as_is;
 261                                 return (size_t)-1;
 262                         case E2BIG:
 263                                 reason="No more room";
 264                                 if (!conv_silent) {
 265                                         if (from == CH_UNIX) {
 266                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 267                                                         charset_name(from), charset_name(to),
 268                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 269                                         } else {
 270                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 271                                                         charset_name(from), charset_name(to),
 272                                                         (unsigned int)srclen, (unsigned int)destlen));
 273                                         }
 274                                 }
 275                                 break;
 276                         case EILSEQ:
 277                                 reason="Illegal multibyte sequence";
 278                                 if (!conv_silent)
 279                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 280                                 if (allow_bad_conv)
 281                                         goto use_as_is;
 282
 283                                 return (size_t)-1;
 284                         default:
 285                                 if (!conv_silent)
 286                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 287                                 return (size_t)-1;
 288                 }
 289                 /* smb_panic(reason); */
 290         }
 291         return destlen-o_len;
 292
 293  use_as_is:
 294
 295         /*
 296          * Conversion not supported. This is actually an error, but there are so
 297          * many misconfigured iconv systems and smb.conf's out there we can't just
 298          * fail. Do a very bad conversion instead.... JRA.
 299          */
 300
 301         {
 302                 if (o_len == 0 || i_len == 0)
 303                         return destlen - o_len;
 304
 305                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 306                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 307                         /* Can't convert from utf16 any endian to multibyte.
 308                            Replace with the default fail char.
 309                         */
 310                         if (i_len < 2)
 311                                 return destlen - o_len;
 312                         if (i_len >= 2) {
 313                                 *outbuf = lp_failed_convert_char();
 314
 315                                 outbuf++;
 316                                 o_len--;
 317
 318                                 inbuf += 2;
 319                                 i_len -= 2;
 320                         }
 321
 322                         if (o_len == 0 || i_len == 0)
 323                                 return destlen - o_len;
 324
 325                         /* Keep trying with the next char... */
 326                         goto again;
 327
 328                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 329                         /* Can't convert to UTF16LE - just widen by adding the
 330                            default fail char then zero.
 331                         */
 332                         if (o_len < 2)
 333                                 return destlen - o_len;
 334
 335                         outbuf[0] = lp_failed_convert_char();
 336                         outbuf[1] = '\0';
 337
 338                         inbuf++;
 339                         i_len--;
 340
 341                         outbuf += 2;
 342                         o_len -= 2;
 343
 344                         if (o_len == 0 || i_len == 0)
 345                                 return destlen - o_len;
 346
 347                         /* Keep trying with the next char... */
 348                         goto again;
 349
 350                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 351                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 352                         /* Failed multibyte to multibyte. Just copy the default fail char and
 353                                 try again. */
 354                         outbuf[0] = lp_failed_convert_char();
 355
 356                         inbuf++;
 357                         i_len--;
 358
 359                         outbuf++;
 360                         o_len--;
 361
 362                         if (o_len == 0 || i_len == 0)
 363                                 return destlen - o_len;
 364
 365                         /* Keep trying with the next char... */
 366                         goto again;
 367
 368                 } else {
 369                         /* Keep compiler happy.... */
 370                         return destlen - o_len;
 371                 }
 372         }
 373 }
 374
 375 /**
 376  * Convert string from one encoding to another, making error checking etc
 377  * Fast path version - handles ASCII first.
 378  *
 379  * @param src pointer to source string (multibyte or singlebyte)
 380  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 381  * @param dest pointer to destination string (multibyte or singlebyte)
 382  * @param destlen maximal length allowed for string - *NEVER* -1.
 383  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 384  * @returns the number of bytes occupied in the destination
 385  *
 386  * Ensure the srclen contains the terminating zero.
 387  *
 388  * This function has been hand-tuned to provide a fast path.
 389  * Don't change unless you really know what you are doing. JRA.
 390  **/
 391
 392 size_t convert_string(charset_t from, charset_t to,
 393                       void const *src, size_t srclen,
 394                       void *dest, size_t destlen, bool allow_bad_conv)
 395 {
 396         /*
 397          * NB. We deliberately don't do a strlen here if srclen == -1.
 398          * This is very expensive over millions of calls and is taken
 399          * care of in the slow path in convert_string_internal. JRA.
 400          */
 401
 402 #ifdef DEVELOPER
 403         SMB_ASSERT(destlen != (size_t)-1);
 404 #endif
 405
 406         if (srclen == 0)
 407                 return 0;
 408
 409         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 410                 const unsigned char *p = (const unsigned char *)src;
 411                 unsigned char *q = (unsigned char *)dest;
 412                 size_t slen = srclen;
 413                 size_t dlen = destlen;
 414                 unsigned char lastp = '\0';
 415                 size_t retval = 0;
 416
 417                 /* If all characters are ascii, fast path here. */
 418                 while (slen && dlen) {
 419                         if ((lastp = *p) <= 0x7f) {
 420                                 *q++ = *p++;
 421                                 if (slen != (size_t)-1) {
 422                                         slen--;
 423                                 }
 424                                 dlen--;
 425                                 retval++;
 426                                 if (!lastp)
 427                                         break;
 428                         } else {
 429 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 430                                 goto general_case;
 431 #else
 432                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 433                                 if (ret == (size_t)-1) {
 434                                         return ret;
 435                                 }
 436                                 return retval + ret;
 437 #endif
 438                         }
 439                 }
 440                 if (!dlen) {
 441                         /* Even if we fast path we should note if we ran out of room. */
 442                         if (((slen != (size_t)-1) && slen) ||
 443                                         ((slen == (size_t)-1) && lastp)) {
 444                                 errno = E2BIG;
 445                         }
 446                 }
 447                 return retval;
 448         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 449                 const unsigned char *p = (const unsigned char *)src;
 450                 unsigned char *q = (unsigned char *)dest;
 451                 size_t retval = 0;
 452                 size_t slen = srclen;
 453                 size_t dlen = destlen;
 454                 unsigned char lastp = '\0';
 455
 456                 /* If all characters are ascii, fast path here. */
 457                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 458                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 459                                 *q++ = *p;
 460                                 if (slen != (size_t)-1) {
 461                                         slen -= 2;
 462                                 }
 463                                 p += 2;
 464                                 dlen--;
 465                                 retval++;
 466                                 if (!lastp)
 467                                         break;
 468                         } else {
 469 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 470                                 goto general_case;
 471 #else
 472                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 473                                 if (ret == (size_t)-1) {
 474                                         return ret;
 475                                 }
 476                                 return retval + ret;
 477 #endif
 478                         }
 479                 }
 480                 if (!dlen) {
 481                         /* Even if we fast path we should note if we ran out of room. */
 482                         if (((slen != (size_t)-1) && slen) ||
 483                                         ((slen == (size_t)-1) && lastp)) {
 484                                 errno = E2BIG;
 485                         }
 486                 }
 487                 return retval;
 488         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 489                 const unsigned char *p = (const unsigned char *)src;
 490                 unsigned char *q = (unsigned char *)dest;
 491                 size_t retval = 0;
 492                 size_t slen = srclen;
 493                 size_t dlen = destlen;
 494                 unsigned char lastp = '\0';
 495
 496                 /* If all characters are ascii, fast path here. */
 497                 while (slen && (dlen >= 2)) {
 498                         if ((lastp = *p) <= 0x7F) {
 499                                 *q++ = *p++;
 500                                 *q++ = '\0';
 501                                 if (slen != (size_t)-1) {
 502                                         slen--;
 503                                 }
 504                                 dlen -= 2;
 505                                 retval += 2;
 506                                 if (!lastp)
 507                                         break;
 508                         } else {
 509 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 510                                 goto general_case;
 511 #else
 512                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 513                                 if (ret == (size_t)-1) {
 514                                         return ret;
 515                                 }
 516                                 return retval + ret;
 517 #endif
 518                         }
 519                 }
 520                 if (!dlen) {
 521                         /* Even if we fast path we should note if we ran out of room. */
 522                         if (((slen != (size_t)-1) && slen) ||
 523                                         ((slen == (size_t)-1) && lastp)) {
 524                                 errno = E2BIG;
 525                         }
 526                 }
 527                 return retval;
 528         }
 529
 530 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 531   general_case:
 532 #endif
 533         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 534 }
 535
 536 /**
 537  * Convert between character sets, allocating a new buffer using talloc for the result.
 538  *
 539  * @param srclen length of source buffer.
 540  * @param dest always set at least to NULL
 541  * @parm converted_size set to the number of bytes occupied by the string in
 542  * the destination on success.
 543  * @note -1 is not accepted for srclen.
 544  *
 545  * @return true if new buffer was correctly allocated, and string was
 546  * converted.
 547  *
 548  * Ensure the srclen contains the terminating zero.
 549  *
 550  * I hate the goto's in this function. It's embarressing.....
 551  * There has to be a cleaner way to do this. JRA.
 552  */
 553 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 554                            void const *src, size_t srclen, void *dst,
 555                            size_t *converted_size, bool allow_bad_conv)
 556
 557 {
 558         size_t i_len, o_len, destlen = (srclen * 3) / 2;
 559         size_t retval;
 560         const char *inbuf = (const char *)src;
 561         char *outbuf = NULL, *ob = NULL;
 562         smb_iconv_t descriptor;
 563         void **dest = (void **)dst;
 564
 565         *dest = NULL;
 566
 567         if (!converted_size) {
 568                 errno = EINVAL;
 569                 return false;
 570         }
 571
 572         if (src == NULL || srclen == (size_t)-1) {
 573                 errno = EINVAL;
 574                 return false;
 575         }
 576
 577         if (srclen == 0) {
 578                 /* We really should treat this as an error, but
 579                    there are too many callers that need this to
 580                    return a NULL terminated string in the correct
 581                    character set. */
 582                 if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
 583                         destlen = 2;
 584                 } else {
 585                         destlen = 1;
 586                 }
 587                 ob = talloc_zero_array(ctx, char, destlen);
 588                 if (ob == NULL) {
 589                         errno = ENOMEM;
 590                         return false;
 591                 }
 592                 *converted_size = destlen;
 593                 *dest = ob;
 594                 return true;
 595         }
 596
 597         lazy_initialize_conv();
 598
 599         descriptor = conv_handles[from][to];
 600
 601         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 602                 if (!conv_silent)
 603                         DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
 604                 errno = EOPNOTSUPP;
 605                 return false;
 606         }
 607
 608   convert:
 609
 610         /* +2 is for ucs2 null termination. */
 611         if ((destlen*2)+2 < destlen) {
 612                 /* wrapped ! abort. */
 613                 if (!conv_silent)
 614                         DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
 615                 TALLOC_FREE(outbuf);
 616                 errno = EOPNOTSUPP;
 617                 return false;
 618         } else {
 619                 destlen = destlen * 2;
 620         }
 621
 622         /* +2 is for ucs2 null termination. */
 623         ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
 624
 625         if (!ob) {
 626                 DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
 627                 errno = ENOMEM;
 628                 return false;
 629         }
 630         outbuf = ob;
 631         i_len = srclen;
 632         o_len = destlen;
 633
 634  again:
 635
 636         retval = smb_iconv(descriptor,
 637                            &inbuf, &i_len,
 638                            &outbuf, &o_len);
 639         if(retval == (size_t)-1)                {
 640                 const char *reason="unknown error";
 641                 switch(errno) {
 642                         case EINVAL:
 643                                 reason="Incomplete multibyte sequence";
 644                                 if (!conv_silent)
 645                                         DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
 646                                 if (allow_bad_conv)
 647                                         goto use_as_is;
 648                                 break;
 649                         case E2BIG:
 650                                 goto convert;
 651                         case EILSEQ:
 652                                 reason="Illegal multibyte sequence";
 653                                 if (!conv_silent)
 654                                         DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
 655                                 if (allow_bad_conv)
 656                                         goto use_as_is;
 657                                 break;
 658                 }
 659                 if (!conv_silent)
 660                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 661                 /* smb_panic(reason); */
 662                 TALLOC_FREE(ob);
 663                 return false;
 664         }
 665
 666   out:
 667
 668         destlen = destlen - o_len;
 669         /* Don't shrink unless we're reclaiming a lot of
 670          * space. This is in the hot codepath and these
 671          * reallocs *cost*. JRA.
 672          */
 673         if (o_len > 1024) {
 674                 /* We're shrinking here so we know the +2 is safe from wrap. */
 675                 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
 676         }
 677
 678         if (destlen && !ob) {
 679                 DEBUG(0, ("convert_string_talloc: out of memory!\n"));
 680                 errno = ENOMEM;
 681                 return false;
 682         }
 683
 684         *dest = ob;
 685
 686         /* Must ucs2 null terminate in the extra space we allocated. */
 687         ob[destlen] = '\0';
 688         ob[destlen+1] = '\0';
 689
 690         /* Ensure we can never return a *converted_size of zero. */
 691         if (destlen == 0) {
 692                 /* This can happen from a bad iconv "use_as_is:" call. */
 693                 if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
 694                         destlen = 2;
 695                 } else {
 696                         destlen = 1;
 697                 }
 698         }
 699
 700         *converted_size = destlen;
 701         return true;
 702
 703  use_as_is:
 704
 705         /*
 706          * Conversion not supported. This is actually an error, but there are so
 707          * many misconfigured iconv systems and smb.conf's out there we can't just
 708          * fail. Do a very bad conversion instead.... JRA.
 709          */
 710
 711         {
 712                 if (o_len == 0 || i_len == 0)
 713                         goto out;
 714
 715                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 716                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 717                         /* Can't convert from utf16 any endian to multibyte.
 718                            Replace with the default fail char.
 719                         */
 720
 721                         if (i_len < 2)
 722                                 goto out;
 723
 724                         if (i_len >= 2) {
 725                                 *outbuf = lp_failed_convert_char();
 726
 727                                 outbuf++;
 728                                 o_len--;
 729
 730                                 inbuf += 2;
 731                                 i_len -= 2;
 732                         }
 733
 734                         if (o_len == 0 || i_len == 0)
 735                                 goto out;
 736
 737                         /* Keep trying with the next char... */
 738                         goto again;
 739
 740                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 741                         /* Can't convert to UTF16LE - just widen by adding the
 742                            default fail char then zero.
 743                         */
 744                         if (o_len < 2)
 745                                 goto out;
 746
 747                         outbuf[0] = lp_failed_convert_char();
 748                         outbuf[1] = '\0';
 749
 750                         inbuf++;
 751                         i_len--;
 752
 753                         outbuf += 2;
 754                         o_len -= 2;
 755
 756                         if (o_len == 0 || i_len == 0)
 757                                 goto out;
 758
 759                         /* Keep trying with the next char... */
 760                         goto again;
 761
 762                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 763                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 764                         /* Failed multibyte to multibyte. Just copy the default fail char and
 765                            try again. */
 766                         outbuf[0] = lp_failed_convert_char();
 767
 768                         inbuf++;
 769                         i_len--;
 770
 771                         outbuf++;
 772                         o_len--;
 773
 774                         if (o_len == 0 || i_len == 0)
 775                                 goto out;
 776
 777                         /* Keep trying with the next char... */
 778                         goto again;
 779
 780                 } else {
 781                         /* Keep compiler happy.... */
 782                         goto out;
 783                 }
 784         }
 785 }
 786
 787 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 788 {
 789         size_t size;
 790         smb_ucs2_t *buffer;
 791
 792         if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
 793                 return (size_t)-1;
 794         }
 795
 796         if (!strupper_w(buffer) && (dest == src)) {
 797                 TALLOC_FREE(buffer);
 798                 return srclen;
 799         }
 800
 801         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 802         TALLOC_FREE(buffer);
 803         return size;
 804 }
 805
 806 /**
 807  talloc_strdup() a unix string to upper case.
 808 **/
 809
 810 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 811 {
 812         char *out_buffer = talloc_strdup(ctx,s);
 813         const unsigned char *p = (const unsigned char *)s;
 814         unsigned char *q = (unsigned char *)out_buffer;
 815
 816         if (!q) {
 817                 return NULL;
 818         }
 819
 820         /* this is quite a common operation, so we want it to be
 821            fast. We optimise for the ascii case, knowing that all our
 822            supported multi-byte character sets are ascii-compatible
 823            (ie. they match for the first 128 chars) */
 824
 825         while (*p) {
 826                 if (*p & 0x80)
 827                         break;
 828                 *q++ = toupper_ascii_fast(*p);
 829                 p++;
 830         }
 831
 832         if (*p) {
 833                 /* MB case. */
 834                 size_t converted_size, converted_size2;
 835                 smb_ucs2_t *ubuf = NULL;
 836
 837                 /* We're not using the ascii buffer above. */
 838                 TALLOC_FREE(out_buffer);
 839
 840                 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
 841                                            strlen(s)+1, (void *)&ubuf,
 842                                            &converted_size, True))
 843                 {
 844                         return NULL;
 845                 }
 846
 847                 strupper_w(ubuf);
 848
 849                 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
 850                                            converted_size, (void *)&out_buffer,
 851                                            &converted_size2, True))
 852                 {
 853                         TALLOC_FREE(ubuf);
 854                         return NULL;
 855                 }
 856
 857                 /* Don't need the intermediate buffer
 858                  * anymore.
 859                  */
 860                 TALLOC_FREE(ubuf);
 861         }
 862
 863         return out_buffer;
 864 }
 865
 866 char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
 867         return talloc_strdup_upper(ctx, s);
 868 }
 869
 870
 871 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 872 {
 873         size_t size;
 874         smb_ucs2_t *buffer = NULL;
 875
 876         if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
 877                                    (void **)(void *)&buffer, &size,
 878                                    True))
 879         {
 880                 smb_panic("failed to create UCS2 buffer");
 881         }
 882         if (!strlower_w(buffer) && (dest == src)) {
 883                 TALLOC_FREE(buffer);
 884                 return srclen;
 885         }
 886         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 887         TALLOC_FREE(buffer);
 888         return size;
 889 }
 890
 891
 892 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
 893 {
 894         size_t converted_size;
 895         smb_ucs2_t *buffer = NULL;
 896         char *out_buffer;
 897
 898         if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
 899                 return NULL;
 900         }
 901
 902         strlower_w(buffer);
 903
 904         if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
 905                 TALLOC_FREE(buffer);
 906                 return NULL;
 907         }
 908
 909         TALLOC_FREE(buffer);
 910
 911         return out_buffer;
 912 }
 913
 914 char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
 915         return talloc_strdup_lower(ctx, s);
 916 }
 917
 918 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 919 {
 920         if (flags & (STR_NOALIGN|STR_ASCII))
 921                 return 0;
 922         return PTR_DIFF(p, base_ptr) & 1;
 923 }
 924
 925
 926 /**
 927  * Copy a string from a char* unix src to a dos codepage string destination.
 928  *
 929  * @return the number of bytes occupied by the string in the destination.
 930  *
 931  * @param flags can include
 932  * <dl>
 933  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 934  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 935  * </dl>
 936  *
 937  * @param dest_len the maximum length in bytes allowed in the
 938  * destination.
 939  **/
 940 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 941 {
 942         size_t src_len = strlen(src);
 943         char *tmpbuf = NULL;
 944         size_t ret;
 945
 946         /* No longer allow a length of -1. */
 947         if (dest_len == (size_t)-1) {
 948                 smb_panic("push_ascii - dest_len == -1");
 949         }
 950
 951         if (flags & STR_UPPER) {
 952                 tmpbuf = SMB_STRDUP(src);
 953                 if (!tmpbuf) {
 954                         smb_panic("malloc fail");
 955                 }
 956                 strupper_m(tmpbuf);
 957                 src = tmpbuf;
 958         }
 959
 960         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 961                 src_len++;
 962         }
 963
 964         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 965         if (ret == (size_t)-1 &&
 966                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 967                         && dest_len > 0) {
 968                 ((char *)dest)[0] = '\0';
 969         }
 970         SAFE_FREE(tmpbuf);
 971         return ret;
 972 }
 973
 974 size_t push_ascii_fstring(void *dest, const char *src)
 975 {
 976         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 977 }
 978
 979 /********************************************************************
 980  Push an nstring - ensure null terminated. Written by
 981  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 982 ********************************************************************/
 983
 984 size_t push_ascii_nstring(void *dest, const char *src)
 985 {
 986         size_t i, buffer_len, dest_len;
 987         smb_ucs2_t *buffer;
 988
 989         conv_silent = True;
 990         if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &buffer_len)) {
 991                 smb_panic("failed to create UCS2 buffer");
 992         }
 993
 994         /* We're using buffer_len below to count ucs2 characters, not bytes. */
 995         buffer_len /= sizeof(smb_ucs2_t);
 996
 997         dest_len = 0;
 998         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
 999                 unsigned char mb[10];
1000                 /* Convert one smb_ucs2_t character at a time. */
1001                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1002                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1003                         memcpy((char *)dest + dest_len, mb, mb_len);
1004                         dest_len += mb_len;
1005                 } else {
1006                         errno = E2BIG;
1007                         break;
1008                 }
1009         }
1010         ((char *)dest)[dest_len] = '\0';
1011
1012         conv_silent = False;
1013         TALLOC_FREE(buffer);
1014         return dest_len;
1015 }
1016
1017 /********************************************************************
1018  Push and malloc an ascii string. src and dest null terminated.
1019 ********************************************************************/
1020
1021 bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
1022 {
1023         size_t src_len = strlen(src)+1;
1024
1025         *dest = NULL;
1026         return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
1027                                      (void **)dest, converted_size, True);
1028 }
1029
1030 /**
1031  * Copy a string from a dos codepage source to a unix char* destination.
1032  *
1033  * The resulting string in "dest" is always null terminated.
1034  *
1035  * @param flags can have:
1036  * <dl>
1037  * <dt>STR_TERMINATE</dt>
1038  * <dd>STR_TERMINATE means the string in @p src
1039  * is null terminated, and src_len is ignored.</dd>
1040  * </dl>
1041  *
1042  * @param src_len is the length of the source area in bytes.
1043  * @returns the number of bytes occupied by the string in @p src.
1044  **/
1045 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1046 {
1047         size_t ret;
1048
1049         if (dest_len == (size_t)-1) {
1050                 /* No longer allow dest_len of -1. */
1051                 smb_panic("pull_ascii - invalid dest_len of -1");
1052         }
1053
1054         if (flags & STR_TERMINATE) {
1055                 if (src_len == (size_t)-1) {
1056                         src_len = strlen((const char *)src) + 1;
1057                 } else {
1058                         size_t len = strnlen((const char *)src, src_len);
1059                         if (len < src_len)
1060                                 len++;
1061                         src_len = len;
1062                 }
1063         }
1064
1065         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1066         if (ret == (size_t)-1) {
1067                 ret = 0;
1068                 dest_len = 0;
1069         }
1070
1071         if (dest_len && ret) {
1072                 /* Did we already process the terminating zero ? */
1073                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1074                         dest[MIN(ret, dest_len-1)] = 0;
1075                 }
1076         } else  {
1077                 dest[0] = 0;
1078         }
1079
1080         return src_len;
1081 }
1082
1083 /**
1084  * Copy a string from a dos codepage source to a unix char* destination.
1085  * Talloc version.
1086  *
1087  * The resulting string in "dest" is always null terminated.
1088  *
1089  * @param flags can have:
1090  * <dl>
1091  * <dt>STR_TERMINATE</dt>
1092  * <dd>STR_TERMINATE means the string in @p src
1093  * is null terminated, and src_len is ignored.</dd>
1094  * </dl>
1095  *
1096  * @param src_len is the length of the source area in bytes.
1097  * @returns the number of bytes occupied by the string in @p src.
1098  **/
1099
1100 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1101                                      char **ppdest,
1102                                      const void *src,
1103                                      size_t src_len,
1104                                      int flags)
1105 {
1106         char *dest = NULL;
1107         size_t dest_len;
1108
1109         *ppdest = NULL;
1110
1111         if (!src_len) {
1112                 return 0;
1113         }
1114
1115         if (flags & STR_TERMINATE) {
1116                 if (src_len == (size_t)-1) {
1117                         src_len = strlen((const char *)src) + 1;
1118                 } else {
1119                         size_t len = strnlen((const char *)src, src_len);
1120                         if (len < src_len)
1121                                 len++;
1122                         src_len = len;
1123                 }
1124                 /* Ensure we don't use an insane length from the client. */
1125                 if (src_len >= 1024*1024) {
1126                         char *msg = talloc_asprintf(ctx,
1127                                         "Bad src length (%u) in "
1128                                         "pull_ascii_base_talloc",
1129                                         (unsigned int)src_len);
1130                         smb_panic(msg);
1131                 }
1132         } else {
1133                 /* Can't have an unlimited length
1134                  * non STR_TERMINATE'd.
1135                  */
1136                 if (src_len == (size_t)-1) {
1137                         errno = EINVAL;
1138                         return 0;
1139                 }
1140         }
1141
1142         /* src_len != -1 here. */
1143
1144         if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1145                                      &dest_len, True)) {
1146                 dest_len = 0;
1147         }
1148
1149         if (dest_len && dest) {
1150                 /* Did we already process the terminating zero ? */
1151                 if (dest[dest_len-1] != 0) {
1152                         size_t size = talloc_get_size(dest);
1153                         /* Have we got space to append the '\0' ? */
1154                         if (size <= dest_len) {
1155                                 /* No, realloc. */
1156                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1157                                                 dest_len+1);
1158                                 if (!dest) {
1159                                         /* talloc fail. */
1160                                         dest_len = (size_t)-1;
1161                                         return 0;
1162                                 }
1163                         }
1164                         /* Yay - space ! */
1165                         dest[dest_len] = '\0';
1166                         dest_len++;
1167                 }
1168         } else if (dest) {
1169                 dest[0] = 0;
1170         }
1171
1172         *ppdest = dest;
1173         return src_len;
1174 }
1175
1176 size_t pull_ascii_fstring(char *dest, const void *src)
1177 {
1178         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1179 }
1180
1181 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1182
1183 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1184 {
1185         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1186 }
1187
1188 /**
1189  * Copy a string from a char* src to a unicode destination.
1190  *
1191  * @returns the number of bytes occupied by the string in the destination.
1192  *
1193  * @param flags can have:
1194  *
1195  * <dl>
1196  * <dt>STR_TERMINATE <dd>means include the null termination.
1197  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1198  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1199  * </dl>
1200  *
1201  * @param dest_len is the maximum length allowed in the
1202  * destination.
1203  **/
1204
1205 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1206 {
1207         size_t len=0;
1208         size_t src_len;
1209         size_t ret;
1210
1211         if (dest_len == (size_t)-1) {
1212                 /* No longer allow dest_len of -1. */
1213                 smb_panic("push_ucs2 - invalid dest_len of -1");
1214         }
1215
1216         if (flags & STR_TERMINATE)
1217                 src_len = (size_t)-1;
1218         else
1219                 src_len = strlen(src);
1220
1221         if (ucs2_align(base_ptr, dest, flags)) {
1222                 *(char *)dest = 0;
1223                 dest = (void *)((char *)dest + 1);
1224                 if (dest_len)
1225                         dest_len--;
1226                 len++;
1227         }
1228
1229         /* ucs2 is always a multiple of 2 bytes */
1230         dest_len &= ~1;
1231
1232         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1233         if (ret == (size_t)-1) {
1234                 if ((flags & STR_TERMINATE) &&
1235                                 dest &&
1236                                 dest_len) {
1237                         *(char *)dest = 0;
1238                 }
1239                 return len;
1240         }
1241
1242         len += ret;
1243
1244         if (flags & STR_UPPER) {
1245                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1246                 size_t i;
1247
1248                 /* We check for i < (ret / 2) below as the dest string isn't null
1249                    terminated if STR_TERMINATE isn't set. */
1250
1251                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1252                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1253                         if (v != dest_ucs2[i]) {
1254                                 dest_ucs2[i] = v;
1255                         }
1256                 }
1257         }
1258
1259         return len;
1260 }
1261
1262
1263 /**
1264  * Copy a string from a unix char* src to a UCS2 destination,
1265  * allocating a buffer using talloc().
1266  *
1267  * @param dest always set at least to NULL
1268  * @parm converted_size set to the number of bytes occupied by the string in
1269  * the destination on success.
1270  *
1271  * @return true if new buffer was correctly allocated, and string was
1272  * converted.
1273  **/
1274 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1275                       size_t *converted_size)
1276 {
1277         size_t src_len = strlen(src)+1;
1278
1279         *dest = NULL;
1280         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1281                                      (void **)dest, converted_size, True);
1282 }
1283
1284
1285 /**
1286  Copy a string from a char* src to a UTF-8 destination.
1287  Return the number of bytes occupied by the string in the destination
1288  Flags can have:
1289   STR_TERMINATE means include the null termination
1290   STR_UPPER     means uppercase in the destination
1291  dest_len is the maximum length allowed in the destination. If dest_len
1292  is -1 then no maxiumum is used.
1293 **/
1294
1295 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1296 {
1297         size_t src_len = 0;
1298         size_t ret;
1299         char *tmpbuf = NULL;
1300
1301         if (dest_len == (size_t)-1) {
1302                 /* No longer allow dest_len of -1. */
1303                 smb_panic("push_utf8 - invalid dest_len of -1");
1304         }
1305
1306         if (flags & STR_UPPER) {
1307                 tmpbuf = strupper_talloc(talloc_tos(), src);
1308                 if (!tmpbuf) {
1309                         return (size_t)-1;
1310                 }
1311                 src = tmpbuf;
1312                 src_len = strlen(src);
1313         }
1314
1315         src_len = strlen(src);
1316         if (flags & STR_TERMINATE) {
1317                 src_len++;
1318         }
1319
1320         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1321         TALLOC_FREE(tmpbuf);
1322         return ret;
1323 }
1324
1325 size_t push_utf8_fstring(void *dest, const char *src)
1326 {
1327         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1328 }
1329
1330 /**
1331  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1332  *
1333  * @param dest always set at least to NULL
1334  * @parm converted_size set to the number of bytes occupied by the string in
1335  * the destination on success.
1336  *
1337  * @return true if new buffer was correctly allocated, and string was
1338  * converted.
1339  **/
1340
1341 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1342                       size_t *converted_size)
1343 {
1344         size_t src_len = strlen(src)+1;
1345
1346         *dest = NULL;
1347         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1348                                      (void**)dest, converted_size, True);
1349 }
1350
1351 /**
1352  Copy a string from a ucs2 source to a unix char* destination.
1353  Flags can have:
1354   STR_TERMINATE means the string in src is null terminated.
1355   STR_NOALIGN   means don't try to align.
1356  if STR_TERMINATE is set then src_len is ignored if it is -1.
1357  src_len is the length of the source area in bytes
1358  Return the number of bytes occupied by the string in src.
1359  The resulting string in "dest" is always null terminated.
1360 **/
1361
1362 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1363 {
1364         size_t ret;
1365         size_t ucs2_align_len = 0;
1366
1367         if (dest_len == (size_t)-1) {
1368                 /* No longer allow dest_len of -1. */
1369                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1370         }
1371
1372         if (!src_len) {
1373                 if (dest && dest_len > 0) {
1374                         dest[0] = '\0';
1375                 }
1376                 return 0;
1377         }
1378
1379         if (ucs2_align(base_ptr, src, flags)) {
1380                 src = (const void *)((const char *)src + 1);
1381                 if (src_len != (size_t)-1)
1382                         src_len--;
1383                 ucs2_align_len = 1;
1384         }
1385
1386         if (flags & STR_TERMINATE) {
1387                 /* src_len -1 is the default for null terminated strings. */
1388                 if (src_len != (size_t)-1) {
1389                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1390                                                 src_len/2);
1391                         if (len < src_len/2)
1392                                 len++;
1393                         src_len = len*2;
1394                 }
1395         }
1396
1397         /* ucs2 is always a multiple of 2 bytes */
1398         if (src_len != (size_t)-1)
1399                 src_len &= ~1;
1400
1401         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1402         if (ret == (size_t)-1) {
1403                 ret = 0;
1404                 dest_len = 0;
1405         }
1406
1407         if (src_len == (size_t)-1)
1408                 src_len = ret*2;
1409
1410         if (dest_len && ret) {
1411                 /* Did we already process the terminating zero ? */
1412                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1413                         dest[MIN(ret, dest_len-1)] = 0;
1414                 }
1415         } else {
1416                 dest[0] = 0;
1417         }
1418
1419         return src_len + ucs2_align_len;
1420 }
1421
1422 /**
1423  Copy a string from a ucs2 source to a unix char* destination.
1424  Talloc version with a base pointer.
1425  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1426  needs fixing. JRA).
1427  Flags can have:
1428   STR_TERMINATE means the string in src is null terminated.
1429   STR_NOALIGN   means don't try to align.
1430  if STR_TERMINATE is set then src_len is ignored if it is -1.
1431  src_len is the length of the source area in bytes
1432  Return the number of bytes occupied by the string in src.
1433  The resulting string in "dest" is always null terminated.
1434 **/
1435
1436 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1437                         const void *base_ptr,
1438                         char **ppdest,
1439                         const void *src,
1440                         size_t src_len,
1441                         int flags)
1442 {
1443         char *dest;
1444         size_t dest_len;
1445         size_t ucs2_align_len = 0;
1446
1447         *ppdest = NULL;
1448
1449 #ifdef DEVELOPER
1450         /* Ensure we never use the braindead "malloc" varient. */
1451         if (ctx == NULL) {
1452                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1453         }
1454 #endif
1455
1456         if (!src_len) {
1457                 return 0;
1458         }
1459
1460         if (ucs2_align(base_ptr, src, flags)) {
1461                 src = (const void *)((const char *)src + 1);
1462                 if (src_len != (size_t)-1)
1463                         src_len--;
1464                 ucs2_align_len = 1;
1465         }
1466
1467         if (flags & STR_TERMINATE) {
1468                 /* src_len -1 is the default for null terminated strings. */
1469                 if (src_len != (size_t)-1) {
1470                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1471                                                 src_len/2);
1472                         if (len < src_len/2)
1473                                 len++;
1474                         src_len = len*2;
1475                 } else {
1476                         /*
1477                          * src_len == -1 - alloc interface won't take this
1478                          * so we must calculate.
1479                          */
1480                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1481                 }
1482                 /* Ensure we don't use an insane length from the client. */
1483                 if (src_len >= 1024*1024) {
1484                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1485                 }
1486         } else {
1487                 /* Can't have an unlimited length
1488                  * non STR_TERMINATE'd.
1489                  */
1490                 if (src_len == (size_t)-1) {
1491                         errno = EINVAL;
1492                         return 0;
1493                 }
1494         }
1495
1496         /* src_len != -1 here. */
1497
1498         /* ucs2 is always a multiple of 2 bytes */
1499         src_len &= ~1;
1500
1501         if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1502                                    (void *)&dest, &dest_len, True)) {
1503                 dest_len = 0;
1504         }
1505
1506         if (dest_len) {
1507                 /* Did we already process the terminating zero ? */
1508                 if (dest[dest_len-1] != 0) {
1509                         size_t size = talloc_get_size(dest);
1510                         /* Have we got space to append the '\0' ? */
1511                         if (size <= dest_len) {
1512                                 /* No, realloc. */
1513                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1514                                                 dest_len+1);
1515                                 if (!dest) {
1516                                         /* talloc fail. */
1517                                         dest_len = (size_t)-1;
1518                                         return 0;
1519                                 }
1520                         }
1521                         /* Yay - space ! */
1522                         dest[dest_len] = '\0';
1523                         dest_len++;
1524                 }
1525         } else if (dest) {
1526                 dest[0] = 0;
1527         }
1528
1529         *ppdest = dest;
1530         return src_len + ucs2_align_len;
1531 }
1532
1533 size_t pull_ucs2_fstring(char *dest, const void *src)
1534 {
1535         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1536 }
1537
1538 /**
1539  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1540  *
1541  * @param dest always set at least to NULL
1542  * @parm converted_size set to the number of bytes occupied by the string in
1543  * the destination on success.
1544  *
1545  * @return true if new buffer was correctly allocated, and string was
1546  * converted.
1547  **/
1548
1549 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1550                       size_t *converted_size)
1551 {
1552         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1553
1554         *dest = NULL;
1555         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1556                                      (void **)dest, converted_size, True);
1557 }
1558
1559 /**
1560  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1561  *
1562  * @param dest always set at least to NULL
1563  * @parm converted_size set to the number of bytes occupied by the string in
1564  * the destination on success.
1565  *
1566  * @return true if new buffer was correctly allocated, and string was
1567  * converted.
1568  **/
1569
1570 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1571                       size_t *converted_size)
1572 {
1573         size_t src_len = strlen(src)+1;
1574
1575         *dest = NULL;
1576         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1577                                      (void **)dest, converted_size, True);
1578 }
1579
1580
1581 /**
1582  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1583  *
1584  * @param dest always set at least to NULL
1585  * @parm converted_size set to the number of bytes occupied by the string in
1586  * the destination on success.
1587  *
1588  * @return true if new buffer was correctly allocated, and string was
1589  * converted.
1590  **/
1591
1592 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1593                        size_t *converted_size)
1594 {
1595         size_t src_len = strlen(src)+1;
1596
1597         *dest = NULL;
1598         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1599                                      (void **)dest, converted_size, True);
1600 }
1601
1602 /**
1603  Copy a string from a char* src to a unicode or ascii
1604  dos codepage destination choosing unicode or ascii based on the
1605  flags supplied
1606  Return the number of bytes occupied by the string in the destination.
1607  flags can have:
1608   STR_TERMINATE means include the null termination.
1609   STR_UPPER     means uppercase in the destination.
1610   STR_ASCII     use ascii even with unicode packet.
1611   STR_NOALIGN   means don't do alignment.
1612  dest_len is the maximum length allowed in the destination. If dest_len
1613  is -1 then no maxiumum is used.
1614 **/
1615
1616 size_t push_string_check_fn(const char *function, unsigned int line,
1617                             void *dest, const char *src,
1618                             size_t dest_len, int flags)
1619 {
1620 #ifdef DEVELOPER
1621         /* We really need to zero fill here, not clobber
1622          * region, as we want to ensure that valgrind thinks
1623          * all of the outgoing buffer has been written to
1624          * so a send() or write() won't trap an error.
1625          * JRA.
1626          */
1627 #if 0
1628         clobber_region(function, line, dest, dest_len);
1629 #else
1630         memset(dest, '\0', dest_len);
1631 #endif
1632 #endif
1633
1634         if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1635                 return push_ucs2(NULL, dest, src, dest_len, flags);
1636         }
1637         return push_ascii(dest, src, dest_len, flags);
1638 }
1639
1640
1641 /**
1642  Copy a string from a char* src to a unicode or ascii
1643  dos codepage destination choosing unicode or ascii based on the
1644  flags in the SMB buffer starting at base_ptr.
1645  Return the number of bytes occupied by the string in the destination.
1646  flags can have:
1647   STR_TERMINATE means include the null termination.
1648   STR_UPPER     means uppercase in the destination.
1649   STR_ASCII     use ascii even with unicode packet.
1650   STR_NOALIGN   means don't do alignment.
1651  dest_len is the maximum length allowed in the destination. If dest_len
1652  is -1 then no maxiumum is used.
1653 **/
1654
1655 size_t push_string_base(const char *function, unsigned int line,
1656                         const char *base, uint16 flags2,
1657                         void *dest, const char *src,
1658                         size_t dest_len, int flags)
1659 {
1660 #ifdef DEVELOPER
1661         /* We really need to zero fill here, not clobber
1662          * region, as we want to ensure that valgrind thinks
1663          * all of the outgoing buffer has been written to
1664          * so a send() or write() won't trap an error.
1665          * JRA.
1666          */
1667 #if 0
1668         clobber_region(function, line, dest, dest_len);
1669 #else
1670         memset(dest, '\0', dest_len);
1671 #endif
1672 #endif
1673
1674         if (!(flags & STR_ASCII) && \
1675             ((flags & STR_UNICODE || \
1676               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1677                 return push_ucs2(base, dest, src, dest_len, flags);
1678         }
1679         return push_ascii(dest, src, dest_len, flags);
1680 }
1681
1682 /**
1683  Copy a string from a char* src to a unicode or ascii
1684  dos codepage destination choosing unicode or ascii based on the
1685  flags supplied
1686  Return the number of bytes occupied by the string in the destination.
1687  flags can have:
1688   STR_TERMINATE means include the null termination.
1689   STR_UPPER     means uppercase in the destination.
1690   STR_ASCII     use ascii even with unicode packet.
1691   STR_NOALIGN   means don't do alignment.
1692  dest_len is the maximum length allowed in the destination. If dest_len
1693  is -1 then no maxiumum is used.
1694 **/
1695
1696 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
1697 {
1698         size_t ret;
1699 #ifdef DEVELOPER
1700         /* We really need to zero fill here, not clobber
1701          * region, as we want to ensure that valgrind thinks
1702          * all of the outgoing buffer has been written to
1703          * so a send() or write() won't trap an error.
1704          * JRA.
1705          */
1706         memset(dest, '\0', dest_len);
1707 #endif
1708
1709         if (!(flags & STR_ASCII) && \
1710             (flags & STR_UNICODE)) {
1711                 ret = push_ucs2(NULL, dest, src, dest_len, flags);
1712         } else {
1713                 ret = push_ascii(dest, src, dest_len, flags);
1714         }
1715         if (ret == (size_t)-1) {
1716                 return -1;
1717         }
1718         return ret;
1719 }
1720
1721 /**
1722  Copy a string from a unicode or ascii source (depending on
1723  the packet flags) to a char* destination.
1724  Flags can have:
1725   STR_TERMINATE means the string in src is null terminated.
1726   STR_UNICODE   means to force as unicode.
1727   STR_ASCII     use ascii even with unicode packet.
1728   STR_NOALIGN   means don't do alignment.
1729  if STR_TERMINATE is set then src_len is ignored is it is -1
1730  src_len is the length of the source area in bytes.
1731  Return the number of bytes occupied by the string in src.
1732  The resulting string in "dest" is always null terminated.
1733 **/
1734
1735 size_t pull_string_fn(const char *function,
1736                         unsigned int line,
1737                         const void *base_ptr,
1738                         uint16 smb_flags2,
1739                         char *dest,
1740                         const void *src,
1741                         size_t dest_len,
1742                         size_t src_len,
1743                         int flags)
1744 {
1745 #ifdef DEVELOPER
1746         clobber_region(function, line, dest, dest_len);
1747 #endif
1748
1749         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1750                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1751                           "UNICODE defined");
1752         }
1753
1754         if (!(flags & STR_ASCII) && \
1755             ((flags & STR_UNICODE || \
1756               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1757                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1758         }
1759         return pull_ascii(dest, src, dest_len, src_len, flags);
1760 }
1761
1762 /**
1763  Copy a string from a unicode or ascii source (depending on
1764  the packet flags) to a char* destination.
1765  Variant that uses talloc.
1766  Flags can have:
1767   STR_TERMINATE means the string in src is null terminated.
1768   STR_UNICODE   means to force as unicode.
1769   STR_ASCII     use ascii even with unicode packet.
1770   STR_NOALIGN   means don't do alignment.
1771  if STR_TERMINATE is set then src_len is ignored is it is -1
1772  src_len is the length of the source area in bytes.
1773  Return the number of bytes occupied by the string in src.
1774  The resulting string in "dest" is always null terminated.
1775 **/
1776
1777 size_t pull_string_talloc_fn(const char *function,
1778                         unsigned int line,
1779                         TALLOC_CTX *ctx,
1780                         const void *base_ptr,
1781                         uint16 smb_flags2,
1782                         char **ppdest,
1783                         const void *src,
1784                         size_t src_len,
1785                         int flags)
1786 {
1787         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1788                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1789                           "UNICODE defined");
1790         }
1791
1792         if (!(flags & STR_ASCII) && \
1793             ((flags & STR_UNICODE || \
1794               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1795                 return pull_ucs2_base_talloc(ctx,
1796                                         base_ptr,
1797                                         ppdest,
1798                                         src,
1799                                         src_len,
1800                                         flags);
1801         }
1802         return pull_ascii_base_talloc(ctx,
1803                                         ppdest,
1804                                         src,
1805                                         src_len,
1806                                         flags);
1807 }
1808
1809
1810 size_t align_string(const void *base_ptr, const char *p, int flags)
1811 {
1812         if (!(flags & STR_ASCII) && \
1813             ((flags & STR_UNICODE || \
1814               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1815                 return ucs2_align(base_ptr, p, flags);
1816         }
1817         return 0;
1818 }
1819
1820 /*
1821   Return the unicode codepoint for the next multi-byte CH_UNIX character
1822   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1823
1824   Also return the number of bytes consumed (which tells the caller
1825   how many bytes to skip to get to the next CH_UNIX character).
1826
1827   Return INVALID_CODEPOINT if the next character cannot be converted.
1828 */
1829
1830 codepoint_t next_codepoint(const char *str, size_t *size)
1831 {
1832         /* It cannot occupy more than 4 bytes in UTF16 format */
1833         uint8_t buf[4];
1834         smb_iconv_t descriptor;
1835         size_t ilen_orig;
1836         size_t ilen;
1837         size_t olen;
1838         char *outbuf;
1839
1840         if ((str[0] & 0x80) == 0) {
1841                 *size = 1;
1842                 return (codepoint_t)str[0];
1843         }
1844
1845         /* We assume that no multi-byte character can take
1846            more than 5 bytes. This is OK as we only
1847            support codepoints up to 1M */
1848
1849         ilen_orig = strnlen(str, 5);
1850         ilen = ilen_orig;
1851
1852         lazy_initialize_conv();
1853
1854         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1855         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1856                 *size = 1;
1857                 return INVALID_CODEPOINT;
1858         }
1859
1860         /* This looks a little strange, but it is needed to cope
1861            with codepoints above 64k which are encoded as per RFC2781. */
1862         olen = 2;
1863         outbuf = (char *)buf;
1864         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1865         if (olen == 2) {
1866                 /* We failed to convert to a 2 byte character.
1867                    See if we can convert to a 4 UTF16-LE byte char encoding.
1868                 */
1869                 olen = 4;
1870                 outbuf = (char *)buf;
1871                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1872                 if (olen == 4) {
1873                         /* We didn't convert any bytes */
1874                         *size = 1;
1875                         return INVALID_CODEPOINT;
1876                 }
1877                 olen = 4 - olen;
1878         } else {
1879                 olen = 2 - olen;
1880         }
1881
1882         *size = ilen_orig - ilen;
1883
1884         if (olen == 2) {
1885                 /* 2 byte, UTF16-LE encoded value. */
1886                 return (codepoint_t)SVAL(buf, 0);
1887         }
1888         if (olen == 4) {
1889                 /* Decode a 4 byte UTF16-LE character manually.
1890                    See RFC2871 for the encoding machanism.
1891                 */
1892                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1893                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1894
1895                 return (codepoint_t)0x10000 +
1896                                 (w1 << 10) + w2;
1897         }
1898
1899         /* no other length is valid */
1900         return INVALID_CODEPOINT;
1901 }
1902
1903 /*
1904   push a single codepoint into a CH_UNIX string the target string must
1905   be able to hold the full character, which is guaranteed if it is at
1906   least 5 bytes in size. The caller may pass less than 5 bytes if they
1907   are sure the character will fit (for example, you can assume that
1908   uppercase/lowercase of a character will not add more than 1 byte)
1909
1910   return the number of bytes occupied by the CH_UNIX character, or
1911   -1 on failure
1912 */
1913 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1914 {
1915         smb_iconv_t descriptor;
1916         uint8_t buf[4];
1917         size_t ilen, olen;
1918         const char *inbuf;
1919
1920         if (c < 128) {
1921                 *str = c;
1922                 return 1;
1923         }
1924
1925         lazy_initialize_conv();
1926
1927         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1928         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1929                 return -1;
1930         }
1931
1932         if (c < 0x10000) {
1933                 ilen = 2;
1934                 olen = 5;
1935                 inbuf = (char *)buf;
1936                 SSVAL(buf, 0, c);
1937                 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1938                 if (ilen != 0) {
1939                         return -1;
1940                 }
1941                 return 5 - olen;
1942         }
1943
1944         c -= 0x10000;
1945
1946         buf[0] = (c>>10) & 0xFF;
1947         buf[1] = (c>>18) | 0xd8;
1948         buf[2] = c & 0xFF;
1949         buf[3] = ((c>>8) & 0x3) | 0xdc;
1950
1951         ilen = 4;
1952         olen = 5;
1953         inbuf = (char *)buf;
1954
1955         smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1956         if (ilen != 0) {
1957                 return -1;
1958         }
1959         return 5 - olen;
1960 }
1961
1962