source/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
  48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
  50 static bool initialized;
  51
  52 /**
  53  * Return the name of a charset to give to iconv().
  54  **/
  55 static const char *charset_name(charset_t ch)
  56 {
  57         const char *ret = NULL;
  58
  59         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  60         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  61         else if (ch == CH_UNIX) ret = lp_unix_charset();
  62         else if (ch == CH_DOS) ret = lp_dos_charset();
  63         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  64         else if (ch == CH_UTF8) ret = "UTF8";
  65
  66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  67         if (ret && !strcmp(ret, "LOCALE")) {
  68                 const char *ln = NULL;
  69
  70 #ifdef HAVE_SETLOCALE
  71                 setlocale(LC_ALL, "");
  72 #endif
  73                 ln = nl_langinfo(CODESET);
  74                 if (ln) {
  75                         /* Check whether the charset name is supported
  76                            by iconv */
  77                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  78                         if (handle == (smb_iconv_t) -1) {
  79                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  80                                 ln = NULL;
  81                         } else {
  82                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  83                                 smb_iconv_close(handle);
  84                         }
  85                 }
  86                 ret = ln;
  87         }
  88 #endif
  89
  90         if (!ret || !*ret) ret = "ASCII";
  91         return ret;
  92 }
  93
  94 void lazy_initialize_conv(void)
  95 {
  96         if (!initialized) {
  97                 load_case_tables();
  98                 init_iconv();
  99                 initialized = true;
 100         }
 101 }
 102
 103 /**
 104  * Destroy global objects allocated by init_iconv()
 105  **/
 106 void gfree_charcnv(void)
 107 {
 108         int c1, c2;
 109
 110         for (c1=0;c1<NUM_CHARSETS;c1++) {
 111                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 112                         if ( conv_handles[c1][c2] ) {
 113                                 smb_iconv_close( conv_handles[c1][c2] );
 114                                 conv_handles[c1][c2] = 0;
 115                         }
 116                 }
 117         }
 118         initialized = false;
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         bool did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_valid_table();
 182                 conv_silent = False;
 183         }
 184 }
 185
 186 /**
 187  * Convert string from one encoding to another, making error checking etc
 188  * Slow path version - uses (slow) iconv.
 189  *
 190  * @param src pointer to source string (multibyte or singlebyte)
 191  * @param srclen length of the source string in bytes
 192  * @param dest pointer to destination string (multibyte or singlebyte)
 193  * @param destlen maximal length allowed for string
 194  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 195  * @returns the number of bytes occupied in the destination
 196  *
 197  * Ensure the srclen contains the terminating zero.
 198  *
 199  **/
 200
 201 static size_t convert_string_internal(charset_t from, charset_t to,
 202                       void const *src, size_t srclen,
 203                       void *dest, size_t destlen, bool allow_bad_conv)
 204 {
 205         size_t i_len, o_len;
 206         size_t retval;
 207         const char* inbuf = (const char*)src;
 208         char* outbuf = (char*)dest;
 209         smb_iconv_t descriptor;
 210
 211         lazy_initialize_conv();
 212
 213         descriptor = conv_handles[from][to];
 214
 215         if (srclen == (size_t)-1) {
 216                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 217                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 218                 } else {
 219                         srclen = strlen((const char *)src)+1;
 220                 }
 221         }
 222
 223
 224         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 225                 if (!conv_silent)
 226                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 227                 return (size_t)-1;
 228         }
 229
 230         i_len=srclen;
 231         o_len=destlen;
 232
 233  again:
 234
 235         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 236         if(retval==(size_t)-1) {
 237                 const char *reason="unknown error";
 238                 switch(errno) {
 239                         case EINVAL:
 240                                 reason="Incomplete multibyte sequence";
 241                                 if (!conv_silent)
 242                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 243                                 if (allow_bad_conv)
 244                                         goto use_as_is;
 245                                 return (size_t)-1;
 246                         case E2BIG:
 247                                 reason="No more room";
 248                                 if (!conv_silent) {
 249                                         if (from == CH_UNIX) {
 250                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 251                                                         charset_name(from), charset_name(to),
 252                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 253                                         } else {
 254                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 255                                                         charset_name(from), charset_name(to),
 256                                                         (unsigned int)srclen, (unsigned int)destlen));
 257                                         }
 258                                 }
 259                                 break;
 260                         case EILSEQ:
 261                                 reason="Illegal multibyte sequence";
 262                                 if (!conv_silent)
 263                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 264                                 if (allow_bad_conv)
 265                                         goto use_as_is;
 266
 267                                 return (size_t)-1;
 268                         default:
 269                                 if (!conv_silent)
 270                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 271                                 return (size_t)-1;
 272                 }
 273                 /* smb_panic(reason); */
 274         }
 275         return destlen-o_len;
 276
 277  use_as_is:
 278
 279         /*
 280          * Conversion not supported. This is actually an error, but there are so
 281          * many misconfigured iconv systems and smb.conf's out there we can't just
 282          * fail. Do a very bad conversion instead.... JRA.
 283          */
 284
 285         {
 286                 if (o_len == 0 || i_len == 0)
 287                         return destlen - o_len;
 288
 289                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 290                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 291                         /* Can't convert from utf16 any endian to multibyte.
 292                            Replace with the default fail char.
 293                         */
 294                         if (i_len < 2)
 295                                 return destlen - o_len;
 296                         if (i_len >= 2) {
 297                                 *outbuf = lp_failed_convert_char();
 298
 299                                 outbuf++;
 300                                 o_len--;
 301
 302                                 inbuf += 2;
 303                                 i_len -= 2;
 304                         }
 305
 306                         if (o_len == 0 || i_len == 0)
 307                                 return destlen - o_len;
 308
 309                         /* Keep trying with the next char... */
 310                         goto again;
 311
 312                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 313                         /* Can't convert to UTF16LE - just widen by adding the
 314                            default fail char then zero.
 315                         */
 316                         if (o_len < 2)
 317                                 return destlen - o_len;
 318
 319                         outbuf[0] = lp_failed_convert_char();
 320                         outbuf[1] = '\0';
 321
 322                         inbuf++;
 323                         i_len--;
 324
 325                         outbuf += 2;
 326                         o_len -= 2;
 327
 328                         if (o_len == 0 || i_len == 0)
 329                                 return destlen - o_len;
 330
 331                         /* Keep trying with the next char... */
 332                         goto again;
 333
 334                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 335                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 336                         /* Failed multibyte to multibyte. Just copy the default fail char and
 337                                 try again. */
 338                         outbuf[0] = lp_failed_convert_char();
 339
 340                         inbuf++;
 341                         i_len--;
 342
 343                         outbuf++;
 344                         o_len--;
 345
 346                         if (o_len == 0 || i_len == 0)
 347                                 return destlen - o_len;
 348
 349                         /* Keep trying with the next char... */
 350                         goto again;
 351
 352                 } else {
 353                         /* Keep compiler happy.... */
 354                         return destlen - o_len;
 355                 }
 356         }
 357 }
 358
 359 /**
 360  * Convert string from one encoding to another, making error checking etc
 361  * Fast path version - handles ASCII first.
 362  *
 363  * @param src pointer to source string (multibyte or singlebyte)
 364  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 365  * @param dest pointer to destination string (multibyte or singlebyte)
 366  * @param destlen maximal length allowed for string - *NEVER* -1.
 367  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 368  * @returns the number of bytes occupied in the destination
 369  *
 370  * Ensure the srclen contains the terminating zero.
 371  *
 372  * This function has been hand-tuned to provide a fast path.
 373  * Don't change unless you really know what you are doing. JRA.
 374  **/
 375
 376 size_t convert_string(charset_t from, charset_t to,
 377                       void const *src, size_t srclen,
 378                       void *dest, size_t destlen, bool allow_bad_conv)
 379 {
 380         /*
 381          * NB. We deliberately don't do a strlen here if srclen == -1.
 382          * This is very expensive over millions of calls and is taken
 383          * care of in the slow path in convert_string_internal. JRA.
 384          */
 385
 386 #ifdef DEVELOPER
 387         SMB_ASSERT(destlen != (size_t)-1);
 388 #endif
 389
 390         if (srclen == 0)
 391                 return 0;
 392
 393         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 394                 const unsigned char *p = (const unsigned char *)src;
 395                 unsigned char *q = (unsigned char *)dest;
 396                 size_t slen = srclen;
 397                 size_t dlen = destlen;
 398                 unsigned char lastp = '\0';
 399                 size_t retval = 0;
 400
 401                 /* If all characters are ascii, fast path here. */
 402                 while (slen && dlen) {
 403                         if ((lastp = *p) <= 0x7f) {
 404                                 *q++ = *p++;
 405                                 if (slen != (size_t)-1) {
 406                                         slen--;
 407                                 }
 408                                 dlen--;
 409                                 retval++;
 410                                 if (!lastp)
 411                                         break;
 412                         } else {
 413 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 414                                 goto general_case;
 415 #else
 416                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 417                                 if (ret == (size_t)-1) {
 418                                         return ret;
 419                                 }
 420                                 return retval + ret;
 421 #endif
 422                         }
 423                 }
 424                 if (!dlen) {
 425                         /* Even if we fast path we should note if we ran out of room. */
 426                         if (((slen != (size_t)-1) && slen) ||
 427                                         ((slen == (size_t)-1) && lastp)) {
 428                                 errno = E2BIG;
 429                         }
 430                 }
 431                 return retval;
 432         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 433                 const unsigned char *p = (const unsigned char *)src;
 434                 unsigned char *q = (unsigned char *)dest;
 435                 size_t retval = 0;
 436                 size_t slen = srclen;
 437                 size_t dlen = destlen;
 438                 unsigned char lastp = '\0';
 439
 440                 /* If all characters are ascii, fast path here. */
 441                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 442                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 443                                 *q++ = *p;
 444                                 if (slen != (size_t)-1) {
 445                                         slen -= 2;
 446                                 }
 447                                 p += 2;
 448                                 dlen--;
 449                                 retval++;
 450                                 if (!lastp)
 451                                         break;
 452                         } else {
 453 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 454                                 goto general_case;
 455 #else
 456                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 457                                 if (ret == (size_t)-1) {
 458                                         return ret;
 459                                 }
 460                                 return retval + ret;
 461 #endif
 462                         }
 463                 }
 464                 if (!dlen) {
 465                         /* Even if we fast path we should note if we ran out of room. */
 466                         if (((slen != (size_t)-1) && slen) ||
 467                                         ((slen == (size_t)-1) && lastp)) {
 468                                 errno = E2BIG;
 469                         }
 470                 }
 471                 return retval;
 472         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 473                 const unsigned char *p = (const unsigned char *)src;
 474                 unsigned char *q = (unsigned char *)dest;
 475                 size_t retval = 0;
 476                 size_t slen = srclen;
 477                 size_t dlen = destlen;
 478                 unsigned char lastp = '\0';
 479
 480                 /* If all characters are ascii, fast path here. */
 481                 while (slen && (dlen >= 2)) {
 482                         if ((lastp = *p) <= 0x7F) {
 483                                 *q++ = *p++;
 484                                 *q++ = '\0';
 485                                 if (slen != (size_t)-1) {
 486                                         slen--;
 487                                 }
 488                                 dlen -= 2;
 489                                 retval += 2;
 490                                 if (!lastp)
 491                                         break;
 492                         } else {
 493 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 494                                 goto general_case;
 495 #else
 496                                 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 497                                 if (ret == (size_t)-1) {
 498                                         return ret;
 499                                 }
 500                                 return retval + ret;
 501 #endif
 502                         }
 503                 }
 504                 if (!dlen) {
 505                         /* Even if we fast path we should note if we ran out of room. */
 506                         if (((slen != (size_t)-1) && slen) ||
 507                                         ((slen == (size_t)-1) && lastp)) {
 508                                 errno = E2BIG;
 509                         }
 510                 }
 511                 return retval;
 512         }
 513
 514 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 515   general_case:
 516 #endif
 517         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 518 }
 519
 520 /**
 521  * Convert between character sets, allocating a new buffer for the result.
 522  *
 523  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 524  * (this is a bad interface and needs fixing. JRA).
 525  * @param srclen length of source buffer.
 526  * @param dest always set at least to NULL
 527  * @param converted_size set to the size of the allocated buffer on return
 528  * true
 529  * @note -1 is not accepted for srclen.
 530  *
 531  * @return True if new buffer was correctly allocated, and string was
 532  * converted.
 533  *
 534  * Ensure the srclen contains the terminating zero.
 535  *
 536  * I hate the goto's in this function. It's embarressing.....
 537  * There has to be a cleaner way to do this. JRA.
 538  **/
 539
 540 bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 541                              void const *src, size_t srclen, void *dst,
 542                              size_t *converted_size, bool allow_bad_conv)
 543 {
 544         size_t i_len, o_len, destlen = (srclen * 3) / 2;
 545         size_t retval;
 546         const char *inbuf = (const char *)src;
 547         char *outbuf = NULL, *ob = NULL;
 548         smb_iconv_t descriptor;
 549         void **dest = (void **)dst;
 550
 551         *dest = NULL;
 552
 553         if (!converted_size) {
 554                 errno = EINVAL;
 555                 return false;
 556         }
 557
 558         if (src == NULL || srclen == (size_t)-1) {
 559                 errno = EINVAL;
 560                 return false;
 561         }
 562         if (srclen == 0) {
 563                 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
 564                 if (ob == NULL) {
 565                         errno = ENOMEM;
 566                         return false;
 567                 }
 568                 *dest = ob;
 569                 *converted_size = 0;
 570                 return true;
 571         }
 572
 573         lazy_initialize_conv();
 574
 575         descriptor = conv_handles[from][to];
 576
 577         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 578                 if (!conv_silent)
 579                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 580                 errno = EOPNOTSUPP;
 581                 return false;
 582         }
 583
 584   convert:
 585
 586         /* +2 is for ucs2 null termination. */
 587         if ((destlen*2)+2 < destlen) {
 588                 /* wrapped ! abort. */
 589                 if (!conv_silent)
 590                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 591                 if (!ctx)
 592                         SAFE_FREE(outbuf);
 593                 errno = EOPNOTSUPP;
 594                 return false;
 595         } else {
 596                 destlen = destlen * 2;
 597         }
 598
 599         /* +2 is for ucs2 null termination. */
 600         if (ctx) {
 601                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
 602         } else {
 603                 ob = (char *)SMB_REALLOC(ob, destlen + 2);
 604         }
 605
 606         if (!ob) {
 607                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 608                 errno = ENOMEM;
 609                 return false;
 610         }
 611         outbuf = ob;
 612         i_len = srclen;
 613         o_len = destlen;
 614
 615  again:
 616
 617         retval = smb_iconv(descriptor,
 618                            &inbuf, &i_len,
 619                            &outbuf, &o_len);
 620         if(retval == (size_t)-1)                {
 621                 const char *reason="unknown error";
 622                 switch(errno) {
 623                         case EINVAL:
 624                                 reason="Incomplete multibyte sequence";
 625                                 if (!conv_silent)
 626                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 627                                 if (allow_bad_conv)
 628                                         goto use_as_is;
 629                                 break;
 630                         case E2BIG:
 631                                 goto convert;
 632                         case EILSEQ:
 633                                 reason="Illegal multibyte sequence";
 634                                 if (!conv_silent)
 635                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 636                                 if (allow_bad_conv)
 637                                         goto use_as_is;
 638                                 break;
 639                 }
 640                 if (!conv_silent)
 641                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 642                 /* smb_panic(reason); */
 643                 if (ctx) {
 644                         TALLOC_FREE(ob);
 645                 } else {
 646                         SAFE_FREE(ob);
 647                 }
 648                 return false;
 649         }
 650
 651   out:
 652
 653         destlen = destlen - o_len;
 654         /* Don't shrink unless we're reclaiming a lot of
 655          * space. This is in the hot codepath and these
 656          * reallocs *cost*. JRA.
 657          */
 658         if (o_len > 1024) {
 659                 /* We're shrinking here so we know the +2 is safe from wrap. */
 660                 if (ctx) {
 661                         ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
 662                 } else {
 663                         ob = (char *)SMB_REALLOC(ob,destlen + 2);
 664                 }
 665         }
 666
 667         if (destlen && !ob) {
 668                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 669                 errno = ENOMEM;
 670                 return false;
 671         }
 672
 673         *dest = ob;
 674
 675         /* Must ucs2 null terminate in the extra space we allocated. */
 676         ob[destlen] = '\0';
 677         ob[destlen+1] = '\0';
 678
 679         *converted_size = destlen;
 680         return true;
 681
 682  use_as_is:
 683
 684         /*
 685          * Conversion not supported. This is actually an error, but there are so
 686          * many misconfigured iconv systems and smb.conf's out there we can't just
 687          * fail. Do a very bad conversion instead.... JRA.
 688          */
 689
 690         {
 691                 if (o_len == 0 || i_len == 0)
 692                         goto out;
 693
 694                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 695                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 696                         /* Can't convert from utf16 any endian to multibyte.
 697                            Replace with the default fail char.
 698                         */
 699
 700                         if (i_len < 2)
 701                                 goto out;
 702
 703                         if (i_len >= 2) {
 704                                 *outbuf = lp_failed_convert_char();
 705
 706                                 outbuf++;
 707                                 o_len--;
 708
 709                                 inbuf += 2;
 710                                 i_len -= 2;
 711                         }
 712
 713                         if (o_len == 0 || i_len == 0)
 714                                 goto out;
 715
 716                         /* Keep trying with the next char... */
 717                         goto again;
 718
 719                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 720                         /* Can't convert to UTF16LE - just widen by adding the
 721                            default fail char then zero.
 722                         */
 723                         if (o_len < 2)
 724                                 goto out;
 725
 726                         outbuf[0] = lp_failed_convert_char();
 727                         outbuf[1] = '\0';
 728
 729                         inbuf++;
 730                         i_len--;
 731
 732                         outbuf += 2;
 733                         o_len -= 2;
 734
 735                         if (o_len == 0 || i_len == 0)
 736                                 goto out;
 737
 738                         /* Keep trying with the next char... */
 739                         goto again;
 740
 741                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 742                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 743                         /* Failed multibyte to multibyte. Just copy the default fail char and
 744                            try again. */
 745                         outbuf[0] = lp_failed_convert_char();
 746
 747                         inbuf++;
 748                         i_len--;
 749
 750                         outbuf++;
 751                         o_len--;
 752
 753                         if (o_len == 0 || i_len == 0)
 754                                 goto out;
 755
 756                         /* Keep trying with the next char... */
 757                         goto again;
 758
 759                 } else {
 760                         /* Keep compiler happy.... */
 761                         goto out;
 762                 }
 763         }
 764 }
 765
 766 /**
 767  * Convert between character sets, allocating a new buffer using talloc for the result.
 768  *
 769  * @param srclen length of source buffer.
 770  * @param dest always set at least to NULL
 771  * @note -1 is not accepted for srclen.
 772  *
 773  * @returns Size in bytes of the converted string; or -1 in case of error.
 774  **/
 775 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 776                              void const *src, size_t srclen, void *dst,
 777                              bool allow_bad_conv)
 778 {
 779         void **dest = (void **)dst;
 780         size_t dest_len;
 781
 782         *dest = NULL;
 783         if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
 784                 &dest_len, allow_bad_conv))
 785                 return (size_t)-1;
 786         if (*dest == NULL)
 787                 return (size_t)-1;
 788         return dest_len;
 789 }
 790
 791 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 792 {
 793         size_t size;
 794         smb_ucs2_t *buffer;
 795
 796         size = push_ucs2_allocate(&buffer, src);
 797         if (size == (size_t)-1) {
 798                 return (size_t)-1;
 799         }
 800         if (!strupper_w(buffer) && (dest == src)) {
 801                 free(buffer);
 802                 return srclen;
 803         }
 804
 805         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 806         free(buffer);
 807         return size;
 808 }
 809
 810 /**
 811  strdup() a unix string to upper case.
 812 **/
 813
 814 char *strdup_upper(const char *s)
 815 {
 816         char *out_buffer = SMB_STRDUP(s);
 817         const unsigned char *p = (const unsigned char *)s;
 818         unsigned char *q = (unsigned char *)out_buffer;
 819
 820         if (!q) {
 821                 return NULL;
 822         }
 823
 824         /* this is quite a common operation, so we want it to be
 825            fast. We optimise for the ascii case, knowing that all our
 826            supported multi-byte character sets are ascii-compatible
 827            (ie. they match for the first 128 chars) */
 828
 829         while (*p) {
 830                 if (*p & 0x80)
 831                         break;
 832                 *q++ = toupper_ascii_fast(*p);
 833                 p++;
 834         }
 835
 836         if (*p) {
 837                 /* MB case. */
 838                 size_t size, size2;
 839                 smb_ucs2_t *buffer = NULL;
 840
 841                 SAFE_FREE(out_buffer);
 842                 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
 843                         strlen(s) + 1, (void **)(void *)&buffer, &size,
 844                         True)) {
 845                         return NULL;
 846                 }
 847
 848                 strupper_w(buffer);
 849
 850                 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
 851                         size, (void **)(void *)&out_buffer, &size2, True)) {
 852                         TALLOC_FREE(buffer);
 853                         return NULL;
 854                 }
 855
 856                 /* Don't need the intermediate buffer
 857                  * anymore.
 858                  */
 859                 TALLOC_FREE(buffer);
 860         }
 861
 862         return out_buffer;
 863 }
 864
 865 /**
 866  talloc_strdup() a unix string to upper case.
 867 **/
 868
 869 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 870 {
 871         char *out_buffer = talloc_strdup(ctx,s);
 872         const unsigned char *p = (const unsigned char *)s;
 873         unsigned char *q = (unsigned char *)out_buffer;
 874
 875         if (!q) {
 876                 return NULL;
 877         }
 878
 879         /* this is quite a common operation, so we want it to be
 880            fast. We optimise for the ascii case, knowing that all our
 881            supported multi-byte character sets are ascii-compatible
 882            (ie. they match for the first 128 chars) */
 883
 884         while (*p) {
 885                 if (*p & 0x80)
 886                         break;
 887                 *q++ = toupper_ascii_fast(*p);
 888                 p++;
 889         }
 890
 891         if (*p) {
 892                 /* MB case. */
 893                 size_t size;
 894                 smb_ucs2_t *ubuf = NULL;
 895
 896                 /* We're not using the ascii buffer above. */
 897                 TALLOC_FREE(out_buffer);
 898
 899                 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
 900                                 s, strlen(s)+1,
 901                                 (void *)&ubuf,
 902                                 True);
 903                 if (size == (size_t)-1) {
 904                         return NULL;
 905                 }
 906
 907                 strupper_w(ubuf);
 908
 909                 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
 910                                 ubuf, size,
 911                                 (void *)&out_buffer,
 912                                 True);
 913
 914                 /* Don't need the intermediate buffer
 915                  * anymore.
 916                  */
 917
 918                 TALLOC_FREE(ubuf);
 919
 920                 if (size == (size_t)-1) {
 921                         return NULL;
 922                 }
 923         }
 924
 925         return out_buffer;
 926 }
 927
 928 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 929 {
 930         size_t size;
 931         smb_ucs2_t *buffer = NULL;
 932
 933         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 934                 (void **)(void *)&buffer, &size, True)) {
 935                 smb_panic("failed to create UCS2 buffer");
 936         }
 937         if (!strlower_w(buffer) && (dest == src)) {
 938                 SAFE_FREE(buffer);
 939                 return srclen;
 940         }
 941         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 942         SAFE_FREE(buffer);
 943         return size;
 944 }
 945
 946 /**
 947  strdup() a unix string to lower case.
 948 **/
 949
 950 char *strdup_lower(const char *s)
 951 {
 952         size_t size;
 953         smb_ucs2_t *buffer = NULL;
 954         char *out_buffer;
 955
 956         size = push_ucs2_allocate(&buffer, s);
 957         if (size == -1 || !buffer) {
 958                 return NULL;
 959         }
 960
 961         strlower_w(buffer);
 962
 963         size = pull_ucs2_allocate(&out_buffer, buffer);
 964         SAFE_FREE(buffer);
 965
 966         if (size == (size_t)-1) {
 967                 return NULL;
 968         }
 969
 970         return out_buffer;
 971 }
 972
 973 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
 974 {
 975         size_t size;
 976         smb_ucs2_t *buffer = NULL;
 977         char *out_buffer;
 978
 979         size = push_ucs2_talloc(ctx, &buffer, s);
 980         if (size == -1 || !buffer) {
 981                 TALLOC_FREE(buffer);
 982                 return NULL;
 983         }
 984
 985         strlower_w(buffer);
 986
 987         size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
 988         TALLOC_FREE(buffer);
 989
 990         if (size == (size_t)-1) {
 991                 TALLOC_FREE(out_buffer);
 992                 return NULL;
 993         }
 994
 995         return out_buffer;
 996 }
 997
 998
 999 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
1000 {
1001         if (flags & (STR_NOALIGN|STR_ASCII))
1002                 return 0;
1003         return PTR_DIFF(p, base_ptr) & 1;
1004 }
1005
1006
1007 /**
1008  * Copy a string from a char* unix src to a dos codepage string destination.
1009  *
1010  * @return the number of bytes occupied by the string in the destination.
1011  *
1012  * @param flags can include
1013  * <dl>
1014  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1015  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1016  * </dl>
1017  *
1018  * @param dest_len the maximum length in bytes allowed in the
1019  * destination.
1020  **/
1021 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1022 {
1023         size_t src_len = strlen(src);
1024         char *tmpbuf = NULL;
1025         size_t ret;
1026
1027         /* No longer allow a length of -1. */
1028         if (dest_len == (size_t)-1) {
1029                 smb_panic("push_ascii - dest_len == -1");
1030         }
1031
1032         if (flags & STR_UPPER) {
1033                 tmpbuf = SMB_STRDUP(src);
1034                 if (!tmpbuf) {
1035                         smb_panic("malloc fail");
1036                 }
1037                 strupper_m(tmpbuf);
1038                 src = tmpbuf;
1039         }
1040
1041         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1042                 src_len++;
1043         }
1044
1045         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1046         if (ret == (size_t)-1 &&
1047                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1048                         && dest_len > 0) {
1049                 ((char *)dest)[0] = '\0';
1050         }
1051         SAFE_FREE(tmpbuf);
1052         return ret;
1053 }
1054
1055 size_t push_ascii_fstring(void *dest, const char *src)
1056 {
1057         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1058 }
1059
1060 /********************************************************************
1061  Push an nstring - ensure null terminated. Written by
1062  moriyama@miraclelinux.com (MORIYAMA Masayuki).
1063 ********************************************************************/
1064
1065 size_t push_ascii_nstring(void *dest, const char *src)
1066 {
1067         size_t i, buffer_len, dest_len;
1068         smb_ucs2_t *buffer;
1069
1070         conv_silent = True;
1071         buffer_len = push_ucs2_allocate(&buffer, src);
1072         if (buffer_len == (size_t)-1) {
1073                 smb_panic("failed to create UCS2 buffer");
1074         }
1075
1076         /* We're using buffer_len below to count ucs2 characters, not bytes. */
1077         buffer_len /= sizeof(smb_ucs2_t);
1078
1079         dest_len = 0;
1080         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1081                 unsigned char mb[10];
1082                 /* Convert one smb_ucs2_t character at a time. */
1083                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1084                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1085                         memcpy((char *)dest + dest_len, mb, mb_len);
1086                         dest_len += mb_len;
1087                 } else {
1088                         errno = E2BIG;
1089                         break;
1090                 }
1091         }
1092         ((char *)dest)[dest_len] = '\0';
1093
1094         SAFE_FREE(buffer);
1095         conv_silent = False;
1096         return dest_len;
1097 }
1098
1099 /********************************************************************
1100  Push and malloc an ascii string. src and dest null terminated.
1101 ********************************************************************/
1102
1103 size_t push_ascii_allocate(char **dest, const char *src)
1104 {
1105         size_t dest_len, src_len = strlen(src)+1;
1106
1107         *dest = NULL;
1108         if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1109                 (void **)dest, &dest_len, True))
1110                 return (size_t)-1;
1111         else
1112                 return dest_len;
1113 }
1114
1115 /**
1116  * Copy a string from a dos codepage source to a unix char* destination.
1117  *
1118  * The resulting string in "dest" is always null terminated.
1119  *
1120  * @param flags can have:
1121  * <dl>
1122  * <dt>STR_TERMINATE</dt>
1123  * <dd>STR_TERMINATE means the string in @p src
1124  * is null terminated, and src_len is ignored.</dd>
1125  * </dl>
1126  *
1127  * @param src_len is the length of the source area in bytes.
1128  * @returns the number of bytes occupied by the string in @p src.
1129  **/
1130 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1131 {
1132         size_t ret;
1133
1134         if (dest_len == (size_t)-1) {
1135                 /* No longer allow dest_len of -1. */
1136                 smb_panic("pull_ascii - invalid dest_len of -1");
1137         }
1138
1139         if (flags & STR_TERMINATE) {
1140                 if (src_len == (size_t)-1) {
1141                         src_len = strlen((const char *)src) + 1;
1142                 } else {
1143                         size_t len = strnlen((const char *)src, src_len);
1144                         if (len < src_len)
1145                                 len++;
1146                         src_len = len;
1147                 }
1148         }
1149
1150         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1151         if (ret == (size_t)-1) {
1152                 ret = 0;
1153                 dest_len = 0;
1154         }
1155
1156         if (dest_len && ret) {
1157                 /* Did we already process the terminating zero ? */
1158                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1159                         dest[MIN(ret, dest_len-1)] = 0;
1160                 }
1161         } else  {
1162                 dest[0] = 0;
1163         }
1164
1165         return src_len;
1166 }
1167
1168 /**
1169  * Copy a string from a dos codepage source to a unix char* destination.
1170  Talloc version.
1171  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1172  needs fixing. JRA).
1173  *
1174  * The resulting string in "dest" is always null terminated.
1175  *
1176  * @param flags can have:
1177  * <dl>
1178  * <dt>STR_TERMINATE</dt>
1179  * <dd>STR_TERMINATE means the string in @p src
1180  * is null terminated, and src_len is ignored.</dd>
1181  * </dl>
1182  *
1183  * @param src_len is the length of the source area in bytes.
1184  * @returns the number of bytes occupied by the string in @p src.
1185  **/
1186
1187 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1188                                         char **ppdest,
1189                                         const void *src,
1190                                         size_t src_len,
1191                                         int flags)
1192 {
1193         char *dest = NULL;
1194         size_t dest_len = 0;
1195
1196 #ifdef DEVELOPER
1197         /* Ensure we never use the braindead "malloc" varient. */
1198         if (ctx == NULL) {
1199                 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1200         }
1201 #endif
1202
1203         *ppdest = NULL;
1204
1205         if (!src_len) {
1206                 return 0;
1207         }
1208
1209         if (flags & STR_TERMINATE) {
1210                 if (src_len == (size_t)-1) {
1211                         src_len = strlen((const char *)src) + 1;
1212                 } else {
1213                         size_t len = strnlen((const char *)src, src_len);
1214                         if (len < src_len)
1215                                 len++;
1216                         src_len = len;
1217                 }
1218                 /* Ensure we don't use an insane length from the client. */
1219                 if (src_len >= 1024*1024) {
1220                         char *msg = talloc_asprintf(ctx,
1221                                         "Bad src length (%u) in "
1222                                         "pull_ascii_base_talloc",
1223                                         (unsigned int)src_len);
1224                         smb_panic(msg);
1225                 }
1226         } else {
1227                 /* Can't have an unlimited length
1228                  * non STR_TERMINATE'd.
1229                  */
1230                 if (src_len == (size_t)-1) {
1231                         errno = EINVAL;
1232                         return 0;
1233                 }
1234         }
1235
1236         /* src_len != -1 here. */
1237
1238         if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1239                 &dest_len, True)) {
1240                 dest_len = 0;
1241         }
1242
1243         if (dest_len && dest) {
1244                 /* Did we already process the terminating zero ? */
1245                 if (dest[dest_len-1] != 0) {
1246                         size_t size = talloc_get_size(dest);
1247                         /* Have we got space to append the '\0' ? */
1248                         if (size <= dest_len) {
1249                                 /* No, realloc. */
1250                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1251                                                 dest_len+1);
1252                                 if (!dest) {
1253                                         /* talloc fail. */
1254                                         dest_len = (size_t)-1;
1255                                         return 0;
1256                                 }
1257                         }
1258                         /* Yay - space ! */
1259                         dest[dest_len] = '\0';
1260                         dest_len++;
1261                 }
1262         } else if (dest) {
1263                 dest[0] = 0;
1264         }
1265
1266         *ppdest = dest;
1267         return src_len;
1268 }
1269
1270 size_t pull_ascii_fstring(char *dest, const void *src)
1271 {
1272         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1273 }
1274
1275 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1276
1277 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1278 {
1279         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1280 }
1281
1282 /**
1283  * Copy a string from a char* src to a unicode destination.
1284  *
1285  * @returns the number of bytes occupied by the string in the destination.
1286  *
1287  * @param flags can have:
1288  *
1289  * <dl>
1290  * <dt>STR_TERMINATE <dd>means include the null termination.
1291  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1292  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1293  * </dl>
1294  *
1295  * @param dest_len is the maximum length allowed in the
1296  * destination.
1297  **/
1298
1299 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1300 {
1301         size_t len=0;
1302         size_t src_len;
1303         size_t ret;
1304
1305         if (dest_len == (size_t)-1) {
1306                 /* No longer allow dest_len of -1. */
1307                 smb_panic("push_ucs2 - invalid dest_len of -1");
1308         }
1309
1310         if (flags & STR_TERMINATE)
1311                 src_len = (size_t)-1;
1312         else
1313                 src_len = strlen(src);
1314
1315         if (ucs2_align(base_ptr, dest, flags)) {
1316                 *(char *)dest = 0;
1317                 dest = (void *)((char *)dest + 1);
1318                 if (dest_len)
1319                         dest_len--;
1320                 len++;
1321         }
1322
1323         /* ucs2 is always a multiple of 2 bytes */
1324         dest_len &= ~1;
1325
1326         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1327         if (ret == (size_t)-1) {
1328                 if ((flags & STR_TERMINATE) &&
1329                                 dest &&
1330                                 dest_len) {
1331                         *(char *)dest = 0;
1332                 }
1333                 return len;
1334         }
1335
1336         len += ret;
1337
1338         if (flags & STR_UPPER) {
1339                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1340                 size_t i;
1341
1342                 /* We check for i < (ret / 2) below as the dest string isn't null
1343                    terminated if STR_TERMINATE isn't set. */
1344
1345                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1346                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1347                         if (v != dest_ucs2[i]) {
1348                                 dest_ucs2[i] = v;
1349                         }
1350                 }
1351         }
1352
1353         return len;
1354 }
1355
1356
1357 /**
1358  * Copy a string from a unix char* src to a UCS2 destination,
1359  * allocating a buffer using talloc().
1360  *
1361  * @param dest always set at least to NULL
1362  *
1363  * @returns The number of bytes occupied by the string in the destination
1364  *         or -1 in case of error.
1365  **/
1366 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1367 {
1368         size_t src_len = strlen(src)+1;
1369
1370         *dest = NULL;
1371         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1372 }
1373
1374
1375 /**
1376  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1377  *
1378  * @param dest always set at least to NULL
1379  *
1380  * @returns The number of bytes occupied by the string in the destination
1381  *         or -1 in case of error.
1382  **/
1383
1384 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1385 {
1386         size_t dest_len, src_len = strlen(src)+1;
1387
1388         *dest = NULL;
1389         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1390                 (void **)dest, &dest_len, True))
1391                 return (size_t)-1;
1392         else
1393                 return dest_len;
1394 }
1395
1396 /**
1397  Copy a string from a char* src to a UTF-8 destination.
1398  Return the number of bytes occupied by the string in the destination
1399  Flags can have:
1400   STR_TERMINATE means include the null termination
1401   STR_UPPER     means uppercase in the destination
1402  dest_len is the maximum length allowed in the destination. If dest_len
1403  is -1 then no maxiumum is used.
1404 **/
1405
1406 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1407 {
1408         size_t src_len = 0;
1409         size_t ret;
1410         char *tmpbuf = NULL;
1411
1412         if (dest_len == (size_t)-1) {
1413                 /* No longer allow dest_len of -1. */
1414                 smb_panic("push_utf8 - invalid dest_len of -1");
1415         }
1416
1417         if (flags & STR_UPPER) {
1418                 tmpbuf = strdup_upper(src);
1419                 if (!tmpbuf) {
1420                         return (size_t)-1;
1421                 }
1422                 src = tmpbuf;
1423                 src_len = strlen(src);
1424         }
1425
1426         src_len = strlen(src);
1427         if (flags & STR_TERMINATE) {
1428                 src_len++;
1429         }
1430
1431         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1432         SAFE_FREE(tmpbuf);
1433         return ret;
1434 }
1435
1436 size_t push_utf8_fstring(void *dest, const char *src)
1437 {
1438         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1439 }
1440
1441 /**
1442  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1443  *
1444  * @param dest always set at least to NULL
1445  *
1446  * @returns The number of bytes occupied by the string in the destination
1447  **/
1448
1449 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1450 {
1451         size_t src_len = strlen(src)+1;
1452
1453         *dest = NULL;
1454         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1455 }
1456
1457 /**
1458  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1459  *
1460  * @param dest always set at least to NULL
1461  *
1462  * @returns The number of bytes occupied by the string in the destination
1463  **/
1464
1465 size_t push_utf8_allocate(char **dest, const char *src)
1466 {
1467         size_t dest_len, src_len = strlen(src)+1;
1468
1469         *dest = NULL;
1470         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1471                 (void **)dest, &dest_len, True))
1472                 return (size_t)-1;
1473         else
1474                 return dest_len;
1475 }
1476
1477 /**
1478  Copy a string from a ucs2 source to a unix char* destination.
1479  Flags can have:
1480   STR_TERMINATE means the string in src is null terminated.
1481   STR_NOALIGN   means don't try to align.
1482  if STR_TERMINATE is set then src_len is ignored if it is -1.
1483  src_len is the length of the source area in bytes
1484  Return the number of bytes occupied by the string in src.
1485  The resulting string in "dest" is always null terminated.
1486 **/
1487
1488 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1489 {
1490         size_t ret;
1491
1492         if (dest_len == (size_t)-1) {
1493                 /* No longer allow dest_len of -1. */
1494                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1495         }
1496
1497         if (!src_len) {
1498                 if (dest && dest_len > 0) {
1499                         dest[0] = '\0';
1500                 }
1501                 return 0;
1502         }
1503
1504         if (ucs2_align(base_ptr, src, flags)) {
1505                 src = (const void *)((const char *)src + 1);
1506                 if (src_len != (size_t)-1)
1507                         src_len--;
1508         }
1509
1510         if (flags & STR_TERMINATE) {
1511                 /* src_len -1 is the default for null terminated strings. */
1512                 if (src_len != (size_t)-1) {
1513                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1514                                                 src_len/2);
1515                         if (len < src_len/2)
1516                                 len++;
1517                         src_len = len*2;
1518                 }
1519         }
1520
1521         /* ucs2 is always a multiple of 2 bytes */
1522         if (src_len != (size_t)-1)
1523                 src_len &= ~1;
1524
1525         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1526         if (ret == (size_t)-1) {
1527                 ret = 0;
1528                 dest_len = 0;
1529         }
1530
1531         if (src_len == (size_t)-1)
1532                 src_len = ret*2;
1533
1534         if (dest_len && ret) {
1535                 /* Did we already process the terminating zero ? */
1536                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1537                         dest[MIN(ret, dest_len-1)] = 0;
1538                 }
1539         } else {
1540                 dest[0] = 0;
1541         }
1542
1543         return src_len;
1544 }
1545
1546 /**
1547  Copy a string from a ucs2 source to a unix char* destination.
1548  Talloc version with a base pointer.
1549  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1550  needs fixing. JRA).
1551  Flags can have:
1552   STR_TERMINATE means the string in src is null terminated.
1553   STR_NOALIGN   means don't try to align.
1554  if STR_TERMINATE is set then src_len is ignored if it is -1.
1555  src_len is the length of the source area in bytes
1556  Return the number of bytes occupied by the string in src.
1557  The resulting string in "dest" is always null terminated.
1558 **/
1559
1560 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1561                         const void *base_ptr,
1562                         char **ppdest,
1563                         const void *src,
1564                         size_t src_len,
1565                         int flags)
1566 {
1567         char *dest;
1568         size_t dest_len;
1569
1570         *ppdest = NULL;
1571
1572 #ifdef DEVELOPER
1573         /* Ensure we never use the braindead "malloc" varient. */
1574         if (ctx == NULL) {
1575                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1576         }
1577 #endif
1578
1579         if (!src_len) {
1580                 return 0;
1581         }
1582
1583         if (ucs2_align(base_ptr, src, flags)) {
1584                 src = (const void *)((const char *)src + 1);
1585                 if (src_len != (size_t)-1)
1586                         src_len--;
1587         }
1588
1589         if (flags & STR_TERMINATE) {
1590                 /* src_len -1 is the default for null terminated strings. */
1591                 if (src_len != (size_t)-1) {
1592                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1593                                                 src_len/2);
1594                         if (len < src_len/2)
1595                                 len++;
1596                         src_len = len*2;
1597                 } else {
1598                         /*
1599                          * src_len == -1 - alloc interface won't take this
1600                          * so we must calculate.
1601                          */
1602                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1603                 }
1604                 /* Ensure we don't use an insane length from the client. */
1605                 if (src_len >= 1024*1024) {
1606                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1607                 }
1608         } else {
1609                 /* Can't have an unlimited length
1610                  * non STR_TERMINATE'd.
1611                  */
1612                 if (src_len == (size_t)-1) {
1613                         errno = EINVAL;
1614                         return 0;
1615                 }
1616         }
1617
1618         /* src_len != -1 here. */
1619
1620         /* ucs2 is always a multiple of 2 bytes */
1621         src_len &= ~1;
1622
1623         dest_len = convert_string_talloc(ctx,
1624                                         CH_UTF16LE,
1625                                         CH_UNIX,
1626                                         src,
1627                                         src_len,
1628                                         (void *)&dest,
1629                                         True);
1630         if (dest_len == (size_t)-1) {
1631                 dest_len = 0;
1632         }
1633
1634         if (dest_len) {
1635                 /* Did we already process the terminating zero ? */
1636                 if (dest[dest_len-1] != 0) {
1637                         size_t size = talloc_get_size(dest);
1638                         /* Have we got space to append the '\0' ? */
1639                         if (size <= dest_len) {
1640                                 /* No, realloc. */
1641                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1642                                                 dest_len+1);
1643                                 if (!dest) {
1644                                         /* talloc fail. */
1645                                         dest_len = (size_t)-1;
1646                                         return 0;
1647                                 }
1648                         }
1649                         /* Yay - space ! */
1650                         dest[dest_len] = '\0';
1651                         dest_len++;
1652                 }
1653         } else if (dest) {
1654                 dest[0] = 0;
1655         }
1656
1657         *ppdest = dest;
1658         return src_len;
1659 }
1660
1661 size_t pull_ucs2_fstring(char *dest, const void *src)
1662 {
1663         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1664 }
1665
1666 /**
1667  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1668  *
1669  * @param dest always set at least to NULL
1670  *
1671  * @returns The number of bytes occupied by the string in the destination
1672  **/
1673
1674 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1675 {
1676         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1677         *dest = NULL;
1678         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1679 }
1680
1681 /**
1682  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1683  *
1684  * @param dest always set at least to NULL
1685  *
1686  * @returns The number of bytes occupied by the string in the destination
1687  **/
1688
1689 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1690 {
1691         size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1692         *dest = NULL;
1693         if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1694                 (void **)dest, &dest_len, True))
1695                 return (size_t)-1;
1696         else
1697                 return dest_len;
1698 }
1699
1700 /**
1701  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1702  *
1703  * @param dest always set at least to NULL
1704  *
1705  * @returns The number of bytes occupied by the string in the destination
1706  **/
1707
1708 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1709 {
1710         size_t src_len = strlen(src)+1;
1711         *dest = NULL;
1712         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1713 }
1714
1715 /**
1716  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1717  *
1718  * @param dest always set at least to NULL
1719  *
1720  * @returns The number of bytes occupied by the string in the destination
1721  **/
1722
1723 size_t pull_utf8_allocate(char **dest, const char *src)
1724 {
1725         size_t dest_len, src_len = strlen(src)+1;
1726         *dest = NULL;
1727         if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1728                 (void **)dest, &dest_len, True))
1729                 return (size_t)-1;
1730         else
1731                 return dest_len;
1732 }
1733
1734 /**
1735  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1736  *
1737  * @param dest always set at least to NULL
1738  *
1739  * @returns The number of bytes occupied by the string in the destination
1740  **/
1741
1742 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1743 {
1744         size_t src_len = strlen(src)+1;
1745         *dest = NULL;
1746         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1747 }
1748
1749 /**
1750  Copy a string from a char* src to a unicode or ascii
1751  dos codepage destination choosing unicode or ascii based on the
1752  flags in the SMB buffer starting at base_ptr.
1753  Return the number of bytes occupied by the string in the destination.
1754  flags can have:
1755   STR_TERMINATE means include the null termination.
1756   STR_UPPER     means uppercase in the destination.
1757   STR_ASCII     use ascii even with unicode packet.
1758   STR_NOALIGN   means don't do alignment.
1759  dest_len is the maximum length allowed in the destination. If dest_len
1760  is -1 then no maxiumum is used.
1761 **/
1762
1763 size_t push_string_fn(const char *function, unsigned int line,
1764                       const void *base_ptr, uint16 flags2,
1765                       void *dest, const char *src,
1766                       size_t dest_len, int flags)
1767 {
1768 #ifdef DEVELOPER
1769         /* We really need to zero fill here, not clobber
1770          * region, as we want to ensure that valgrind thinks
1771          * all of the outgoing buffer has been written to
1772          * so a send() or write() won't trap an error.
1773          * JRA.
1774          */
1775 #if 0
1776         clobber_region(function, line, dest, dest_len);
1777 #else
1778         memset(dest, '\0', dest_len);
1779 #endif
1780 #endif
1781
1782         if (!(flags & STR_ASCII) && \
1783             ((flags & STR_UNICODE || \
1784               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1785                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1786         }
1787         return push_ascii(dest, src, dest_len, flags);
1788 }
1789
1790
1791 /**
1792  Copy a string from a unicode or ascii source (depending on
1793  the packet flags) to a char* destination.
1794  Flags can have:
1795   STR_TERMINATE means the string in src is null terminated.
1796   STR_UNICODE   means to force as unicode.
1797   STR_ASCII     use ascii even with unicode packet.
1798   STR_NOALIGN   means don't do alignment.
1799  if STR_TERMINATE is set then src_len is ignored is it is -1
1800  src_len is the length of the source area in bytes.
1801  Return the number of bytes occupied by the string in src.
1802  The resulting string in "dest" is always null terminated.
1803 **/
1804
1805 size_t pull_string_fn(const char *function,
1806                         unsigned int line,
1807                         const void *base_ptr,
1808                         uint16 smb_flags2,
1809                         char *dest,
1810                         const void *src,
1811                         size_t dest_len,
1812                         size_t src_len,
1813                         int flags)
1814 {
1815 #ifdef DEVELOPER
1816         clobber_region(function, line, dest, dest_len);
1817 #endif
1818
1819         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1820                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1821                           "UNICODE defined");
1822         }
1823
1824         if (!(flags & STR_ASCII) && \
1825             ((flags & STR_UNICODE || \
1826               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1827                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1828         }
1829         return pull_ascii(dest, src, dest_len, src_len, flags);
1830 }
1831
1832 /**
1833  Copy a string from a unicode or ascii source (depending on
1834  the packet flags) to a char* destination.
1835  Variant that uses talloc.
1836  Flags can have:
1837   STR_TERMINATE means the string in src is null terminated.
1838   STR_UNICODE   means to force as unicode.
1839   STR_ASCII     use ascii even with unicode packet.
1840   STR_NOALIGN   means don't do alignment.
1841  if STR_TERMINATE is set then src_len is ignored is it is -1
1842  src_len is the length of the source area in bytes.
1843  Return the number of bytes occupied by the string in src.
1844  The resulting string in "dest" is always null terminated.
1845 **/
1846
1847 size_t pull_string_talloc_fn(const char *function,
1848                         unsigned int line,
1849                         TALLOC_CTX *ctx,
1850                         const void *base_ptr,
1851                         uint16 smb_flags2,
1852                         char **ppdest,
1853                         const void *src,
1854                         size_t src_len,
1855                         int flags)
1856 {
1857         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1858                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1859                           "UNICODE defined");
1860         }
1861
1862         if (!(flags & STR_ASCII) && \
1863             ((flags & STR_UNICODE || \
1864               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1865                 return pull_ucs2_base_talloc(ctx,
1866                                         base_ptr,
1867                                         ppdest,
1868                                         src,
1869                                         src_len,
1870                                         flags);
1871         }
1872         return pull_ascii_base_talloc(ctx,
1873                                         ppdest,
1874                                         src,
1875                                         src_len,
1876                                         flags);
1877 }
1878
1879
1880 size_t align_string(const void *base_ptr, const char *p, int flags)
1881 {
1882         if (!(flags & STR_ASCII) && \
1883             ((flags & STR_UNICODE || \
1884               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1885                 return ucs2_align(base_ptr, p, flags);
1886         }
1887         return 0;
1888 }
1889
1890 /*
1891   Return the unicode codepoint for the next multi-byte CH_UNIX character
1892   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1893
1894   Also return the number of bytes consumed (which tells the caller
1895   how many bytes to skip to get to the next CH_UNIX character).
1896
1897   Return INVALID_CODEPOINT if the next character cannot be converted.
1898 */
1899
1900 codepoint_t next_codepoint(const char *str, size_t *size)
1901 {
1902         /* It cannot occupy more than 4 bytes in UTF16 format */
1903         uint8_t buf[4];
1904         smb_iconv_t descriptor;
1905         size_t ilen_orig;
1906         size_t ilen;
1907         size_t olen;
1908         char *outbuf;
1909
1910         if ((str[0] & 0x80) == 0) {
1911                 *size = 1;
1912                 return (codepoint_t)str[0];
1913         }
1914
1915         /* We assume that no multi-byte character can take
1916            more than 5 bytes. This is OK as we only
1917            support codepoints up to 1M */
1918
1919         ilen_orig = strnlen(str, 5);
1920         ilen = ilen_orig;
1921
1922         lazy_initialize_conv();
1923
1924         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1925         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1926                 *size = 1;
1927                 return INVALID_CODEPOINT;
1928         }
1929
1930         /* This looks a little strange, but it is needed to cope
1931            with codepoints above 64k which are encoded as per RFC2781. */
1932         olen = 2;
1933         outbuf = (char *)buf;
1934         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1935         if (olen == 2) {
1936                 /* We failed to convert to a 2 byte character.
1937                    See if we can convert to a 4 UTF16-LE byte char encoding.
1938                 */
1939                 olen = 4;
1940                 outbuf = (char *)buf;
1941                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1942                 if (olen == 4) {
1943                         /* We didn't convert any bytes */
1944                         *size = 1;
1945                         return INVALID_CODEPOINT;
1946                 }
1947                 olen = 4 - olen;
1948         } else {
1949                 olen = 2 - olen;
1950         }
1951
1952         *size = ilen_orig - ilen;
1953
1954         if (olen == 2) {
1955                 /* 2 byte, UTF16-LE encoded value. */
1956                 return (codepoint_t)SVAL(buf, 0);
1957         }
1958         if (olen == 4) {
1959                 /* Decode a 4 byte UTF16-LE character manually.
1960                    See RFC2871 for the encoding machanism.
1961                 */
1962                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1963                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1964
1965                 return (codepoint_t)0x10000 +
1966                                 (w1 << 10) + w2;
1967         }
1968
1969         /* no other length is valid */
1970         return INVALID_CODEPOINT;
1971 }