source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  21
  22 */
  23 #include "includes.h"
  24
  25 /* We can parameterize this if someone complains.... JRA. */
  26
  27 char lp_failed_convert_char(void)
  28 {
  29         return '_';
  30 }
  31
  32 /**
  33  * @file
  34  *
  35  * @brief Character-set conversion routines built on our iconv.
  36  *
  37  * @note Samba's internal character set (at least in the 3.0 series)
  38  * is always the same as the one for the Unix filesystem.  It is
  39  * <b>not</b> necessarily UTF-8 and may be different on machines that
  40  * need i18n filenames to be compatible with Unix software.  It does
  41  * have to be a superset of ASCII.  All multibyte sequences must start
  42  * with a byte with the high bit set.
  43  *
  44  * @sa lib/iconv.c
  45  */
  46
  47
  48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
  50 static bool initialized;
  51
  52 /**
  53  * Return the name of a charset to give to iconv().
  54  **/
  55 static const char *charset_name(charset_t ch)
  56 {
  57         const char *ret = NULL;
  58
  59         if (ch == CH_UTF16LE) ret = "UTF-16LE";
  60         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
  61         else if (ch == CH_UNIX) ret = lp_unix_charset();
  62         else if (ch == CH_DOS) ret = lp_dos_charset();
  63         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  64         else if (ch == CH_UTF8) ret = "UTF8";
  65
  66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  67         if (ret && !strcmp(ret, "LOCALE")) {
  68                 const char *ln = NULL;
  69
  70 #ifdef HAVE_SETLOCALE
  71                 setlocale(LC_ALL, "");
  72 #endif
  73                 ln = nl_langinfo(CODESET);
  74                 if (ln) {
  75                         /* Check whether the charset name is supported
  76                            by iconv */
  77                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  78                         if (handle == (smb_iconv_t) -1) {
  79                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  80                                 ln = NULL;
  81                         } else {
  82                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  83                                 smb_iconv_close(handle);
  84                         }
  85                 }
  86                 ret = ln;
  87         }
  88 #endif
  89
  90         if (!ret || !*ret) ret = "ASCII";
  91         return ret;
  92 }
  93
  94 void lazy_initialize_conv(void)
  95 {
  96         if (!initialized) {
  97                 load_case_tables();
  98                 init_iconv();
  99                 initialized = true;
 100         }
 101 }
 102
 103 /**
 104  * Destroy global objects allocated by init_iconv()
 105  **/
 106 void gfree_charcnv(void)
 107 {
 108         int c1, c2;
 109
 110         for (c1=0;c1<NUM_CHARSETS;c1++) {
 111                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 112                         if ( conv_handles[c1][c2] ) {
 113                                 smb_iconv_close( conv_handles[c1][c2] );
 114                                 conv_handles[c1][c2] = 0;
 115                         }
 116                 }
 117         }
 118         initialized = false;
 119 }
 120
 121 /**
 122  * Initialize iconv conversion descriptors.
 123  *
 124  * This is called the first time it is needed, and also called again
 125  * every time the configuration is reloaded, because the charset or
 126  * codepage might have changed.
 127  **/
 128 void init_iconv(void)
 129 {
 130         int c1, c2;
 131         bool did_reload = False;
 132
 133         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 134            first */
 135         if (!conv_handles[CH_UNIX][CH_UTF16LE])
 136                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
 137
 138         if (!conv_handles[CH_UTF16LE][CH_UNIX])
 139                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
 140
 141         for (c1=0;c1<NUM_CHARSETS;c1++) {
 142                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 143                         const char *n1 = charset_name((charset_t)c1);
 144                         const char *n2 = charset_name((charset_t)c2);
 145                         if (conv_handles[c1][c2] &&
 146                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 147                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 148                                 continue;
 149
 150                         did_reload = True;
 151
 152                         if (conv_handles[c1][c2])
 153                                 smb_iconv_close(conv_handles[c1][c2]);
 154
 155                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 156                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 157                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 158                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 159                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
 160                                         n1 = "ASCII";
 161                                 }
 162                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
 163                                         n2 = "ASCII";
 164                                 }
 165                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 166                                         n1, n2 ));
 167                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 168                                 if (!conv_handles[c1][c2]) {
 169                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 170                                         smb_panic("init_iconv: conv_handle initialization failed");
 171                                 }
 172                         }
 173                 }
 174         }
 175
 176         if (did_reload) {
 177                 /* XXX: Does this really get called every time the dos
 178                  * codepage changes? */
 179                 /* XXX: Is the did_reload test too strict? */
 180                 conv_silent = True;
 181                 init_valid_table();
 182                 conv_silent = False;
 183         }
 184 }
 185
 186 /**
 187  * Convert string from one encoding to another, making error checking etc
 188  * Slow path version - uses (slow) iconv.
 189  *
 190  * @param src pointer to source string (multibyte or singlebyte)
 191  * @param srclen length of the source string in bytes
 192  * @param dest pointer to destination string (multibyte or singlebyte)
 193  * @param destlen maximal length allowed for string
 194  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 195  * @returns the number of bytes occupied in the destination
 196  *
 197  * Ensure the srclen contains the terminating zero.
 198  *
 199  **/
 200
 201 static size_t convert_string_internal(charset_t from, charset_t to,
 202                       void const *src, size_t srclen,
 203                       void *dest, size_t destlen, bool allow_bad_conv)
 204 {
 205         size_t i_len, o_len;
 206         size_t retval;
 207         const char* inbuf = (const char*)src;
 208         char* outbuf = (char*)dest;
 209         smb_iconv_t descriptor;
 210
 211         lazy_initialize_conv();
 212
 213         descriptor = conv_handles[from][to];
 214
 215         if (srclen == (size_t)-1) {
 216                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
 217                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 218                 } else {
 219                         srclen = strlen((const char *)src)+1;
 220                 }
 221         }
 222
 223
 224         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 225                 if (!conv_silent)
 226                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 227                 return (size_t)-1;
 228         }
 229
 230         i_len=srclen;
 231         o_len=destlen;
 232
 233  again:
 234
 235         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
 236         if(retval==(size_t)-1) {
 237                 const char *reason="unknown error";
 238                 switch(errno) {
 239                         case EINVAL:
 240                                 reason="Incomplete multibyte sequence";
 241                                 if (!conv_silent)
 242                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 243                                 if (allow_bad_conv)
 244                                         goto use_as_is;
 245                                 break;
 246                         case E2BIG:
 247                                 reason="No more room";
 248                                 if (!conv_silent) {
 249                                         if (from == CH_UNIX) {
 250                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
 251                                                         charset_name(from), charset_name(to),
 252                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 253                                         } else {
 254                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
 255                                                         charset_name(from), charset_name(to),
 256                                                         (unsigned int)srclen, (unsigned int)destlen));
 257                                         }
 258                                 }
 259                                 break;
 260                         case EILSEQ:
 261                                 reason="Illegal multibyte sequence";
 262                                 if (!conv_silent)
 263                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 264                                 if (allow_bad_conv)
 265                                         goto use_as_is;
 266                                 break;
 267                         default:
 268                                 if (!conv_silent)
 269                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 270                                 break;
 271                 }
 272                 /* smb_panic(reason); */
 273         }
 274         return destlen-o_len;
 275
 276  use_as_is:
 277
 278         /*
 279          * Conversion not supported. This is actually an error, but there are so
 280          * many misconfigured iconv systems and smb.conf's out there we can't just
 281          * fail. Do a very bad conversion instead.... JRA.
 282          */
 283
 284         {
 285                 if (o_len == 0 || i_len == 0)
 286                         return destlen - o_len;
 287
 288                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 289                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 290                         /* Can't convert from utf16 any endian to multibyte.
 291                            Replace with the default fail char.
 292                         */
 293                         if (i_len < 2)
 294                                 return destlen - o_len;
 295                         if (i_len >= 2) {
 296                                 *outbuf = lp_failed_convert_char();
 297
 298                                 outbuf++;
 299                                 o_len--;
 300
 301                                 inbuf += 2;
 302                                 i_len -= 2;
 303                         }
 304
 305                         if (o_len == 0 || i_len == 0)
 306                                 return destlen - o_len;
 307
 308                         /* Keep trying with the next char... */
 309                         goto again;
 310
 311                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 312                         /* Can't convert to UTF16LE - just widen by adding the
 313                            default fail char then zero.
 314                         */
 315                         if (o_len < 2)
 316                                 return destlen - o_len;
 317
 318                         outbuf[0] = lp_failed_convert_char();
 319                         outbuf[1] = '\0';
 320
 321                         inbuf++;
 322                         i_len--;
 323
 324                         outbuf += 2;
 325                         o_len -= 2;
 326
 327                         if (o_len == 0 || i_len == 0)
 328                                 return destlen - o_len;
 329
 330                         /* Keep trying with the next char... */
 331                         goto again;
 332
 333                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 334                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 335                         /* Failed multibyte to multibyte. Just copy the default fail char and
 336                                 try again. */
 337                         outbuf[0] = lp_failed_convert_char();
 338
 339                         inbuf++;
 340                         i_len--;
 341
 342                         outbuf++;
 343                         o_len--;
 344
 345                         if (o_len == 0 || i_len == 0)
 346                                 return destlen - o_len;
 347
 348                         /* Keep trying with the next char... */
 349                         goto again;
 350
 351                 } else {
 352                         /* Keep compiler happy.... */
 353                         return destlen - o_len;
 354                 }
 355         }
 356 }
 357
 358 /**
 359  * Convert string from one encoding to another, making error checking etc
 360  * Fast path version - handles ASCII first.
 361  *
 362  * @param src pointer to source string (multibyte or singlebyte)
 363  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 364  * @param dest pointer to destination string (multibyte or singlebyte)
 365  * @param destlen maximal length allowed for string - *NEVER* -1.
 366  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 367  * @returns the number of bytes occupied in the destination
 368  *
 369  * Ensure the srclen contains the terminating zero.
 370  *
 371  * This function has been hand-tuned to provide a fast path.
 372  * Don't change unless you really know what you are doing. JRA.
 373  **/
 374
 375 size_t convert_string(charset_t from, charset_t to,
 376                       void const *src, size_t srclen,
 377                       void *dest, size_t destlen, bool allow_bad_conv)
 378 {
 379         /*
 380          * NB. We deliberately don't do a strlen here if srclen == -1.
 381          * This is very expensive over millions of calls and is taken
 382          * care of in the slow path in convert_string_internal. JRA.
 383          */
 384
 385 #ifdef DEVELOPER
 386         SMB_ASSERT(destlen != (size_t)-1);
 387 #endif
 388
 389         if (srclen == 0)
 390                 return 0;
 391
 392         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
 393                 const unsigned char *p = (const unsigned char *)src;
 394                 unsigned char *q = (unsigned char *)dest;
 395                 size_t slen = srclen;
 396                 size_t dlen = destlen;
 397                 unsigned char lastp = '\0';
 398                 size_t retval = 0;
 399
 400                 /* If all characters are ascii, fast path here. */
 401                 while (slen && dlen) {
 402                         if ((lastp = *p) <= 0x7f) {
 403                                 *q++ = *p++;
 404                                 if (slen != (size_t)-1) {
 405                                         slen--;
 406                                 }
 407                                 dlen--;
 408                                 retval++;
 409                                 if (!lastp)
 410                                         break;
 411                         } else {
 412 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 413                                 goto general_case;
 414 #else
 415                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 416 #endif
 417                         }
 418                 }
 419                 if (!dlen) {
 420                         /* Even if we fast path we should note if we ran out of room. */
 421                         if (((slen != (size_t)-1) && slen) ||
 422                                         ((slen == (size_t)-1) && lastp)) {
 423                                 errno = E2BIG;
 424                         }
 425                 }
 426                 return retval;
 427         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
 428                 const unsigned char *p = (const unsigned char *)src;
 429                 unsigned char *q = (unsigned char *)dest;
 430                 size_t retval = 0;
 431                 size_t slen = srclen;
 432                 size_t dlen = destlen;
 433                 unsigned char lastp = '\0';
 434
 435                 /* If all characters are ascii, fast path here. */
 436                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 437                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 438                                 *q++ = *p;
 439                                 if (slen != (size_t)-1) {
 440                                         slen -= 2;
 441                                 }
 442                                 p += 2;
 443                                 dlen--;
 444                                 retval++;
 445                                 if (!lastp)
 446                                         break;
 447                         } else {
 448 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 449                                 goto general_case;
 450 #else
 451                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 452 #endif
 453                         }
 454                 }
 455                 if (!dlen) {
 456                         /* Even if we fast path we should note if we ran out of room. */
 457                         if (((slen != (size_t)-1) && slen) ||
 458                                         ((slen == (size_t)-1) && lastp)) {
 459                                 errno = E2BIG;
 460                         }
 461                 }
 462                 return retval;
 463         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 464                 const unsigned char *p = (const unsigned char *)src;
 465                 unsigned char *q = (unsigned char *)dest;
 466                 size_t retval = 0;
 467                 size_t slen = srclen;
 468                 size_t dlen = destlen;
 469                 unsigned char lastp = '\0';
 470
 471                 /* If all characters are ascii, fast path here. */
 472                 while (slen && (dlen >= 2)) {
 473                         if ((lastp = *p) <= 0x7F) {
 474                                 *q++ = *p++;
 475                                 *q++ = '\0';
 476                                 if (slen != (size_t)-1) {
 477                                         slen--;
 478                                 }
 479                                 dlen -= 2;
 480                                 retval += 2;
 481                                 if (!lastp)
 482                                         break;
 483                         } else {
 484 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 485                                 goto general_case;
 486 #else
 487                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 488 #endif
 489                         }
 490                 }
 491                 if (!dlen) {
 492                         /* Even if we fast path we should note if we ran out of room. */
 493                         if (((slen != (size_t)-1) && slen) ||
 494                                         ((slen == (size_t)-1) && lastp)) {
 495                                 errno = E2BIG;
 496                         }
 497                 }
 498                 return retval;
 499         }
 500
 501 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 502   general_case:
 503 #endif
 504         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 505 }
 506
 507 /**
 508  * Convert between character sets, allocating a new buffer for the result.
 509  *
 510  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 511  * (this is a bad interface and needs fixing. JRA).
 512  * @param srclen length of source buffer.
 513  * @param dest always set at least to NULL
 514  * @param converted_size set to the size of the allocated buffer on return
 515  * true
 516  * @note -1 is not accepted for srclen.
 517  *
 518  * @return true if new buffer was correctly allocated, and string was
 519  * converted.
 520  *
 521  * Ensure the srclen contains the terminating zero.
 522  *
 523  * I hate the goto's in this function. It's embarressing.....
 524  * There has to be a cleaner way to do this. JRA.
 525  **/
 526
 527 bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 528                              void const *src, size_t srclen, void *dst,
 529                              size_t *converted_size, bool allow_bad_conv)
 530 {
 531         size_t i_len, o_len, destlen = (srclen * 3) / 2;
 532         size_t retval;
 533         const char *inbuf = (const char *)src;
 534         char *outbuf = NULL, *ob = NULL;
 535         smb_iconv_t descriptor;
 536         void **dest = (void **)dst;
 537
 538         *dest = NULL;
 539
 540         if (!converted_size) {
 541                 errno = EINVAL;
 542                 return false;
 543         }
 544
 545         if (src == NULL || srclen == (size_t)-1) {
 546                 errno = EINVAL;
 547                 return false;
 548         }
 549         if (srclen == 0) {
 550                 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
 551                 if (ob == NULL) {
 552                         errno = ENOMEM;
 553                         return false;
 554                 }
 555                 *dest = ob;
 556                 *converted_size = 0;
 557                 return true;
 558         }
 559
 560         lazy_initialize_conv();
 561
 562         descriptor = conv_handles[from][to];
 563
 564         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 565                 if (!conv_silent)
 566                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 567                 errno = EOPNOTSUPP;
 568                 return false;
 569         }
 570
 571   convert:
 572
 573         /* +2 is for ucs2 null termination. */
 574         if ((destlen*2)+2 < destlen) {
 575                 /* wrapped ! abort. */
 576                 if (!conv_silent)
 577                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 578                 if (!ctx)
 579                         SAFE_FREE(outbuf);
 580                 errno = EOPNOTSUPP;
 581                 return false;
 582         } else {
 583                 destlen = destlen * 2;
 584         }
 585
 586         /* +2 is for ucs2 null termination. */
 587         if (ctx) {
 588                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
 589         } else {
 590                 ob = (char *)SMB_REALLOC(ob, destlen + 2);
 591         }
 592
 593         if (!ob) {
 594                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 595                 errno = ENOMEM;
 596                 return false;
 597         }
 598         outbuf = ob;
 599         i_len = srclen;
 600         o_len = destlen;
 601
 602  again:
 603
 604         retval = smb_iconv(descriptor,
 605                            &inbuf, &i_len,
 606                            &outbuf, &o_len);
 607         if(retval == (size_t)-1)                {
 608                 const char *reason="unknown error";
 609                 switch(errno) {
 610                         case EINVAL:
 611                                 reason="Incomplete multibyte sequence";
 612                                 if (!conv_silent)
 613                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 614                                 if (allow_bad_conv)
 615                                         goto use_as_is;
 616                                 break;
 617                         case E2BIG:
 618                                 goto convert;
 619                         case EILSEQ:
 620                                 reason="Illegal multibyte sequence";
 621                                 if (!conv_silent)
 622                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 623                                 if (allow_bad_conv)
 624                                         goto use_as_is;
 625                                 break;
 626                 }
 627                 if (!conv_silent)
 628                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 629                 /* smb_panic(reason); */
 630                 if (ctx) {
 631                         TALLOC_FREE(ob);
 632                 } else {
 633                         SAFE_FREE(ob);
 634                 }
 635                 return false;
 636         }
 637
 638   out:
 639
 640         destlen = destlen - o_len;
 641         /* Don't shrink unless we're reclaiming a lot of
 642          * space. This is in the hot codepath and these
 643          * reallocs *cost*. JRA.
 644          */
 645         if (o_len > 1024) {
 646                 /* We're shrinking here so we know the +2 is safe from wrap. */
 647                 if (ctx) {
 648                         ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
 649                 } else {
 650                         ob = (char *)SMB_REALLOC(ob,destlen + 2);
 651                 }
 652         }
 653
 654         if (destlen && !ob) {
 655                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 656                 errno = ENOMEM;
 657                 return false;
 658         }
 659
 660         *dest = ob;
 661
 662         /* Must ucs2 null terminate in the extra space we allocated. */
 663         ob[destlen] = '\0';
 664         ob[destlen+1] = '\0';
 665
 666         *converted_size = destlen;
 667         return true;
 668
 669  use_as_is:
 670
 671         /*
 672          * Conversion not supported. This is actually an error, but there are so
 673          * many misconfigured iconv systems and smb.conf's out there we can't just
 674          * fail. Do a very bad conversion instead.... JRA.
 675          */
 676
 677         {
 678                 if (o_len == 0 || i_len == 0)
 679                         goto out;
 680
 681                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
 682                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
 683                         /* Can't convert from utf16 any endian to multibyte.
 684                            Replace with the default fail char.
 685                         */
 686
 687                         if (i_len < 2)
 688                                 goto out;
 689
 690                         if (i_len >= 2) {
 691                                 *outbuf = lp_failed_convert_char();
 692
 693                                 outbuf++;
 694                                 o_len--;
 695
 696                                 inbuf += 2;
 697                                 i_len -= 2;
 698                         }
 699
 700                         if (o_len == 0 || i_len == 0)
 701                                 goto out;
 702
 703                         /* Keep trying with the next char... */
 704                         goto again;
 705
 706                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
 707                         /* Can't convert to UTF16LE - just widen by adding the
 708                            default fail char then zero.
 709                         */
 710                         if (o_len < 2)
 711                                 goto out;
 712
 713                         outbuf[0] = lp_failed_convert_char();
 714                         outbuf[1] = '\0';
 715
 716                         inbuf++;
 717                         i_len--;
 718
 719                         outbuf += 2;
 720                         o_len -= 2;
 721
 722                         if (o_len == 0 || i_len == 0)
 723                                 goto out;
 724
 725                         /* Keep trying with the next char... */
 726                         goto again;
 727
 728                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
 729                                 to != CH_UTF16LE && to != CH_UTF16BE) {
 730                         /* Failed multibyte to multibyte. Just copy the default fail char and
 731                            try again. */
 732                         outbuf[0] = lp_failed_convert_char();
 733
 734                         inbuf++;
 735                         i_len--;
 736
 737                         outbuf++;
 738                         o_len--;
 739
 740                         if (o_len == 0 || i_len == 0)
 741                                 goto out;
 742
 743                         /* Keep trying with the next char... */
 744                         goto again;
 745
 746                 } else {
 747                         /* Keep compiler happy.... */
 748                         goto out;
 749                 }
 750         }
 751 }
 752
 753 /**
 754  * Convert between character sets, allocating a new buffer using talloc for the result.
 755  *
 756  * @param srclen length of source buffer.
 757  * @param dest always set at least to NULL
 758  * @parm converted_size set to the number of bytes occupied by the string in
 759  * the destination on success.
 760  * @note -1 is not accepted for srclen.
 761  *
 762  * @return true if new buffer was correctly allocated, and string was
 763  * converted.
 764  */
 765 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 766                            void const *src, size_t srclen, void *dst,
 767                            size_t *converted_size, bool allow_bad_conv)
 768 {
 769         void **dest = (void **)dst;
 770
 771         *dest = NULL;
 772         return convert_string_allocate(ctx, from, to, src, srclen, dest,
 773                                        converted_size, allow_bad_conv);
 774 }
 775
 776 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 777 {
 778         size_t size;
 779         smb_ucs2_t *buffer;
 780
 781         if (!push_ucs2_allocate(&buffer, src, &size)) {
 782                 return (size_t)-1;
 783         }
 784
 785         if (!strupper_w(buffer) && (dest == src)) {
 786                 free(buffer);
 787                 return srclen;
 788         }
 789
 790         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 791         free(buffer);
 792         return size;
 793 }
 794
 795 /**
 796  strdup() a unix string to upper case.
 797 **/
 798
 799 char *strdup_upper(const char *s)
 800 {
 801         char *out_buffer = SMB_STRDUP(s);
 802         const unsigned char *p = (const unsigned char *)s;
 803         unsigned char *q = (unsigned char *)out_buffer;
 804
 805         if (!q) {
 806                 return NULL;
 807         }
 808
 809         /* this is quite a common operation, so we want it to be
 810            fast. We optimise for the ascii case, knowing that all our
 811            supported multi-byte character sets are ascii-compatible
 812            (ie. they match for the first 128 chars) */
 813
 814         while (*p) {
 815                 if (*p & 0x80)
 816                         break;
 817                 *q++ = toupper_ascii_fast(*p);
 818                 p++;
 819         }
 820
 821         if (*p) {
 822                 /* MB case. */
 823                 size_t converted_size, converted_size2;
 824                 smb_ucs2_t *buffer = NULL;
 825
 826                 SAFE_FREE(out_buffer);
 827                 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
 828                                              strlen(s) + 1,
 829                                              (void **)(void *)&buffer,
 830                                              &converted_size, True))
 831                 {
 832                         return NULL;
 833                 }
 834
 835                 strupper_w(buffer);
 836
 837                 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
 838                                              converted_size,
 839                                              (void **)(void *)&out_buffer,
 840                                              &converted_size2, True))
 841                 {
 842                         TALLOC_FREE(buffer);
 843                         return NULL;
 844                 }
 845
 846                 /* Don't need the intermediate buffer
 847                  * anymore.
 848                  */
 849                 TALLOC_FREE(buffer);
 850         }
 851
 852         return out_buffer;
 853 }
 854
 855 /**
 856  talloc_strdup() a unix string to upper case.
 857 **/
 858
 859 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
 860 {
 861         char *out_buffer = talloc_strdup(ctx,s);
 862         const unsigned char *p = (const unsigned char *)s;
 863         unsigned char *q = (unsigned char *)out_buffer;
 864
 865         if (!q) {
 866                 return NULL;
 867         }
 868
 869         /* this is quite a common operation, so we want it to be
 870            fast. We optimise for the ascii case, knowing that all our
 871            supported multi-byte character sets are ascii-compatible
 872            (ie. they match for the first 128 chars) */
 873
 874         while (*p) {
 875                 if (*p & 0x80)
 876                         break;
 877                 *q++ = toupper_ascii_fast(*p);
 878                 p++;
 879         }
 880
 881         if (*p) {
 882                 /* MB case. */
 883                 size_t converted_size, converted_size2;
 884                 smb_ucs2_t *ubuf = NULL;
 885
 886                 /* We're not using the ascii buffer above. */
 887                 TALLOC_FREE(out_buffer);
 888
 889                 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
 890                                            strlen(s)+1, (void *)&ubuf,
 891                                            &converted_size, True))
 892                 {
 893                         return NULL;
 894                 }
 895
 896                 strupper_w(ubuf);
 897
 898                 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
 899                                            converted_size, (void *)&out_buffer,
 900                                            &converted_size2, True))
 901                 {
 902                         TALLOC_FREE(ubuf);
 903                         return NULL;
 904                 }
 905
 906                 /* Don't need the intermediate buffer
 907                  * anymore.
 908                  */
 909                 TALLOC_FREE(ubuf);
 910         }
 911
 912         return out_buffer;
 913 }
 914
 915 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 916 {
 917         size_t size;
 918         smb_ucs2_t *buffer = NULL;
 919
 920         if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
 921                                      (void **)(void *)&buffer, &size,
 922                                      True))
 923         {
 924                 smb_panic("failed to create UCS2 buffer");
 925         }
 926         if (!strlower_w(buffer) && (dest == src)) {
 927                 SAFE_FREE(buffer);
 928                 return srclen;
 929         }
 930         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
 931         SAFE_FREE(buffer);
 932         return size;
 933 }
 934
 935 /**
 936  strdup() a unix string to lower case.
 937 **/
 938
 939 char *strdup_lower(const char *s)
 940 {
 941         size_t converted_size;
 942         smb_ucs2_t *buffer = NULL;
 943         char *out_buffer;
 944
 945         if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
 946                 return NULL;
 947         }
 948
 949         strlower_w(buffer);
 950
 951         if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
 952                 SAFE_FREE(buffer);
 953                 return NULL;
 954         }
 955
 956         SAFE_FREE(buffer);
 957
 958         return out_buffer;
 959 }
 960
 961 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
 962 {
 963         size_t converted_size;
 964         smb_ucs2_t *buffer = NULL;
 965         char *out_buffer;
 966
 967         if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
 968                 return NULL;
 969         }
 970
 971         strlower_w(buffer);
 972
 973         if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
 974                 TALLOC_FREE(buffer);
 975                 return NULL;
 976         }
 977
 978         TALLOC_FREE(buffer);
 979
 980         return out_buffer;
 981 }
 982
 983
 984 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 985 {
 986         if (flags & (STR_NOALIGN|STR_ASCII))
 987                 return 0;
 988         return PTR_DIFF(p, base_ptr) & 1;
 989 }
 990
 991
 992 /**
 993  * Copy a string from a char* unix src to a dos codepage string destination.
 994  *
 995  * @return the number of bytes occupied by the string in the destination.
 996  *
 997  * @param flags can include
 998  * <dl>
 999  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1000  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1001  * </dl>
1002  *
1003  * @param dest_len the maximum length in bytes allowed in the
1004  * destination.
1005  **/
1006 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1007 {
1008         size_t src_len = strlen(src);
1009         char *tmpbuf = NULL;
1010         size_t ret;
1011
1012         /* No longer allow a length of -1. */
1013         if (dest_len == (size_t)-1) {
1014                 smb_panic("push_ascii - dest_len == -1");
1015         }
1016
1017         if (flags & STR_UPPER) {
1018                 tmpbuf = SMB_STRDUP(src);
1019                 if (!tmpbuf) {
1020                         smb_panic("malloc fail");
1021                 }
1022                 strupper_m(tmpbuf);
1023                 src = tmpbuf;
1024         }
1025
1026         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1027                 src_len++;
1028         }
1029
1030         ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1031         if (ret == (size_t)-1 &&
1032                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1033                         && dest_len > 0) {
1034                 ((char *)dest)[0] = '\0';
1035         }
1036         SAFE_FREE(tmpbuf);
1037         return ret;
1038 }
1039
1040 size_t push_ascii_fstring(void *dest, const char *src)
1041 {
1042         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1043 }
1044
1045 /********************************************************************
1046  Push an nstring - ensure null terminated. Written by
1047  moriyama@miraclelinux.com (MORIYAMA Masayuki).
1048 ********************************************************************/
1049
1050 size_t push_ascii_nstring(void *dest, const char *src)
1051 {
1052         size_t i, buffer_len, dest_len;
1053         smb_ucs2_t *buffer;
1054
1055         conv_silent = True;
1056         if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1057                 smb_panic("failed to create UCS2 buffer");
1058         }
1059
1060         /* We're using buffer_len below to count ucs2 characters, not bytes. */
1061         buffer_len /= sizeof(smb_ucs2_t);
1062
1063         dest_len = 0;
1064         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1065                 unsigned char mb[10];
1066                 /* Convert one smb_ucs2_t character at a time. */
1067                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1068                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1069                         memcpy((char *)dest + dest_len, mb, mb_len);
1070                         dest_len += mb_len;
1071                 } else {
1072                         errno = E2BIG;
1073                         break;
1074                 }
1075         }
1076         ((char *)dest)[dest_len] = '\0';
1077
1078         SAFE_FREE(buffer);
1079         conv_silent = False;
1080         return dest_len;
1081 }
1082
1083 /********************************************************************
1084  Push and malloc an ascii string. src and dest null terminated.
1085 ********************************************************************/
1086
1087 bool push_ascii_allocate(char **dest, const char *src, size_t *converted_size)
1088 {
1089         size_t src_len = strlen(src)+1;
1090
1091         *dest = NULL;
1092         return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1093                                        (void **)dest, converted_size, True);
1094 }
1095
1096 /**
1097  * Copy a string from a dos codepage source to a unix char* destination.
1098  *
1099  * The resulting string in "dest" is always null terminated.
1100  *
1101  * @param flags can have:
1102  * <dl>
1103  * <dt>STR_TERMINATE</dt>
1104  * <dd>STR_TERMINATE means the string in @p src
1105  * is null terminated, and src_len is ignored.</dd>
1106  * </dl>
1107  *
1108  * @param src_len is the length of the source area in bytes.
1109  * @returns the number of bytes occupied by the string in @p src.
1110  **/
1111 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1112 {
1113         size_t ret;
1114
1115         if (dest_len == (size_t)-1) {
1116                 /* No longer allow dest_len of -1. */
1117                 smb_panic("pull_ascii - invalid dest_len of -1");
1118         }
1119
1120         if (flags & STR_TERMINATE) {
1121                 if (src_len == (size_t)-1) {
1122                         src_len = strlen((const char *)src) + 1;
1123                 } else {
1124                         size_t len = strnlen((const char *)src, src_len);
1125                         if (len < src_len)
1126                                 len++;
1127                         src_len = len;
1128                 }
1129         }
1130
1131         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1132         if (ret == (size_t)-1) {
1133                 ret = 0;
1134                 dest_len = 0;
1135         }
1136
1137         if (dest_len && ret) {
1138                 /* Did we already process the terminating zero ? */
1139                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1140                         dest[MIN(ret, dest_len-1)] = 0;
1141                 }
1142         } else  {
1143                 dest[0] = 0;
1144         }
1145
1146         return src_len;
1147 }
1148
1149 /**
1150  * Copy a string from a dos codepage source to a unix char* destination.
1151  Talloc version.
1152  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1153  needs fixing. JRA).
1154  *
1155  * The resulting string in "dest" is always null terminated.
1156  *
1157  * @param flags can have:
1158  * <dl>
1159  * <dt>STR_TERMINATE</dt>
1160  * <dd>STR_TERMINATE means the string in @p src
1161  * is null terminated, and src_len is ignored.</dd>
1162  * </dl>
1163  *
1164  * @param src_len is the length of the source area in bytes.
1165  * @returns the number of bytes occupied by the string in @p src.
1166  **/
1167
1168 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1169                                         char **ppdest,
1170                                         const void *src,
1171                                         size_t src_len,
1172                                         int flags)
1173 {
1174         char *dest = NULL;
1175         size_t dest_len;
1176
1177 #ifdef DEVELOPER
1178         /* Ensure we never use the braindead "malloc" varient. */
1179         if (ctx == NULL) {
1180                 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1181         }
1182 #endif
1183
1184         *ppdest = NULL;
1185
1186         if (!src_len) {
1187                 return 0;
1188         }
1189
1190         if (flags & STR_TERMINATE) {
1191                 if (src_len == (size_t)-1) {
1192                         src_len = strlen((const char *)src) + 1;
1193                 } else {
1194                         size_t len = strnlen((const char *)src, src_len);
1195                         if (len < src_len)
1196                                 len++;
1197                         src_len = len;
1198                 }
1199                 /* Ensure we don't use an insane length from the client. */
1200                 if (src_len >= 1024*1024) {
1201                         char *msg = talloc_asprintf(ctx,
1202                                         "Bad src length (%u) in "
1203                                         "pull_ascii_base_talloc",
1204                                         (unsigned int)src_len);
1205                         smb_panic(msg);
1206                 }
1207         } else {
1208                 /* Can't have an unlimited length
1209                  * non STR_TERMINATE'd.
1210                  */
1211                 if (src_len == (size_t)-1) {
1212                         errno = EINVAL;
1213                         return 0;
1214                 }
1215         }
1216
1217         /* src_len != -1 here. */
1218
1219         if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1220                                      &dest_len, True)) {
1221                 dest_len = 0;
1222         }
1223
1224         if (dest_len && dest) {
1225                 /* Did we already process the terminating zero ? */
1226                 if (dest[dest_len-1] != 0) {
1227                         size_t size = talloc_get_size(dest);
1228                         /* Have we got space to append the '\0' ? */
1229                         if (size <= dest_len) {
1230                                 /* No, realloc. */
1231                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1232                                                 dest_len+1);
1233                                 if (!dest) {
1234                                         /* talloc fail. */
1235                                         dest_len = (size_t)-1;
1236                                         return 0;
1237                                 }
1238                         }
1239                         /* Yay - space ! */
1240                         dest[dest_len] = '\0';
1241                         dest_len++;
1242                 }
1243         } else if (dest) {
1244                 dest[0] = 0;
1245         }
1246
1247         *ppdest = dest;
1248         return src_len;
1249 }
1250
1251 size_t pull_ascii_fstring(char *dest, const void *src)
1252 {
1253         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1254 }
1255
1256 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1257
1258 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1259 {
1260         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1261 }
1262
1263 /**
1264  * Copy a string from a char* src to a unicode destination.
1265  *
1266  * @returns the number of bytes occupied by the string in the destination.
1267  *
1268  * @param flags can have:
1269  *
1270  * <dl>
1271  * <dt>STR_TERMINATE <dd>means include the null termination.
1272  * <dt>STR_UPPER     <dd>means uppercase in the destination.
1273  * <dt>STR_NOALIGN   <dd>means don't do alignment.
1274  * </dl>
1275  *
1276  * @param dest_len is the maximum length allowed in the
1277  * destination.
1278  **/
1279
1280 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1281 {
1282         size_t len=0;
1283         size_t src_len;
1284         size_t ret;
1285
1286         if (dest_len == (size_t)-1) {
1287                 /* No longer allow dest_len of -1. */
1288                 smb_panic("push_ucs2 - invalid dest_len of -1");
1289         }
1290
1291         if (flags & STR_TERMINATE)
1292                 src_len = (size_t)-1;
1293         else
1294                 src_len = strlen(src);
1295
1296         if (ucs2_align(base_ptr, dest, flags)) {
1297                 *(char *)dest = 0;
1298                 dest = (void *)((char *)dest + 1);
1299                 if (dest_len)
1300                         dest_len--;
1301                 len++;
1302         }
1303
1304         /* ucs2 is always a multiple of 2 bytes */
1305         dest_len &= ~1;
1306
1307         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1308         if (ret == (size_t)-1) {
1309                 if ((flags & STR_TERMINATE) &&
1310                                 dest &&
1311                                 dest_len) {
1312                         *(char *)dest = 0;
1313                 }
1314                 return len;
1315         }
1316
1317         len += ret;
1318
1319         if (flags & STR_UPPER) {
1320                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1321                 size_t i;
1322
1323                 /* We check for i < (ret / 2) below as the dest string isn't null
1324                    terminated if STR_TERMINATE isn't set. */
1325
1326                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1327                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1328                         if (v != dest_ucs2[i]) {
1329                                 dest_ucs2[i] = v;
1330                         }
1331                 }
1332         }
1333
1334         return len;
1335 }
1336
1337
1338 /**
1339  * Copy a string from a unix char* src to a UCS2 destination,
1340  * allocating a buffer using talloc().
1341  *
1342  * @param dest always set at least to NULL
1343  * @parm converted_size set to the number of bytes occupied by the string in
1344  * the destination on success.
1345  *
1346  * @return true if new buffer was correctly allocated, and string was
1347  * converted.
1348  **/
1349 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1350                       size_t *converted_size)
1351 {
1352         size_t src_len = strlen(src)+1;
1353
1354         *dest = NULL;
1355         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1356                                      (void **)dest, converted_size, True);
1357 }
1358
1359
1360 /**
1361  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1362  *
1363  * @param dest always set at least to NULL
1364  * @parm converted_size set to the number of bytes occupied by the string in
1365  * the destination on success.
1366  *
1367  * @return true if new buffer was correctly allocated, and string was
1368  * converted.
1369  **/
1370
1371 bool push_ucs2_allocate(smb_ucs2_t **dest, const char *src,
1372                         size_t *converted_size)
1373 {
1374         size_t src_len = strlen(src)+1;
1375
1376         *dest = NULL;
1377         return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1378                                        (void **)dest, converted_size, True);
1379 }
1380
1381 /**
1382  Copy a string from a char* src to a UTF-8 destination.
1383  Return the number of bytes occupied by the string in the destination
1384  Flags can have:
1385   STR_TERMINATE means include the null termination
1386   STR_UPPER     means uppercase in the destination
1387  dest_len is the maximum length allowed in the destination. If dest_len
1388  is -1 then no maxiumum is used.
1389 **/
1390
1391 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1392 {
1393         size_t src_len = 0;
1394         size_t ret;
1395         char *tmpbuf = NULL;
1396
1397         if (dest_len == (size_t)-1) {
1398                 /* No longer allow dest_len of -1. */
1399                 smb_panic("push_utf8 - invalid dest_len of -1");
1400         }
1401
1402         if (flags & STR_UPPER) {
1403                 tmpbuf = strdup_upper(src);
1404                 if (!tmpbuf) {
1405                         return (size_t)-1;
1406                 }
1407                 src = tmpbuf;
1408                 src_len = strlen(src);
1409         }
1410
1411         src_len = strlen(src);
1412         if (flags & STR_TERMINATE) {
1413                 src_len++;
1414         }
1415
1416         ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1417         SAFE_FREE(tmpbuf);
1418         return ret;
1419 }
1420
1421 size_t push_utf8_fstring(void *dest, const char *src)
1422 {
1423         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1424 }
1425
1426 /**
1427  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1428  *
1429  * @param dest always set at least to NULL
1430  * @parm converted_size set to the number of bytes occupied by the string in
1431  * the destination on success.
1432  *
1433  * @return true if new buffer was correctly allocated, and string was
1434  * converted.
1435  **/
1436
1437 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1438                       size_t *converted_size)
1439 {
1440         size_t src_len = strlen(src)+1;
1441
1442         *dest = NULL;
1443         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1444                                      (void**)dest, converted_size, True);
1445 }
1446
1447 /**
1448  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1449  *
1450  * @param dest always set at least to NULL
1451  * @parm converted_size set to the number of bytes occupied by the string in
1452  * the destination on success.
1453  *
1454  * @return true if new buffer was correctly allocated, and string was
1455  * converted.
1456  **/
1457
1458 bool push_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1459 {
1460         size_t src_len = strlen(src)+1;
1461
1462         *dest = NULL;
1463         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1464                                        (void **)dest, converted_size, True);
1465 }
1466
1467 /**
1468  Copy a string from a ucs2 source to a unix char* destination.
1469  Flags can have:
1470   STR_TERMINATE means the string in src is null terminated.
1471   STR_NOALIGN   means don't try to align.
1472  if STR_TERMINATE is set then src_len is ignored if it is -1.
1473  src_len is the length of the source area in bytes
1474  Return the number of bytes occupied by the string in src.
1475  The resulting string in "dest" is always null terminated.
1476 **/
1477
1478 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1479 {
1480         size_t ret;
1481
1482         if (dest_len == (size_t)-1) {
1483                 /* No longer allow dest_len of -1. */
1484                 smb_panic("pull_ucs2 - invalid dest_len of -1");
1485         }
1486
1487         if (!src_len) {
1488                 if (dest && dest_len > 0) {
1489                         dest[0] = '\0';
1490                 }
1491                 return 0;
1492         }
1493
1494         if (ucs2_align(base_ptr, src, flags)) {
1495                 src = (const void *)((const char *)src + 1);
1496                 if (src_len != (size_t)-1)
1497                         src_len--;
1498         }
1499
1500         if (flags & STR_TERMINATE) {
1501                 /* src_len -1 is the default for null terminated strings. */
1502                 if (src_len != (size_t)-1) {
1503                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1504                                                 src_len/2);
1505                         if (len < src_len/2)
1506                                 len++;
1507                         src_len = len*2;
1508                 }
1509         }
1510
1511         /* ucs2 is always a multiple of 2 bytes */
1512         if (src_len != (size_t)-1)
1513                 src_len &= ~1;
1514
1515         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1516         if (ret == (size_t)-1) {
1517                 ret = 0;
1518                 dest_len = 0;
1519         }
1520
1521         if (src_len == (size_t)-1)
1522                 src_len = ret*2;
1523
1524         if (dest_len && ret) {
1525                 /* Did we already process the terminating zero ? */
1526                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1527                         dest[MIN(ret, dest_len-1)] = 0;
1528                 }
1529         } else {
1530                 dest[0] = 0;
1531         }
1532
1533         return src_len;
1534 }
1535
1536 /**
1537  Copy a string from a ucs2 source to a unix char* destination.
1538  Talloc version with a base pointer.
1539  Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1540  needs fixing. JRA).
1541  Flags can have:
1542   STR_TERMINATE means the string in src is null terminated.
1543   STR_NOALIGN   means don't try to align.
1544  if STR_TERMINATE is set then src_len is ignored if it is -1.
1545  src_len is the length of the source area in bytes
1546  Return the number of bytes occupied by the string in src.
1547  The resulting string in "dest" is always null terminated.
1548 **/
1549
1550 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1551                         const void *base_ptr,
1552                         char **ppdest,
1553                         const void *src,
1554                         size_t src_len,
1555                         int flags)
1556 {
1557         char *dest;
1558         size_t dest_len;
1559
1560         *ppdest = NULL;
1561
1562 #ifdef DEVELOPER
1563         /* Ensure we never use the braindead "malloc" varient. */
1564         if (ctx == NULL) {
1565                 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1566         }
1567 #endif
1568
1569         if (!src_len) {
1570                 return 0;
1571         }
1572
1573         if (ucs2_align(base_ptr, src, flags)) {
1574                 src = (const void *)((const char *)src + 1);
1575                 if (src_len != (size_t)-1)
1576                         src_len--;
1577         }
1578
1579         if (flags & STR_TERMINATE) {
1580                 /* src_len -1 is the default for null terminated strings. */
1581                 if (src_len != (size_t)-1) {
1582                         size_t len = strnlen_w((const smb_ucs2_t *)src,
1583                                                 src_len/2);
1584                         if (len < src_len/2)
1585                                 len++;
1586                         src_len = len*2;
1587                 } else {
1588                         /*
1589                          * src_len == -1 - alloc interface won't take this
1590                          * so we must calculate.
1591                          */
1592                         src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1593                 }
1594                 /* Ensure we don't use an insane length from the client. */
1595                 if (src_len >= 1024*1024) {
1596                         smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1597                 }
1598         } else {
1599                 /* Can't have an unlimited length
1600                  * non STR_TERMINATE'd.
1601                  */
1602                 if (src_len == (size_t)-1) {
1603                         errno = EINVAL;
1604                         return 0;
1605                 }
1606         }
1607
1608         /* src_len != -1 here. */
1609
1610         /* ucs2 is always a multiple of 2 bytes */
1611         src_len &= ~1;
1612
1613         if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1614                                    (void *)&dest, &dest_len, True)) {
1615                 dest_len = 0;
1616         }
1617
1618         if (dest_len) {
1619                 /* Did we already process the terminating zero ? */
1620                 if (dest[dest_len-1] != 0) {
1621                         size_t size = talloc_get_size(dest);
1622                         /* Have we got space to append the '\0' ? */
1623                         if (size <= dest_len) {
1624                                 /* No, realloc. */
1625                                 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1626                                                 dest_len+1);
1627                                 if (!dest) {
1628                                         /* talloc fail. */
1629                                         dest_len = (size_t)-1;
1630                                         return 0;
1631                                 }
1632                         }
1633                         /* Yay - space ! */
1634                         dest[dest_len] = '\0';
1635                         dest_len++;
1636                 }
1637         } else if (dest) {
1638                 dest[0] = 0;
1639         }
1640
1641         *ppdest = dest;
1642         return src_len;
1643 }
1644
1645 size_t pull_ucs2_fstring(char *dest, const void *src)
1646 {
1647         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1648 }
1649
1650 /**
1651  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1652  *
1653  * @param dest always set at least to NULL
1654  * @parm converted_size set to the number of bytes occupied by the string in
1655  * the destination on success.
1656  *
1657  * @return true if new buffer was correctly allocated, and string was
1658  * converted.
1659  **/
1660
1661 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1662                       size_t *converted_size)
1663 {
1664         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1665
1666         *dest = NULL;
1667         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1668                                      (void **)dest, converted_size, True);
1669 }
1670
1671 /**
1672  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1673  *
1674  * @param dest always set at least to NULL
1675  * @parm converted_size set to the number of bytes occupied by the string in
1676  * the destination on success.
1677  * @return true if new buffer was correctly allocated, and string was
1678  * converted.
1679  **/
1680
1681 bool pull_ucs2_allocate(char **dest, const smb_ucs2_t *src,
1682                         size_t *converted_size)
1683 {
1684         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1685
1686         *dest = NULL;
1687         return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1688                                        (void **)dest, converted_size, True);
1689 }
1690
1691 /**
1692  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1693  *
1694  * @param dest always set at least to NULL
1695  * @parm converted_size set to the number of bytes occupied by the string in
1696  * the destination on success.
1697  *
1698  * @return true if new buffer was correctly allocated, and string was
1699  * converted.
1700  **/
1701
1702 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1703                       size_t *converted_size)
1704 {
1705         size_t src_len = strlen(src)+1;
1706
1707         *dest = NULL;
1708         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1709                                      (void **)dest, converted_size, True);
1710 }
1711
1712 /**
1713  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1714  *
1715  * @param dest always set at least to NULL
1716  * @parm converted_size set to the number of bytes occupied by the string in
1717  * the destination on success.
1718  *
1719  * @return true if new buffer was correctly allocated, and string was
1720  * converted.
1721  **/
1722
1723 bool pull_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1724 {
1725         size_t src_len = strlen(src)+1;
1726
1727         *dest = NULL;
1728         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1729                                        (void **)dest, converted_size, True);
1730 }
1731
1732 /**
1733  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1734  *
1735  * @param dest always set at least to NULL
1736  * @parm converted_size set to the number of bytes occupied by the string in
1737  * the destination on success.
1738  *
1739  * @return true if new buffer was correctly allocated, and string was
1740  * converted.
1741  **/
1742
1743 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1744                        size_t *converted_size)
1745 {
1746         size_t src_len = strlen(src)+1;
1747
1748         *dest = NULL;
1749         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1750                                      (void **)dest, converted_size, True);
1751 }
1752
1753 /**
1754  Copy a string from a char* src to a unicode or ascii
1755  dos codepage destination choosing unicode or ascii based on the
1756  flags in the SMB buffer starting at base_ptr.
1757  Return the number of bytes occupied by the string in the destination.
1758  flags can have:
1759   STR_TERMINATE means include the null termination.
1760   STR_UPPER     means uppercase in the destination.
1761   STR_ASCII     use ascii even with unicode packet.
1762   STR_NOALIGN   means don't do alignment.
1763  dest_len is the maximum length allowed in the destination. If dest_len
1764  is -1 then no maxiumum is used.
1765 **/
1766
1767 size_t push_string_fn(const char *function, unsigned int line,
1768                       const void *base_ptr, uint16 flags2,
1769                       void *dest, const char *src,
1770                       size_t dest_len, int flags)
1771 {
1772 #ifdef DEVELOPER
1773         /* We really need to zero fill here, not clobber
1774          * region, as we want to ensure that valgrind thinks
1775          * all of the outgoing buffer has been written to
1776          * so a send() or write() won't trap an error.
1777          * JRA.
1778          */
1779 #if 0
1780         clobber_region(function, line, dest, dest_len);
1781 #else
1782         memset(dest, '\0', dest_len);
1783 #endif
1784 #endif
1785
1786         if (!(flags & STR_ASCII) && \
1787             ((flags & STR_UNICODE || \
1788               (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1789                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1790         }
1791         return push_ascii(dest, src, dest_len, flags);
1792 }
1793
1794
1795 /**
1796  Copy a string from a unicode or ascii source (depending on
1797  the packet flags) to a char* destination.
1798  Flags can have:
1799   STR_TERMINATE means the string in src is null terminated.
1800   STR_UNICODE   means to force as unicode.
1801   STR_ASCII     use ascii even with unicode packet.
1802   STR_NOALIGN   means don't do alignment.
1803  if STR_TERMINATE is set then src_len is ignored is it is -1
1804  src_len is the length of the source area in bytes.
1805  Return the number of bytes occupied by the string in src.
1806  The resulting string in "dest" is always null terminated.
1807 **/
1808
1809 size_t pull_string_fn(const char *function,
1810                         unsigned int line,
1811                         const void *base_ptr,
1812                         uint16 smb_flags2,
1813                         char *dest,
1814                         const void *src,
1815                         size_t dest_len,
1816                         size_t src_len,
1817                         int flags)
1818 {
1819 #ifdef DEVELOPER
1820         clobber_region(function, line, dest, dest_len);
1821 #endif
1822
1823         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1824                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1825                           "UNICODE defined");
1826         }
1827
1828         if (!(flags & STR_ASCII) && \
1829             ((flags & STR_UNICODE || \
1830               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1831                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1832         }
1833         return pull_ascii(dest, src, dest_len, src_len, flags);
1834 }
1835
1836 /**
1837  Copy a string from a unicode or ascii source (depending on
1838  the packet flags) to a char* destination.
1839  Variant that uses talloc.
1840  Flags can have:
1841   STR_TERMINATE means the string in src is null terminated.
1842   STR_UNICODE   means to force as unicode.
1843   STR_ASCII     use ascii even with unicode packet.
1844   STR_NOALIGN   means don't do alignment.
1845  if STR_TERMINATE is set then src_len is ignored is it is -1
1846  src_len is the length of the source area in bytes.
1847  Return the number of bytes occupied by the string in src.
1848  The resulting string in "dest" is always null terminated.
1849 **/
1850
1851 size_t pull_string_talloc_fn(const char *function,
1852                         unsigned int line,
1853                         TALLOC_CTX *ctx,
1854                         const void *base_ptr,
1855                         uint16 smb_flags2,
1856                         char **ppdest,
1857                         const void *src,
1858                         size_t src_len,
1859                         int flags)
1860 {
1861         if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1862                 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1863                           "UNICODE defined");
1864         }
1865
1866         if (!(flags & STR_ASCII) && \
1867             ((flags & STR_UNICODE || \
1868               (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1869                 return pull_ucs2_base_talloc(ctx,
1870                                         base_ptr,
1871                                         ppdest,
1872                                         src,
1873                                         src_len,
1874                                         flags);
1875         }
1876         return pull_ascii_base_talloc(ctx,
1877                                         ppdest,
1878                                         src,
1879                                         src_len,
1880                                         flags);
1881 }
1882
1883
1884 size_t align_string(const void *base_ptr, const char *p, int flags)
1885 {
1886         if (!(flags & STR_ASCII) && \
1887             ((flags & STR_UNICODE || \
1888               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1889                 return ucs2_align(base_ptr, p, flags);
1890         }
1891         return 0;
1892 }
1893
1894 /*
1895   Return the unicode codepoint for the next multi-byte CH_UNIX character
1896   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1897
1898   Also return the number of bytes consumed (which tells the caller
1899   how many bytes to skip to get to the next CH_UNIX character).
1900
1901   Return INVALID_CODEPOINT if the next character cannot be converted.
1902 */
1903
1904 codepoint_t next_codepoint(const char *str, size_t *size)
1905 {
1906         /* It cannot occupy more than 4 bytes in UTF16 format */
1907         uint8_t buf[4];
1908         smb_iconv_t descriptor;
1909         size_t ilen_orig;
1910         size_t ilen;
1911         size_t olen;
1912         char *outbuf;
1913
1914         if ((str[0] & 0x80) == 0) {
1915                 *size = 1;
1916                 return (codepoint_t)str[0];
1917         }
1918
1919         /* We assume that no multi-byte character can take
1920            more than 5 bytes. This is OK as we only
1921            support codepoints up to 1M */
1922
1923         ilen_orig = strnlen(str, 5);
1924         ilen = ilen_orig;
1925
1926         lazy_initialize_conv();
1927
1928         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1929         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1930                 *size = 1;
1931                 return INVALID_CODEPOINT;
1932         }
1933
1934         /* This looks a little strange, but it is needed to cope
1935            with codepoints above 64k which are encoded as per RFC2781. */
1936         olen = 2;
1937         outbuf = (char *)buf;
1938         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1939         if (olen == 2) {
1940                 /* We failed to convert to a 2 byte character.
1941                    See if we can convert to a 4 UTF16-LE byte char encoding.
1942                 */
1943                 olen = 4;
1944                 outbuf = (char *)buf;
1945                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1946                 if (olen == 4) {
1947                         /* We didn't convert any bytes */
1948                         *size = 1;
1949                         return INVALID_CODEPOINT;
1950                 }
1951                 olen = 4 - olen;
1952         } else {
1953                 olen = 2 - olen;
1954         }
1955
1956         *size = ilen_orig - ilen;
1957
1958         if (olen == 2) {
1959                 /* 2 byte, UTF16-LE encoded value. */
1960                 return (codepoint_t)SVAL(buf, 0);
1961         }
1962         if (olen == 4) {
1963                 /* Decode a 4 byte UTF16-LE character manually.
1964                    See RFC2871 for the encoding machanism.
1965                 */
1966                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1967                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1968
1969                 return (codepoint_t)0x10000 +
1970                                 (w1 << 10) + w2;
1971         }
1972
1973         /* no other length is valid */
1974         return INVALID_CODEPOINT;
1975 }