source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #include "includes.h"
  25
  26 /**
  27  * @file
  28  *
  29  * @brief Character-set conversion routines built on our iconv.
  30  *
  31  * @note Samba's internal character set (at least in the 3.0 series)
  32  * is always the same as the one for the Unix filesystem.  It is
  33  * <b>not</b> necessarily UTF-8 and may be different on machines that
  34  * need i18n filenames to be compatible with Unix software.  It does
  35  * have to be a superset of ASCII.  All multibyte sequences must start
  36  * with a byte with the high bit set.
  37  *
  38  * @sa lib/iconv.c
  39  */
  40
  41
/*
 * Open conversion descriptors, indexed as conv_handles[from][to].
 * init_iconv() fills the table in and stores NULL for any pair whose
 * conversion could not be opened.
 */
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/* When True, the conversion routines in this file do not log failures. */
static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
/* Unsafe unix charsets which could contain '\\' as second byte of mb character.
   The list is NULL-terminated. */
static const char *conv_unsafe_charsets[] = {
            "CP932",
            "EUC-JP",
            NULL};
/* Global variable which is set to True in init_iconv() if unix charset is unsafe
   w.r.t. '\\' in second byte of mb character. Otherwise it is set to False.
*/
BOOL is_unix_charset_unsafe;
  53
  54 /**
  55  * Return the name of a charset to give to iconv().
  56  **/
  57 static const char *charset_name(charset_t ch)
  58 {
  59         const char *ret = NULL;
  60
  61         if (ch == CH_UCS2) ret = "UCS-2LE";
  62         else if (ch == CH_UNIX) ret = lp_unix_charset();
  63         else if (ch == CH_DOS) ret = lp_dos_charset();
  64         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  65         else if (ch == CH_UTF8) ret = "UTF8";
  66
  67 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  68         if (ret && !strcmp(ret, "LOCALE")) {
  69                 const char *ln = NULL;
  70
  71 #ifdef HAVE_SETLOCALE
  72                 setlocale(LC_ALL, "");
  73 #endif
  74                 ln = nl_langinfo(CODESET);
  75                 if (ln) {
  76                         /* Check whether the charset name is supported
  77                            by iconv */
  78                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  79                         if (handle == (smb_iconv_t) -1) {
  80                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  81                                 ln = NULL;
  82                         } else {
  83                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  84                                 smb_iconv_close(handle);
  85                         }
  86                 }
  87                 ret = ln;
  88         }
  89 #endif
  90
  91         if (!ret || !*ret) ret = "ASCII";
  92         return ret;
  93 }
  94
  95 void lazy_initialize_conv(void)
  96 {
  97         static int initialized = False;
  98
  99         if (!initialized) {
 100                 initialized = True;
 101                 load_case_tables();
 102                 init_iconv();
 103         }
 104 }
 105
 106 /**
 107  * Initialize iconv conversion descriptors.
 108  *
 109  * This is called the first time it is needed, and also called again
 110  * every time the configuration is reloaded, because the charset or
 111  * codepage might have changed.
 112  **/
 113 void init_iconv(void)
 114 {
 115         int c1, c2;
 116         BOOL did_reload = False;
 117         const char **unsafe_charset = conv_unsafe_charsets;
 118
 119         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 120            first */
 121         if (!conv_handles[CH_UNIX][CH_UCS2])
 122                 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
 123
 124         if (!conv_handles[CH_UCS2][CH_UNIX])
 125                 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
 126
 127         for (c1=0;c1<NUM_CHARSETS;c1++) {
 128                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 129                         const char *n1 = charset_name((charset_t)c1);
 130                         const char *n2 = charset_name((charset_t)c2);
 131                         if (conv_handles[c1][c2] &&
 132                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 133                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 134                                 continue;
 135
 136                         did_reload = True;
 137
 138                         if (conv_handles[c1][c2])
 139                                 smb_iconv_close(conv_handles[c1][c2]);
 140
 141                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 142                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 143                                 DEBUG(0,("Conversion from %s to %s not supported\n",
 144                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 145                                 conv_handles[c1][c2] = NULL;
 146                         }
 147                 }
 148         }
 149
 150         if (did_reload) {
 151                 /* XXX: Does this really get called every time the dos
 152                  * codepage changes? */
 153                 /* XXX: Is the did_reload test too strict? */
 154                 conv_silent = True;
 155                 init_doschar_table();
 156                 init_valid_table();
 157                 conv_silent = False;
 158         }
 159
 160         while(*unsafe_charset && strcmp(*unsafe_charset, conv_handles[CH_UCS2][CH_UNIX]->to_name)) {
 161                 unsafe_charset++;
 162         }
 163
 164         if (*unsafe_charset) {
 165                 is_unix_charset_unsafe = True;
 166         } else {
 167                 is_unix_charset_unsafe = False;
 168         }
 169 }
 170
 171 /**
 172  * Convert string from one encoding to another, making error checking etc
 173  * Slow path version - uses (slow) iconv.
 174  *
 175  * @param src pointer to source string (multibyte or singlebyte)
 176  * @param srclen length of the source string in bytes
 177  * @param dest pointer to destination string (multibyte or singlebyte)
 178  * @param destlen maximal length allowed for string
 179  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 180  * @returns the number of bytes occupied in the destination
 181  *
 182  * Ensure the srclen contains the terminating zero.
 183  *
 184  **/
 185
 186 static size_t convert_string_internal(charset_t from, charset_t to,
 187                       void const *src, size_t srclen,
 188                       void *dest, size_t destlen, BOOL allow_bad_conv)
 189 {
 190         size_t i_len, o_len;
 191         size_t retval;
 192         const char* inbuf = (const char*)src;
 193         char* outbuf = (char*)dest;
 194         smb_iconv_t descriptor;
 195
 196         lazy_initialize_conv();
 197
 198         descriptor = conv_handles[from][to];
 199
 200         if (srclen == (size_t)-1) {
 201                 if (from == CH_UCS2) {
 202                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 203                 } else {
 204                         srclen = strlen((const char *)src)+1;
 205                 }
 206         }
 207
 208
 209         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 210                 if (!conv_silent)
 211                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 212                 return (size_t)-1;
 213         }
 214
 215         i_len=srclen;
 216         o_len=destlen;
 217
 218  again:
 219
 220         retval = smb_iconv(descriptor, (char **)&inbuf, &i_len, &outbuf, &o_len);
 221         if(retval==(size_t)-1) {
 222                 const char *reason="unknown error";
 223                 switch(errno) {
 224                         case EINVAL:
 225                                 reason="Incomplete multibyte sequence";
 226                                 if (!conv_silent)
 227                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 228                                 if (allow_bad_conv)
 229                                         goto use_as_is;
 230                                 break;
 231                         case E2BIG:
 232                                 reason="No more room";
 233                                 if (!conv_silent)
 234                                         DEBUG(3, ("convert_string_internal: Required %lu, available %lu\n",
 235                                                 (unsigned long)srclen, (unsigned long)destlen));
 236                                 /* we are not sure we need srclen bytes,
 237                                   may be more, may be less.
 238                                   We only know we need more than destlen
 239                                   bytes ---simo */
 240                                break;
 241                         case EILSEQ:
 242                                 reason="Illegal multibyte sequence";
 243                                 if (!conv_silent)
 244                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 245                                 if (allow_bad_conv)
 246                                         goto use_as_is;
 247                                 break;
 248                         default:
 249                                 if (!conv_silent)
 250                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 251                                 break;
 252                 }
 253                 /* smb_panic(reason); */
 254         }
 255         return destlen-o_len;
 256
 257  use_as_is:
 258
 259         /*
 260          * Conversion not supported. This is actually an error, but there are so
 261          * many misconfigured iconv systems and smb.conf's out there we can't just
 262          * fail. Do a very bad conversion instead.... JRA.
 263          */
 264
 265         {
 266                 if (o_len == 0 || i_len == 0)
 267                         return destlen - o_len;
 268
 269                 if (from == CH_UCS2 && to != CH_UCS2) {
 270                         /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
 271                         if (i_len < 2)
 272                                 return destlen - o_len;
 273                         if (i_len >= 2) {
 274                                 *outbuf = inbuf[0];
 275
 276                                 outbuf++;
 277                                 o_len--;
 278
 279                                 inbuf += 2;
 280                                 i_len -= 2;
 281                         }
 282
 283                         if (o_len == 0 || i_len == 0)
 284                                 return destlen - o_len;
 285
 286                         /* Keep trying with the next char... */
 287                         goto again;
 288
 289                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 290                         /* Can't convert to ucs2 - just widen by adding zero. */
 291                         if (o_len < 2)
 292                                 return destlen - o_len;
 293
 294                         outbuf[0] = inbuf[0];
 295                         outbuf[1] = '\0';
 296
 297                         inbuf++;
 298                         i_len--;
 299
 300                         outbuf += 2;
 301                         o_len -= 2;
 302
 303                         if (o_len == 0 || i_len == 0)
 304                                 return destlen - o_len;
 305
 306                         /* Keep trying with the next char... */
 307                         goto again;
 308
 309                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 310                         /* Failed multibyte to multibyte. Just copy 1 char and
 311                                 try again. */
 312                         outbuf[0] = inbuf[0];
 313
 314                         inbuf++;
 315                         i_len--;
 316
 317                         outbuf++;
 318                         o_len--;
 319
 320                         if (o_len == 0 || i_len == 0)
 321                                 return destlen - o_len;
 322
 323                         /* Keep trying with the next char... */
 324                         goto again;
 325
 326                 } else {
 327                         /* Keep compiler happy.... */
 328                         return destlen - o_len;
 329                 }
 330         }
 331 }
 332
 333 /**
 334  * Convert string from one encoding to another, making error checking etc
 335  * Fast path version - handles ASCII first.
 336  *
 337  * @param src pointer to source string (multibyte or singlebyte)
 338  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 339  * @param dest pointer to destination string (multibyte or singlebyte)
 340  * @param destlen maximal length allowed for string - *NEVER* -1.
 341  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 342  * @returns the number of bytes occupied in the destination
 343  *
 344  * Ensure the srclen contains the terminating zero.
 345  *
 346  * This function has been hand-tuned to provide a fast path.
 347  * Don't change unless you really know what you are doing. JRA.
 348  **/
 349
 350 size_t convert_string(charset_t from, charset_t to,
 351                       void const *src, size_t srclen,
 352                       void *dest, size_t destlen, BOOL allow_bad_conv)
 353 {
 354         /*
 355          * NB. We deliberately don't do a strlen here if srclen == -1.
 356          * This is very expensive over millions of calls and is taken
 357          * care of in the slow path in convert_string_internal. JRA.
 358          */
 359
 360 #ifdef DEVELOPER
 361         SMB_ASSERT(destlen != (size_t)-1);
 362 #endif
 363
 364         if (srclen == 0)
 365                 return 0;
 366
 367         if (from != CH_UCS2 && to != CH_UCS2) {
 368                 const unsigned char *p = (const unsigned char *)src;
 369                 unsigned char *q = (unsigned char *)dest;
 370                 size_t slen = srclen;
 371                 size_t dlen = destlen;
 372                 unsigned char lastp;
 373                 size_t retval = 0;
 374
 375                 /* If all characters are ascii, fast path here. */
 376                 while (slen && dlen) {
 377                         if ((lastp = *p) <= 0x7f) {
 378                                 *q++ = *p++;
 379                                 if (slen != (size_t)-1) {
 380                                         slen--;
 381                                 }
 382                                 dlen--;
 383                                 retval++;
 384                                 if (!lastp)
 385                                         break;
 386                         } else {
 387 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 388                                 goto general_case;
 389 #else
 390                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 391 #endif
 392                         }
 393                 }
 394                 return retval;
 395         } else if (from == CH_UCS2 && to != CH_UCS2) {
 396                 const unsigned char *p = (const unsigned char *)src;
 397                 unsigned char *q = (unsigned char *)dest;
 398                 size_t retval = 0;
 399                 size_t slen = srclen;
 400                 size_t dlen = destlen;
 401                 unsigned char lastp;
 402
 403                 /* If all characters are ascii, fast path here. */
 404                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 405                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 406                                 *q++ = *p;
 407                                 if (slen != (size_t)-1) {
 408                                         slen -= 2;
 409                                 }
 410                                 p += 2;
 411                                 dlen--;
 412                                 retval++;
 413                                 if (!lastp)
 414                                         break;
 415                         } else {
 416 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 417                                 goto general_case;
 418 #else
 419                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 420 #endif
 421                         }
 422                 }
 423                 return retval;
 424         } else if (from != CH_UCS2 && to == CH_UCS2) {
 425                 const unsigned char *p = (const unsigned char *)src;
 426                 unsigned char *q = (unsigned char *)dest;
 427                 size_t retval = 0;
 428                 size_t slen = srclen;
 429                 size_t dlen = destlen;
 430                 unsigned char lastp;
 431
 432                 /* If all characters are ascii, fast path here. */
 433                 while (slen && (dlen >= 2)) {
 434                         if ((lastp = *p) <= 0x7F) {
 435                                 *q++ = *p++;
 436                                 *q++ = '\0';
 437                                 if (slen != (size_t)-1) {
 438                                         slen--;
 439                                 }
 440                                 dlen -= 2;
 441                                 retval += 2;
 442                                 if (!lastp)
 443                                         break;
 444                         } else {
 445 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 446                                 goto general_case;
 447 #else
 448                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 449 #endif
 450                         }
 451                 }
 452                 return retval;
 453         }
 454
 455 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 456   general_case:
 457 #endif
 458         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 459 }
 460
 461 /**
 462  * Convert between character sets, allocating a new buffer for the result.
 463  *
 464  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 465  * @param srclen length of source buffer.
 466  * @param dest always set at least to NULL
 467  * @note -1 is not accepted for srclen.
 468  *
 469  * @returns Size in bytes of the converted string; or -1 in case of error.
 470  *
 471  * Ensure the srclen contains the terminating zero.
 472  *
 473  * I hate the goto's in this function. It's embarressing.....
 474  * There has to be a cleaner way to do this. JRA.
 475  **/
 476
 477 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 478                                void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
 479 {
 480         size_t i_len, o_len, destlen = MAX(srclen, 512);
 481         size_t retval;
 482         const char *inbuf = (const char *)src;
 483         char *outbuf = NULL, *ob = NULL;
 484         smb_iconv_t descriptor;
 485
 486         *dest = NULL;
 487
 488         if (src == NULL || srclen == (size_t)-1)
 489                 return (size_t)-1;
 490         if (srclen == 0)
 491                 return 0;
 492
 493         lazy_initialize_conv();
 494
 495         descriptor = conv_handles[from][to];
 496
 497         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 498                 if (!conv_silent)
 499                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 500                 if (allow_bad_conv)
 501                         goto use_as_is;
 502                 return (size_t)-1;
 503         }
 504
 505   convert:
 506
 507         if ((destlen*2) < destlen) {
 508                 /* wrapped ! abort. */
 509                 if (!conv_silent)
 510                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 511                 if (!ctx)
 512                         SAFE_FREE(outbuf);
 513                 return (size_t)-1;
 514         } else {
 515                 destlen = destlen * 2;
 516         }
 517
 518         if (ctx)
 519                 ob = (char *)talloc_realloc(ctx, ob, destlen);
 520         else
 521                 ob = (char *)Realloc(ob, destlen);
 522
 523         if (!ob) {
 524                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 525                 if (!ctx)
 526                         SAFE_FREE(outbuf);
 527                 return (size_t)-1;
 528         } else {
 529                 outbuf = ob;
 530         }
 531         i_len = srclen;
 532         o_len = destlen;
 533
 534  again:
 535
 536         retval = smb_iconv(descriptor,
 537                            (char **)&inbuf, &i_len,
 538                            &outbuf, &o_len);
 539         if(retval == (size_t)-1)                {
 540                 const char *reason="unknown error";
 541                 switch(errno) {
 542                         case EINVAL:
 543                                 reason="Incomplete multibyte sequence";
 544                                 if (!conv_silent)
 545                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 546                                 if (allow_bad_conv)
 547                                         goto use_as_is;
 548                                 break;
 549                         case E2BIG:
 550                                 goto convert;
 551                         case EILSEQ:
 552                                 reason="Illegal multibyte sequence";
 553                                 if (!conv_silent)
 554                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 555                                 if (allow_bad_conv)
 556                                         goto use_as_is;
 557                                 break;
 558                 }
 559                 if (!conv_silent)
 560                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 561                 /* smb_panic(reason); */
 562                 return (size_t)-1;
 563         }
 564
 565   out:
 566
 567         destlen = destlen - o_len;
 568         if (ctx)
 569                 *dest = (char *)talloc_realloc(ctx,ob,destlen);
 570         else
 571                 *dest = (char *)Realloc(ob,destlen);
 572         if (destlen && !*dest) {
 573                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 574                 if (!ctx)
 575                         SAFE_FREE(ob);
 576                 return (size_t)-1;
 577         }
 578
 579         return destlen;
 580
 581  use_as_is:
 582
 583         /*
 584          * Conversion not supported. This is actually an error, but there are so
 585          * many misconfigured iconv systems and smb.conf's out there we can't just
 586          * fail. Do a very bad conversion instead.... JRA.
 587          */
 588
 589         {
 590                 if (o_len == 0 || i_len == 0)
 591                         goto out;
 592
 593                 if (from == CH_UCS2 && to != CH_UCS2) {
 594                         /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
 595                         if (i_len < 2)
 596                                 goto out;
 597
 598                         if (i_len >= 2) {
 599                                 *outbuf = inbuf[0];
 600
 601                                 outbuf++;
 602                                 o_len--;
 603
 604                                 inbuf += 2;
 605                                 i_len -= 2;
 606                         }
 607
 608                         if (o_len == 0 || i_len == 0)
 609                                 goto out;
 610
 611                         /* Keep trying with the next char... */
 612                         goto again;
 613
 614                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 615                         /* Can't convert to ucs2 - just widen by adding zero. */
 616                         if (o_len < 2)
 617                                 goto out;
 618
 619                         outbuf[0] = inbuf[0];
 620                         outbuf[1] = '\0';
 621
 622                         inbuf++;
 623                         i_len--;
 624
 625                         outbuf += 2;
 626                         o_len -= 2;
 627
 628                         if (o_len == 0 || i_len == 0)
 629                                 goto out;
 630
 631                         /* Keep trying with the next char... */
 632                         goto again;
 633
 634                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 635                         /* Failed multibyte to multibyte. Just copy 1 char and
 636                                 try again. */
 637                         outbuf[0] = inbuf[0];
 638
 639                         inbuf++;
 640                         i_len--;
 641
 642                         outbuf++;
 643                         o_len--;
 644
 645                         if (o_len == 0 || i_len == 0)
 646                                 goto out;
 647
 648                         /* Keep trying with the next char... */
 649                         goto again;
 650
 651                 } else {
 652                         /* Keep compiler happy.... */
 653                         goto out;
 654                 }
 655         }
 656 }
 657
 658 /**
 659  * Convert between character sets, allocating a new buffer using talloc for the result.
 660  *
 661  * @param srclen length of source buffer.
 662  * @param dest always set at least to NULL
 663  * @note -1 is not accepted for srclen.
 664  *
 665  * @returns Size in bytes of the converted string; or -1 in case of error.
 666  **/
 667 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 668                                 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
 669 {
 670         size_t dest_len;
 671
 672         *dest = NULL;
 673         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
 674         if (dest_len == (size_t)-1)
 675                 return (size_t)-1;
 676         if (*dest == NULL)
 677                 return (size_t)-1;
 678         return dest_len;
 679 }
 680
 681 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 682 {
 683         size_t size;
 684         smb_ucs2_t *buffer;
 685
 686         size = push_ucs2_allocate(&buffer, src);
 687         if (size == (size_t)-1) {
 688                 smb_panic("failed to create UCS2 buffer");
 689         }
 690         if (!strupper_w(buffer) && (dest == src)) {
 691                 free(buffer);
 692                 return srclen;
 693         }
 694
 695         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
 696         free(buffer);
 697         return size;
 698 }
 699
 700 /**
 701  strdup() a unix string to upper case.
 702  Max size is pstring.
 703 **/
 704
 705 char *strdup_upper(const char *s)
 706 {
 707         pstring out_buffer;
 708         const unsigned char *p = (const unsigned char *)s;
 709         unsigned char *q = (unsigned char *)out_buffer;
 710
 711         /* this is quite a common operation, so we want it to be
 712            fast. We optimise for the ascii case, knowing that all our
 713            supported multi-byte character sets are ascii-compatible
 714            (ie. they match for the first 128 chars) */
 715
 716         while (1) {
 717                 if (*p & 0x80)
 718                         break;
 719                 *q++ = toupper(*p);
 720                 if (!*p)
 721                         break;
 722                 p++;
 723                 if (p - ( const unsigned char *)s >= sizeof(pstring))
 724                         break;
 725         }
 726
 727         if (*p) {
 728                 /* MB case. */
 729                 size_t size;
 730                 wpstring buffer;
 731                 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
 732                 if (size == (size_t)-1) {
 733                         return NULL;
 734                 }
 735
 736                 strupper_w(buffer);
 737
 738                 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
 739                 if (size == (size_t)-1) {
 740                         return NULL;
 741                 }
 742         }
 743
 744         return strdup(out_buffer);
 745 }
 746
 747 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 748 {
 749         size_t size;
 750         smb_ucs2_t *buffer = NULL;
 751
 752         size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
 753                                        (void **) &buffer, True);
 754         if (size == (size_t)-1 || !buffer) {
 755                 smb_panic("failed to create UCS2 buffer");
 756         }
 757         if (!strlower_w(buffer) && (dest == src)) {
 758                 SAFE_FREE(buffer);
 759                 return srclen;
 760         }
 761         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
 762         SAFE_FREE(buffer);
 763         return size;
 764 }
 765
 766 /**
 767  strdup() a unix string to lower case.
 768 **/
 769
 770 char *strdup_lower(const char *s)
 771 {
 772         size_t size;
 773         smb_ucs2_t *buffer = NULL;
 774         char *out_buffer;
 775
 776         size = push_ucs2_allocate(&buffer, s);
 777         if (size == -1 || !buffer) {
 778                 return NULL;
 779         }
 780
 781         strlower_w(buffer);
 782
 783         size = pull_ucs2_allocate(&out_buffer, buffer);
 784         SAFE_FREE(buffer);
 785
 786         if (size == (size_t)-1) {
 787                 return NULL;
 788         }
 789
 790         return out_buffer;
 791 }
 792
 793 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 794 {
 795         if (flags & (STR_NOALIGN|STR_ASCII))
 796                 return 0;
 797         return PTR_DIFF(p, base_ptr) & 1;
 798 }
 799
 800
 801 /**
 802  * Copy a string from a char* unix src to a dos codepage string destination.
 803  *
 804  * @return the number of bytes occupied by the string in the destination.
 805  *
 806  * @param flags can include
 807  * <dl>
 808  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 809  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 810  * </dl>
 811  *
 812  * @param dest_len the maximum length in bytes allowed in the
 813  * destination.  If @p dest_len is -1 then no maximum is used.
 814  **/
 815 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 816 {
 817         size_t src_len = strlen(src);
 818         pstring tmpbuf;
 819
 820         /* treat a pstring as "unlimited" length */
 821         if (dest_len == (size_t)-1)
 822                 dest_len = sizeof(pstring);
 823
 824         if (flags & STR_UPPER) {
 825                 pstrcpy(tmpbuf, src);
 826                 strupper_m(tmpbuf);
 827                 src = tmpbuf;
 828         }
 829
 830         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 831                 src_len++;
 832
 833         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 834 }
 835
 836 size_t push_ascii_fstring(void *dest, const char *src)
 837 {
 838         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 839 }
 840
 841 size_t push_ascii_pstring(void *dest, const char *src)
 842 {
 843         return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
 844 }
 845
 846 /********************************************************************
 847  Push an nstring - ensure null terminated. Written by
 848  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 849 ********************************************************************/
 850
 851 size_t push_ascii_nstring(void *dest, const char *src)
 852 {
 853         size_t i, buffer_len, dest_len;
 854         smb_ucs2_t *buffer;
 855
 856         conv_silent = True;
 857         buffer_len = push_ucs2_allocate(&buffer, src);
 858         if (buffer_len == (size_t)-1) {
 859                 smb_panic("failed to create UCS2 buffer");
 860         }
 861
 862         dest_len = 0;
 863         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
 864                 unsigned char mb[10];
 865                 /* Convert one smb_ucs2_t character at a time. */
 866                 size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
 867                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
 868                         memcpy((char *)dest + dest_len, mb, mb_len);
 869                         dest_len += mb_len;
 870                 } else {
 871                         errno = E2BIG;
 872                         break;
 873                 }
 874         }
 875         ((char *)dest)[dest_len] = '\0';
 876
 877         SAFE_FREE(buffer);
 878         conv_silent = False;
 879         return dest_len;
 880 }
 881
 882 /**
 883  * Copy a string from a dos codepage source to a unix char* destination.
 884  *
 885  * The resulting string in "dest" is always null terminated.
 886  *
 887  * @param flags can have:
 888  * <dl>
 889  * <dt>STR_TERMINATE</dt>
 890  * <dd>STR_TERMINATE means the string in @p src
 891  * is null terminated, and src_len is ignored.</dd>
 892  * </dl>
 893  *
 894  * @param src_len is the length of the source area in bytes.
 895  * @returns the number of bytes occupied by the string in @p src.
 896  **/
 897 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 898 {
 899         size_t ret;
 900
 901         if (dest_len == (size_t)-1)
 902                 dest_len = sizeof(pstring);
 903
 904         if (flags & STR_TERMINATE) {
 905                 if (src_len == (size_t)-1) {
 906                         src_len = strlen(src) + 1;
 907                 } else {
 908                         size_t len = strnlen(src, src_len);
 909                         if (len < src_len)
 910                                 len++;
 911                         src_len = len;
 912                 }
 913         }
 914
 915         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
 916         if (ret == (size_t)-1) {
 917                 dest_len = 0;
 918         }
 919
 920         if (dest_len)
 921                 dest[MIN(ret, dest_len-1)] = 0;
 922         else
 923                 dest[0] = 0;
 924
 925         return src_len;
 926 }
 927
 928 size_t pull_ascii_pstring(char *dest, const void *src)
 929 {
 930         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
 931 }
 932
 933 size_t pull_ascii_fstring(char *dest, const void *src)
 934 {
 935         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
 936 }
 937
 938 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
 939
 940 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
 941 {
 942         return pull_ascii(dest, src, dest_len, sizeof(nstring), STR_TERMINATE);
 943 }
 944
 945 /**
 946  * Copy a string from a char* src to a unicode destination.
 947  *
 948  * @returns the number of bytes occupied by the string in the destination.
 949  *
 950  * @param flags can have:
 951  *
 952  * <dl>
 953  * <dt>STR_TERMINATE <dd>means include the null termination.
 954  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 955  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 956  * </dl>
 957  *
 958  * @param dest_len is the maximum length allowed in the
 959  * destination. If dest_len is -1 then no maxiumum is used.
 960  **/
 961
 962 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
 963 {
 964         size_t len=0;
 965         size_t src_len;
 966         size_t ret;
 967
 968         /* treat a pstring as "unlimited" length */
 969         if (dest_len == (size_t)-1)
 970                 dest_len = sizeof(pstring);
 971
 972         if (flags & STR_TERMINATE)
 973                 src_len = (size_t)-1;
 974         else
 975                 src_len = strlen(src);
 976
 977         if (ucs2_align(base_ptr, dest, flags)) {
 978                 *(char *)dest = 0;
 979                 dest = (void *)((char *)dest + 1);
 980                 if (dest_len)
 981                         dest_len--;
 982                 len++;
 983         }
 984
 985         /* ucs2 is always a multiple of 2 bytes */
 986         dest_len &= ~1;
 987
 988         ret =  convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
 989         if (ret == (size_t)-1) {
 990                 return 0;
 991         }
 992
 993         len += ret;
 994
 995         if (flags & STR_UPPER) {
 996                 smb_ucs2_t *dest_ucs2 = dest;
 997                 size_t i;
 998                 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
 999                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1000                         if (v != dest_ucs2[i]) {
1001                                 dest_ucs2[i] = v;
1002                         }
1003                 }
1004         }
1005
1006         return len;
1007 }
1008
1009
1010 /**
1011  * Copy a string from a unix char* src to a UCS2 destination,
1012  * allocating a buffer using talloc().
1013  *
1014  * @param dest always set at least to NULL
1015  *
1016  * @returns The number of bytes occupied by the string in the destination
1017  *         or -1 in case of error.
1018  **/
1019 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1020 {
1021         size_t src_len = strlen(src)+1;
1022
1023         *dest = NULL;
1024         return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1025 }
1026
1027
1028 /**
1029  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1030  *
1031  * @param dest always set at least to NULL
1032  *
1033  * @returns The number of bytes occupied by the string in the destination
1034  *         or -1 in case of error.
1035  **/
1036
1037 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1038 {
1039         size_t src_len = strlen(src)+1;
1040
1041         *dest = NULL;
1042         return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1043 }
1044
1045 /**
1046  Copy a string from a char* src to a UTF-8 destination.
1047  Return the number of bytes occupied by the string in the destination
1048  Flags can have:
1049   STR_TERMINATE means include the null termination
1050   STR_UPPER     means uppercase in the destination
1051  dest_len is the maximum length allowed in the destination. If dest_len
1052  is -1 then no maxiumum is used.
1053 **/
1054
1055 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1056 {
1057         size_t src_len = strlen(src);
1058         pstring tmpbuf;
1059
1060         /* treat a pstring as "unlimited" length */
1061         if (dest_len == (size_t)-1)
1062                 dest_len = sizeof(pstring);
1063
1064         if (flags & STR_UPPER) {
1065                 pstrcpy(tmpbuf, src);
1066                 strupper_m(tmpbuf);
1067                 src = tmpbuf;
1068         }
1069
1070         if (flags & STR_TERMINATE)
1071                 src_len++;
1072
1073         return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1074 }
1075
1076 size_t push_utf8_fstring(void *dest, const char *src)
1077 {
1078         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1079 }
1080
1081 /**
1082  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1083  *
1084  * @param dest always set at least to NULL
1085  *
1086  * @returns The number of bytes occupied by the string in the destination
1087  **/
1088
1089 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1090 {
1091         size_t src_len = strlen(src)+1;
1092
1093         *dest = NULL;
1094         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1095 }
1096
1097 /**
1098  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1099  *
1100  * @param dest always set at least to NULL
1101  *
1102  * @returns The number of bytes occupied by the string in the destination
1103  **/
1104
1105 size_t push_utf8_allocate(char **dest, const char *src)
1106 {
1107         size_t src_len = strlen(src)+1;
1108
1109         *dest = NULL;
1110         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1111 }
1112
1113 /**
1114  Copy a string from a ucs2 source to a unix char* destination.
1115  Flags can have:
1116   STR_TERMINATE means the string in src is null terminated.
1117   STR_NOALIGN   means don't try to align.
1118  if STR_TERMINATE is set then src_len is ignored if it is -1.
1119  src_len is the length of the source area in bytes
1120  Return the number of bytes occupied by the string in src.
1121  The resulting string in "dest" is always null terminated.
1122 **/
1123
1124 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1125 {
1126         size_t ret;
1127
1128         if (dest_len == (size_t)-1)
1129                 dest_len = sizeof(pstring);
1130
1131         if (ucs2_align(base_ptr, src, flags)) {
1132                 src = (const void *)((const char *)src + 1);
1133                 if (src_len != (size_t)-1)
1134                         src_len--;
1135         }
1136
1137         if (flags & STR_TERMINATE) {
1138                 /* src_len -1 is the default for null terminated strings. */
1139                 if (src_len != (size_t)-1) {
1140                         size_t len = strnlen_w(src, src_len/2);
1141                         if (len < src_len/2)
1142                                 len++;
1143                         src_len = len*2;
1144                 }
1145         }
1146
1147         /* ucs2 is always a multiple of 2 bytes */
1148         if (src_len != (size_t)-1)
1149                 src_len &= ~1;
1150
1151         ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1152         if (ret == (size_t)-1) {
1153                 return 0;
1154         }
1155
1156         if (src_len == (size_t)-1)
1157                 src_len = ret*2;
1158
1159         if (dest_len)
1160                 dest[MIN(ret, dest_len-1)] = 0;
1161         else
1162                 dest[0] = 0;
1163
1164         return src_len;
1165 }
1166
1167 size_t pull_ucs2_pstring(char *dest, const void *src)
1168 {
1169         return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1170 }
1171
1172 size_t pull_ucs2_fstring(char *dest, const void *src)
1173 {
1174         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1175 }
1176
1177 /**
1178  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1179  *
1180  * @param dest always set at least to NULL
1181  *
1182  * @returns The number of bytes occupied by the string in the destination
1183  **/
1184
1185 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1186 {
1187         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1188         *dest = NULL;
1189         return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1190 }
1191
1192 /**
1193  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1194  *
1195  * @param dest always set at least to NULL
1196  *
1197  * @returns The number of bytes occupied by the string in the destination
1198  **/
1199
1200 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1201 {
1202         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1203         *dest = NULL;
1204         return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1205 }
1206
1207 /**
1208  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1209  *
1210  * @param dest always set at least to NULL
1211  *
1212  * @returns The number of bytes occupied by the string in the destination
1213  **/
1214
1215 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1216 {
1217         size_t src_len = strlen(src)+1;
1218         *dest = NULL;
1219         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1220 }
1221
1222 /**
1223  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1224  *
1225  * @param dest always set at least to NULL
1226  *
1227  * @returns The number of bytes occupied by the string in the destination
1228  **/
1229
1230 size_t pull_utf8_allocate(char **dest, const char *src)
1231 {
1232         size_t src_len = strlen(src)+1;
1233         *dest = NULL;
1234         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1235 }
1236
1237 /**
1238  Copy a string from a char* src to a unicode or ascii
1239  dos codepage destination choosing unicode or ascii based on the
1240  flags in the SMB buffer starting at base_ptr.
1241  Return the number of bytes occupied by the string in the destination.
1242  flags can have:
1243   STR_TERMINATE means include the null termination.
1244   STR_UPPER     means uppercase in the destination.
1245   STR_ASCII     use ascii even with unicode packet.
1246   STR_NOALIGN   means don't do alignment.
1247  dest_len is the maximum length allowed in the destination. If dest_len
1248  is -1 then no maxiumum is used.
1249 **/
1250
1251 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1252 {
1253 #ifdef DEVELOPER
1254         /* We really need to zero fill here, not clobber
1255          * region, as we want to ensure that valgrind thinks
1256          * all of the outgoing buffer has been written to
1257          * so a send() or write() won't trap an error.
1258          * JRA.
1259          */
1260 #if 0
1261         if (dest_len != (size_t)-1)
1262                 clobber_region(function, line, dest, dest_len);
1263 #else
1264         if (dest_len != (size_t)-1)
1265                 memset(dest, '\0', dest_len);
1266 #endif
1267 #endif
1268
1269         if (!(flags & STR_ASCII) && \
1270             ((flags & STR_UNICODE || \
1271               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1272                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1273         }
1274         return push_ascii(dest, src, dest_len, flags);
1275 }
1276
1277
1278 /**
1279  Copy a string from a unicode or ascii source (depending on
1280  the packet flags) to a char* destination.
1281  Flags can have:
1282   STR_TERMINATE means the string in src is null terminated.
1283   STR_UNICODE   means to force as unicode.
1284   STR_ASCII     use ascii even with unicode packet.
1285   STR_NOALIGN   means don't do alignment.
1286  if STR_TERMINATE is set then src_len is ignored is it is -1
1287  src_len is the length of the source area in bytes.
1288  Return the number of bytes occupied by the string in src.
1289  The resulting string in "dest" is always null terminated.
1290 **/
1291
1292 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1293 {
1294 #ifdef DEVELOPER
1295         if (dest_len != (size_t)-1)
1296                 clobber_region(function, line, dest, dest_len);
1297 #endif
1298
1299         if (!(flags & STR_ASCII) && \
1300             ((flags & STR_UNICODE || \
1301               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1302                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1303         }
1304         return pull_ascii(dest, src, dest_len, src_len, flags);
1305 }
1306
1307 size_t align_string(const void *base_ptr, const char *p, int flags)
1308 {
1309         if (!(flags & STR_ASCII) && \
1310             ((flags & STR_UNICODE || \
1311               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1312                 return ucs2_align(base_ptr, p, flags);
1313         }
1314         return 0;
1315 }