source/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #include "includes.h"
  25
  26 /**
  27  * @file
  28  *
  29  * @brief Character-set conversion routines built on our iconv.
  30  *
  31  * @note Samba's internal character set (at least in the 3.0 series)
  32  * is always the same as the one for the Unix filesystem.  It is
  33  * <b>not</b> necessarily UTF-8 and may be different on machines that
  34  * need i18n filenames to be compatible with Unix software.  It does
  35  * have to be a superset of ASCII.  All multibyte sequences must start
  36  * with a byte with the high bit set.
  37  *
  38  * @sa lib/iconv.c
  39  */
  40
  41
  42 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  43 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
  44
  45 /**
  46  * Return the name of a charset to give to iconv().
  47  **/
  48 static const char *charset_name(charset_t ch)
  49 {
  50         const char *ret = NULL;
  51
  52         if (ch == CH_UCS2) ret = "UCS-2LE";
  53         else if (ch == CH_UNIX) ret = lp_unix_charset();
  54         else if (ch == CH_DOS) ret = lp_dos_charset();
  55         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  56         else if (ch == CH_UTF8) ret = "UTF8";
  57
  58 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  59         if (ret && !strcmp(ret, "LOCALE")) {
  60                 const char *ln = NULL;
  61
  62 #ifdef HAVE_SETLOCALE
  63                 setlocale(LC_ALL, "");
  64 #endif
  65                 ln = nl_langinfo(CODESET);
  66                 if (ln) {
  67                         /* Check whether the charset name is supported
  68                            by iconv */
  69                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  70                         if (handle == (smb_iconv_t) -1) {
  71                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  72                                 ln = NULL;
  73                         } else {
  74                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  75                                 smb_iconv_close(handle);
  76                         }
  77                 }
  78                 ret = ln;
  79         }
  80 #endif
  81
  82         if (!ret || !*ret) ret = "ASCII";
  83         return ret;
  84 }
  85
  86 void lazy_initialize_conv(void)
  87 {
  88         static int initialized = False;
  89
  90         if (!initialized) {
  91                 initialized = True;
  92                 load_case_tables();
  93                 init_iconv();
  94         }
  95 }
  96
  97 /**
  98  * Initialize iconv conversion descriptors.
  99  *
 100  * This is called the first time it is needed, and also called again
 101  * every time the configuration is reloaded, because the charset or
 102  * codepage might have changed.
 103  **/
 104 void init_iconv(void)
 105 {
 106         int c1, c2;
 107         BOOL did_reload = False;
 108
 109         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 110            first */
 111         if (!conv_handles[CH_UNIX][CH_UCS2])
 112                 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
 113
 114         if (!conv_handles[CH_UCS2][CH_UNIX])
 115                 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
 116
 117         for (c1=0;c1<NUM_CHARSETS;c1++) {
 118                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 119                         const char *n1 = charset_name((charset_t)c1);
 120                         const char *n2 = charset_name((charset_t)c2);
 121                         if (conv_handles[c1][c2] &&
 122                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 123                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 124                                 continue;
 125
 126                         did_reload = True;
 127
 128                         if (conv_handles[c1][c2])
 129                                 smb_iconv_close(conv_handles[c1][c2]);
 130
 131                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 132                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 133                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
 134                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 135                                 if (c1 != CH_UCS2) {
 136                                         n1 = "ASCII";
 137                                 }
 138                                 if (c2 != CH_UCS2) {
 139                                         n2 = "ASCII";
 140                                 }
 141                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
 142                                         n1, n2 ));
 143                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 144                                 if (!conv_handles[c1][c2]) {
 145                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
 146                                         smb_panic("init_iconv: conv_handle initialization failed.");
 147                                 }
 148                         }
 149                 }
 150         }
 151
 152         if (did_reload) {
 153                 /* XXX: Does this really get called every time the dos
 154                  * codepage changes? */
 155                 /* XXX: Is the did_reload test too strict? */
 156                 conv_silent = True;
 157                 init_doschar_table();
 158                 init_valid_table();
 159                 conv_silent = False;
 160         }
 161 }
 162
 163 /**
 164  * Convert string from one encoding to another, making error checking etc
 165  * Slow path version - uses (slow) iconv.
 166  *
 167  * @param src pointer to source string (multibyte or singlebyte)
 168  * @param srclen length of the source string in bytes
 169  * @param dest pointer to destination string (multibyte or singlebyte)
 170  * @param destlen maximal length allowed for string
 171  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 172  * @returns the number of bytes occupied in the destination
 173  *
 174  * Ensure the srclen contains the terminating zero.
 175  *
 176  **/
 177
 178 static size_t convert_string_internal(charset_t from, charset_t to,
 179                       void const *src, size_t srclen,
 180                       void *dest, size_t destlen, BOOL allow_bad_conv)
 181 {
 182         size_t i_len, o_len;
 183         size_t retval;
 184         const char* inbuf = (const char*)src;
 185         char* outbuf = (char*)dest;
 186         smb_iconv_t descriptor;
 187
 188         lazy_initialize_conv();
 189
 190         descriptor = conv_handles[from][to];
 191
 192         if (srclen == (size_t)-1) {
 193                 if (from == CH_UCS2) {
 194                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 195                 } else {
 196                         srclen = strlen((const char *)src)+1;
 197                 }
 198         }
 199
 200
 201         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 202                 if (!conv_silent)
 203                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 204                 return (size_t)-1;
 205         }
 206
 207         i_len=srclen;
 208         o_len=destlen;
 209
 210  again:
 211
 212         retval = smb_iconv(descriptor, (char **)&inbuf, &i_len, &outbuf, &o_len);
 213         if(retval==(size_t)-1) {
 214                 const char *reason="unknown error";
 215                 switch(errno) {
 216                         case EINVAL:
 217                                 reason="Incomplete multibyte sequence";
 218                                 if (!conv_silent)
 219                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 220                                 if (allow_bad_conv)
 221                                         goto use_as_is;
 222                                 break;
 223                         case E2BIG:
 224                                 reason="No more room";
 225                                 if (!conv_silent)
 226                                         DEBUG(3, ("convert_string_internal: Required %lu, available %lu\n",
 227                                                 (unsigned long)srclen, (unsigned long)destlen));
 228                                 /* we are not sure we need srclen bytes,
 229                                   may be more, may be less.
 230                                   We only know we need more than destlen
 231                                   bytes ---simo */
 232                                break;
 233                         case EILSEQ:
 234                                 reason="Illegal multibyte sequence";
 235                                 if (!conv_silent)
 236                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 237                                 if (allow_bad_conv)
 238                                         goto use_as_is;
 239                                 break;
 240                         default:
 241                                 if (!conv_silent)
 242                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 243                                 break;
 244                 }
 245                 /* smb_panic(reason); */
 246         }
 247         return destlen-o_len;
 248
 249  use_as_is:
 250
 251         /*
 252          * Conversion not supported. This is actually an error, but there are so
 253          * many misconfigured iconv systems and smb.conf's out there we can't just
 254          * fail. Do a very bad conversion instead.... JRA.
 255          */
 256
 257         {
 258                 if (o_len == 0 || i_len == 0)
 259                         return destlen - o_len;
 260
 261                 if (from == CH_UCS2 && to != CH_UCS2) {
 262                         /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
 263                         if (i_len < 2)
 264                                 return destlen - o_len;
 265                         if (i_len >= 2) {
 266                                 *outbuf = inbuf[0];
 267
 268                                 outbuf++;
 269                                 o_len--;
 270
 271                                 inbuf += 2;
 272                                 i_len -= 2;
 273                         }
 274
 275                         if (o_len == 0 || i_len == 0)
 276                                 return destlen - o_len;
 277
 278                         /* Keep trying with the next char... */
 279                         goto again;
 280
 281                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 282                         /* Can't convert to ucs2 - just widen by adding zero. */
 283                         if (o_len < 2)
 284                                 return destlen - o_len;
 285
 286                         outbuf[0] = inbuf[0];
 287                         outbuf[1] = '\0';
 288
 289                         inbuf++;
 290                         i_len--;
 291
 292                         outbuf += 2;
 293                         o_len -= 2;
 294
 295                         if (o_len == 0 || i_len == 0)
 296                                 return destlen - o_len;
 297
 298                         /* Keep trying with the next char... */
 299                         goto again;
 300
 301                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 302                         /* Failed multibyte to multibyte. Just copy 1 char and
 303                                 try again. */
 304                         outbuf[0] = inbuf[0];
 305
 306                         inbuf++;
 307                         i_len--;
 308
 309                         outbuf++;
 310                         o_len--;
 311
 312                         if (o_len == 0 || i_len == 0)
 313                                 return destlen - o_len;
 314
 315                         /* Keep trying with the next char... */
 316                         goto again;
 317
 318                 } else {
 319                         /* Keep compiler happy.... */
 320                         return destlen - o_len;
 321                 }
 322         }
 323 }
 324
 325 /**
 326  * Convert string from one encoding to another, making error checking etc
 327  * Fast path version - handles ASCII first.
 328  *
 329  * @param src pointer to source string (multibyte or singlebyte)
 330  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 331  * @param dest pointer to destination string (multibyte or singlebyte)
 332  * @param destlen maximal length allowed for string - *NEVER* -1.
 333  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 334  * @returns the number of bytes occupied in the destination
 335  *
 336  * Ensure the srclen contains the terminating zero.
 337  *
 338  * This function has been hand-tuned to provide a fast path.
 339  * Don't change unless you really know what you are doing. JRA.
 340  **/
 341
 342 size_t convert_string(charset_t from, charset_t to,
 343                       void const *src, size_t srclen,
 344                       void *dest, size_t destlen, BOOL allow_bad_conv)
 345 {
 346         /*
 347          * NB. We deliberately don't do a strlen here if srclen == -1.
 348          * This is very expensive over millions of calls and is taken
 349          * care of in the slow path in convert_string_internal. JRA.
 350          */
 351
 352 #ifdef DEVELOPER
 353         SMB_ASSERT(destlen != (size_t)-1);
 354 #endif
 355
 356         if (srclen == 0)
 357                 return 0;
 358
 359         if (from != CH_UCS2 && to != CH_UCS2) {
 360                 const unsigned char *p = (const unsigned char *)src;
 361                 unsigned char *q = (unsigned char *)dest;
 362                 size_t slen = srclen;
 363                 size_t dlen = destlen;
 364                 unsigned char lastp;
 365                 size_t retval = 0;
 366
 367                 /* If all characters are ascii, fast path here. */
 368                 while (slen && dlen) {
 369                         if ((lastp = *p) <= 0x7f) {
 370                                 *q++ = *p++;
 371                                 if (slen != (size_t)-1) {
 372                                         slen--;
 373                                 }
 374                                 dlen--;
 375                                 retval++;
 376                                 if (!lastp)
 377                                         break;
 378                         } else {
 379 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 380                                 goto general_case;
 381 #else
 382                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 383 #endif
 384                         }
 385                 }
 386                 return retval;
 387         } else if (from == CH_UCS2 && to != CH_UCS2) {
 388                 const unsigned char *p = (const unsigned char *)src;
 389                 unsigned char *q = (unsigned char *)dest;
 390                 size_t retval = 0;
 391                 size_t slen = srclen;
 392                 size_t dlen = destlen;
 393                 unsigned char lastp;
 394
 395                 /* If all characters are ascii, fast path here. */
 396                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 397                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 398                                 *q++ = *p;
 399                                 if (slen != (size_t)-1) {
 400                                         slen -= 2;
 401                                 }
 402                                 p += 2;
 403                                 dlen--;
 404                                 retval++;
 405                                 if (!lastp)
 406                                         break;
 407                         } else {
 408 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 409                                 goto general_case;
 410 #else
 411                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 412 #endif
 413                         }
 414                 }
 415                 return retval;
 416         } else if (from != CH_UCS2 && to == CH_UCS2) {
 417                 const unsigned char *p = (const unsigned char *)src;
 418                 unsigned char *q = (unsigned char *)dest;
 419                 size_t retval = 0;
 420                 size_t slen = srclen;
 421                 size_t dlen = destlen;
 422                 unsigned char lastp;
 423
 424                 /* If all characters are ascii, fast path here. */
 425                 while (slen && (dlen >= 2)) {
 426                         if ((lastp = *p) <= 0x7F) {
 427                                 *q++ = *p++;
 428                                 *q++ = '\0';
 429                                 if (slen != (size_t)-1) {
 430                                         slen--;
 431                                 }
 432                                 dlen -= 2;
 433                                 retval += 2;
 434                                 if (!lastp)
 435                                         break;
 436                         } else {
 437 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 438                                 goto general_case;
 439 #else
 440                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 441 #endif
 442                         }
 443                 }
 444                 return retval;
 445         }
 446
 447 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 448   general_case:
 449 #endif
 450         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 451 }
 452
 453 /**
 454  * Convert between character sets, allocating a new buffer for the result.
 455  *
 456  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 457  * @param srclen length of source buffer.
 458  * @param dest always set at least to NULL
 459  * @note -1 is not accepted for srclen.
 460  *
 461  * @returns Size in bytes of the converted string; or -1 in case of error.
 462  *
 463  * Ensure the srclen contains the terminating zero.
 464  *
 465  * I hate the goto's in this function. It's embarressing.....
 466  * There has to be a cleaner way to do this. JRA.
 467  **/
 468
 469 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 470                                void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
 471 {
 472         size_t i_len, o_len, destlen = MAX(srclen, 512);
 473         size_t retval;
 474         const char *inbuf = (const char *)src;
 475         char *outbuf = NULL, *ob = NULL;
 476         smb_iconv_t descriptor;
 477
 478         *dest = NULL;
 479
 480         if (src == NULL || srclen == (size_t)-1)
 481                 return (size_t)-1;
 482         if (srclen == 0)
 483                 return 0;
 484
 485         lazy_initialize_conv();
 486
 487         descriptor = conv_handles[from][to];
 488
 489         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 490                 if (!conv_silent)
 491                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 492                 return (size_t)-1;
 493         }
 494
 495   convert:
 496
 497         if ((destlen*2) < destlen) {
 498                 /* wrapped ! abort. */
 499                 if (!conv_silent)
 500                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 501                 if (!ctx)
 502                         SAFE_FREE(outbuf);
 503                 return (size_t)-1;
 504         } else {
 505                 destlen = destlen * 2;
 506         }
 507
 508         if (ctx)
 509                 ob = (char *)talloc_realloc(ctx, ob, destlen);
 510         else
 511                 ob = (char *)Realloc(ob, destlen);
 512
 513         if (!ob) {
 514                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 515                 if (!ctx)
 516                         SAFE_FREE(outbuf);
 517                 return (size_t)-1;
 518         } else {
 519                 outbuf = ob;
 520         }
 521         i_len = srclen;
 522         o_len = destlen;
 523
 524  again:
 525
 526         retval = smb_iconv(descriptor,
 527                            (char **)&inbuf, &i_len,
 528                            &outbuf, &o_len);
 529         if(retval == (size_t)-1)                {
 530                 const char *reason="unknown error";
 531                 switch(errno) {
 532                         case EINVAL:
 533                                 reason="Incomplete multibyte sequence";
 534                                 if (!conv_silent)
 535                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 536                                 if (allow_bad_conv)
 537                                         goto use_as_is;
 538                                 break;
 539                         case E2BIG:
 540                                 goto convert;
 541                         case EILSEQ:
 542                                 reason="Illegal multibyte sequence";
 543                                 if (!conv_silent)
 544                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 545                                 if (allow_bad_conv)
 546                                         goto use_as_is;
 547                                 break;
 548                 }
 549                 if (!conv_silent)
 550                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 551                 /* smb_panic(reason); */
 552                 return (size_t)-1;
 553         }
 554
 555   out:
 556
 557         destlen = destlen - o_len;
 558         if (ctx)
 559                 *dest = (char *)talloc_realloc(ctx,ob,destlen);
 560         else
 561                 *dest = (char *)Realloc(ob,destlen);
 562         if (destlen && !*dest) {
 563                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 564                 if (!ctx)
 565                         SAFE_FREE(ob);
 566                 return (size_t)-1;
 567         }
 568
 569         return destlen;
 570
 571  use_as_is:
 572
 573         /*
 574          * Conversion not supported. This is actually an error, but there are so
 575          * many misconfigured iconv systems and smb.conf's out there we can't just
 576          * fail. Do a very bad conversion instead.... JRA.
 577          */
 578
 579         {
 580                 if (o_len == 0 || i_len == 0)
 581                         goto out;
 582
 583                 if (from == CH_UCS2 && to != CH_UCS2) {
 584                         /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
 585                         if (i_len < 2)
 586                                 goto out;
 587
 588                         if (i_len >= 2) {
 589                                 *outbuf = inbuf[0];
 590
 591                                 outbuf++;
 592                                 o_len--;
 593
 594                                 inbuf += 2;
 595                                 i_len -= 2;
 596                         }
 597
 598                         if (o_len == 0 || i_len == 0)
 599                                 goto out;
 600
 601                         /* Keep trying with the next char... */
 602                         goto again;
 603
 604                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 605                         /* Can't convert to ucs2 - just widen by adding zero. */
 606                         if (o_len < 2)
 607                                 goto out;
 608
 609                         outbuf[0] = inbuf[0];
 610                         outbuf[1] = '\0';
 611
 612                         inbuf++;
 613                         i_len--;
 614
 615                         outbuf += 2;
 616                         o_len -= 2;
 617
 618                         if (o_len == 0 || i_len == 0)
 619                                 goto out;
 620
 621                         /* Keep trying with the next char... */
 622                         goto again;
 623
 624                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 625                         /* Failed multibyte to multibyte. Just copy 1 char and
 626                                 try again. */
 627                         outbuf[0] = inbuf[0];
 628
 629                         inbuf++;
 630                         i_len--;
 631
 632                         outbuf++;
 633                         o_len--;
 634
 635                         if (o_len == 0 || i_len == 0)
 636                                 goto out;
 637
 638                         /* Keep trying with the next char... */
 639                         goto again;
 640
 641                 } else {
 642                         /* Keep compiler happy.... */
 643                         goto out;
 644                 }
 645         }
 646 }
 647
 648 /**
 649  * Convert between character sets, allocating a new buffer using talloc for the result.
 650  *
 651  * @param srclen length of source buffer.
 652  * @param dest always set at least to NULL
 653  * @note -1 is not accepted for srclen.
 654  *
 655  * @returns Size in bytes of the converted string; or -1 in case of error.
 656  **/
 657 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 658                                 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
 659 {
 660         size_t dest_len;
 661
 662         *dest = NULL;
 663         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
 664         if (dest_len == (size_t)-1)
 665                 return (size_t)-1;
 666         if (*dest == NULL)
 667                 return (size_t)-1;
 668         return dest_len;
 669 }
 670
 671 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 672 {
 673         size_t size;
 674         smb_ucs2_t *buffer;
 675
 676         size = push_ucs2_allocate(&buffer, src);
 677         if (size == (size_t)-1) {
 678                 smb_panic("failed to create UCS2 buffer");
 679         }
 680         if (!strupper_w(buffer) && (dest == src)) {
 681                 free(buffer);
 682                 return srclen;
 683         }
 684
 685         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
 686         free(buffer);
 687         return size;
 688 }
 689
 690 /**
 691  strdup() a unix string to upper case.
 692  Max size is pstring.
 693 **/
 694
 695 char *strdup_upper(const char *s)
 696 {
 697         pstring out_buffer;
 698         const unsigned char *p = (const unsigned char *)s;
 699         unsigned char *q = (unsigned char *)out_buffer;
 700
 701         /* this is quite a common operation, so we want it to be
 702            fast. We optimise for the ascii case, knowing that all our
 703            supported multi-byte character sets are ascii-compatible
 704            (ie. they match for the first 128 chars) */
 705
 706         while (1) {
 707                 if (*p & 0x80)
 708                         break;
 709                 *q++ = toupper(*p);
 710                 if (!*p)
 711                         break;
 712                 p++;
 713                 if (p - ( const unsigned char *)s >= sizeof(pstring))
 714                         break;
 715         }
 716
 717         if (*p) {
 718                 /* MB case. */
 719                 size_t size;
 720                 wpstring buffer;
 721                 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
 722                 if (size == (size_t)-1) {
 723                         return NULL;
 724                 }
 725
 726                 strupper_w(buffer);
 727
 728                 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
 729                 if (size == (size_t)-1) {
 730                         return NULL;
 731                 }
 732         }
 733
 734         return strdup(out_buffer);
 735 }
 736
 737 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 738 {
 739         size_t size;
 740         smb_ucs2_t *buffer = NULL;
 741
 742         size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
 743                                        (void **) &buffer, True);
 744         if (size == (size_t)-1 || !buffer) {
 745                 smb_panic("failed to create UCS2 buffer");
 746         }
 747         if (!strlower_w(buffer) && (dest == src)) {
 748                 SAFE_FREE(buffer);
 749                 return srclen;
 750         }
 751         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
 752         SAFE_FREE(buffer);
 753         return size;
 754 }
 755
 756 /**
 757  strdup() a unix string to lower case.
 758 **/
 759
 760 char *strdup_lower(const char *s)
 761 {
 762         size_t size;
 763         smb_ucs2_t *buffer = NULL;
 764         char *out_buffer;
 765
 766         size = push_ucs2_allocate(&buffer, s);
 767         if (size == -1 || !buffer) {
 768                 return NULL;
 769         }
 770
 771         strlower_w(buffer);
 772
 773         size = pull_ucs2_allocate(&out_buffer, buffer);
 774         SAFE_FREE(buffer);
 775
 776         if (size == (size_t)-1) {
 777                 return NULL;
 778         }
 779
 780         return out_buffer;
 781 }
 782
 783 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 784 {
 785         if (flags & (STR_NOALIGN|STR_ASCII))
 786                 return 0;
 787         return PTR_DIFF(p, base_ptr) & 1;
 788 }
 789
 790
 791 /**
 792  * Copy a string from a char* unix src to a dos codepage string destination.
 793  *
 794  * @return the number of bytes occupied by the string in the destination.
 795  *
 796  * @param flags can include
 797  * <dl>
 798  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 799  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 800  * </dl>
 801  *
 802  * @param dest_len the maximum length in bytes allowed in the
 803  * destination.  If @p dest_len is -1 then no maximum is used.
 804  **/
 805 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 806 {
 807         size_t src_len = strlen(src);
 808         pstring tmpbuf;
 809
 810         /* treat a pstring as "unlimited" length */
 811         if (dest_len == (size_t)-1)
 812                 dest_len = sizeof(pstring);
 813
 814         if (flags & STR_UPPER) {
 815                 pstrcpy(tmpbuf, src);
 816                 strupper_m(tmpbuf);
 817                 src = tmpbuf;
 818         }
 819
 820         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 821                 src_len++;
 822
 823         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 824 }
 825
 826 size_t push_ascii_fstring(void *dest, const char *src)
 827 {
 828         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 829 }
 830
 831 size_t push_ascii_pstring(void *dest, const char *src)
 832 {
 833         return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
 834 }
 835
 836 /********************************************************************
 837  Push an nstring - ensure null terminated. Written by
 838  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 839 ********************************************************************/
 840
 841 size_t push_ascii_nstring(void *dest, const char *src)
 842 {
 843         size_t i, buffer_len, dest_len;
 844         smb_ucs2_t *buffer;
 845
 846         conv_silent = True;
 847         buffer_len = push_ucs2_allocate(&buffer, src);
 848         if (buffer_len == (size_t)-1) {
 849                 smb_panic("failed to create UCS2 buffer");
 850         }
 851
 852         /* We're using buffer_len below to count ucs2 characters, not bytes. */
 853         buffer_len /= sizeof(smb_ucs2_t);
 854
 855         dest_len = 0;
 856         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
 857                 unsigned char mb[10];
 858                 /* Convert one smb_ucs2_t character at a time. */
 859                 size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
 860                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
 861                         memcpy((char *)dest + dest_len, mb, mb_len);
 862                         dest_len += mb_len;
 863                 } else {
 864                         errno = E2BIG;
 865                         break;
 866                 }
 867         }
 868         ((char *)dest)[dest_len] = '\0';
 869
 870         SAFE_FREE(buffer);
 871         conv_silent = False;
 872         return dest_len;
 873 }
 874
 875 /**
 876  * Copy a string from a dos codepage source to a unix char* destination.
 877  *
 878  * The resulting string in "dest" is always null terminated.
 879  *
 880  * @param flags can have:
 881  * <dl>
 882  * <dt>STR_TERMINATE</dt>
 883  * <dd>STR_TERMINATE means the string in @p src
 884  * is null terminated, and src_len is ignored.</dd>
 885  * </dl>
 886  *
 887  * @param src_len is the length of the source area in bytes.
 888  * @returns the number of bytes occupied by the string in @p src.
 889  **/
 890 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 891 {
 892         size_t ret;
 893
 894         if (dest_len == (size_t)-1)
 895                 dest_len = sizeof(pstring);
 896
 897         if (flags & STR_TERMINATE) {
 898                 if (src_len == (size_t)-1) {
 899                         src_len = strlen(src) + 1;
 900                 } else {
 901                         size_t len = strnlen(src, src_len);
 902                         if (len < src_len)
 903                                 len++;
 904                         src_len = len;
 905                 }
 906         }
 907
 908         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
 909         if (ret == (size_t)-1) {
 910                 dest_len = 0;
 911         }
 912
 913         if (dest_len)
 914                 dest[MIN(ret, dest_len-1)] = 0;
 915         else
 916                 dest[0] = 0;
 917
 918         return src_len;
 919 }
 920
 921 size_t pull_ascii_pstring(char *dest, const void *src)
 922 {
 923         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
 924 }
 925
 926 size_t pull_ascii_fstring(char *dest, const void *src)
 927 {
 928         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
 929 }
 930
 931 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
 932
 933 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
 934 {
 935         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
 936 }
 937
 938 /**
 939  * Copy a string from a char* src to a unicode destination.
 940  *
 941  * @returns the number of bytes occupied by the string in the destination.
 942  *
 943  * @param flags can have:
 944  *
 945  * <dl>
 946  * <dt>STR_TERMINATE <dd>means include the null termination.
 947  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 948  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 949  * </dl>
 950  *
 951  * @param dest_len is the maximum length allowed in the
 952  * destination. If dest_len is -1 then no maxiumum is used.
 953  **/
 954
 955 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
 956 {
 957         size_t len=0;
 958         size_t src_len;
 959         size_t ret;
 960
 961         /* treat a pstring as "unlimited" length */
 962         if (dest_len == (size_t)-1)
 963                 dest_len = sizeof(pstring);
 964
 965         if (flags & STR_TERMINATE)
 966                 src_len = (size_t)-1;
 967         else
 968                 src_len = strlen(src);
 969
 970         if (ucs2_align(base_ptr, dest, flags)) {
 971                 *(char *)dest = 0;
 972                 dest = (void *)((char *)dest + 1);
 973                 if (dest_len)
 974                         dest_len--;
 975                 len++;
 976         }
 977
 978         /* ucs2 is always a multiple of 2 bytes */
 979         dest_len &= ~1;
 980
 981         ret =  convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
 982         if (ret == (size_t)-1) {
 983                 return 0;
 984         }
 985
 986         len += ret;
 987
 988         if (flags & STR_UPPER) {
 989                 smb_ucs2_t *dest_ucs2 = dest;
 990                 size_t i;
 991                 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
 992                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
 993                         if (v != dest_ucs2[i]) {
 994                                 dest_ucs2[i] = v;
 995                         }
 996                 }
 997         }
 998
 999         return len;
1000 }
1001
1002
1003 /**
1004  * Copy a string from a unix char* src to a UCS2 destination,
1005  * allocating a buffer using talloc().
1006  *
1007  * @param dest always set at least to NULL
1008  *
1009  * @returns The number of bytes occupied by the string in the destination
1010  *         or -1 in case of error.
1011  **/
1012 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1013 {
1014         size_t src_len = strlen(src)+1;
1015
1016         *dest = NULL;
1017         return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1018 }
1019
1020
1021 /**
1022  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1023  *
1024  * @param dest always set at least to NULL
1025  *
1026  * @returns The number of bytes occupied by the string in the destination
1027  *         or -1 in case of error.
1028  **/
1029
1030 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1031 {
1032         size_t src_len = strlen(src)+1;
1033
1034         *dest = NULL;
1035         return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1036 }
1037
1038 /**
1039  Copy a string from a char* src to a UTF-8 destination.
1040  Return the number of bytes occupied by the string in the destination
1041  Flags can have:
1042   STR_TERMINATE means include the null termination
1043   STR_UPPER     means uppercase in the destination
1044  dest_len is the maximum length allowed in the destination. If dest_len
1045  is -1 then no maxiumum is used.
1046 **/
1047
1048 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1049 {
1050         size_t src_len = strlen(src);
1051         pstring tmpbuf;
1052
1053         /* treat a pstring as "unlimited" length */
1054         if (dest_len == (size_t)-1)
1055                 dest_len = sizeof(pstring);
1056
1057         if (flags & STR_UPPER) {
1058                 pstrcpy(tmpbuf, src);
1059                 strupper_m(tmpbuf);
1060                 src = tmpbuf;
1061         }
1062
1063         if (flags & STR_TERMINATE)
1064                 src_len++;
1065
1066         return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1067 }
1068
1069 size_t push_utf8_fstring(void *dest, const char *src)
1070 {
1071         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1072 }
1073
1074 /**
1075  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1076  *
1077  * @param dest always set at least to NULL
1078  *
1079  * @returns The number of bytes occupied by the string in the destination
1080  **/
1081
1082 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1083 {
1084         size_t src_len = strlen(src)+1;
1085
1086         *dest = NULL;
1087         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1088 }
1089
1090 /**
1091  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1092  *
1093  * @param dest always set at least to NULL
1094  *
1095  * @returns The number of bytes occupied by the string in the destination
1096  **/
1097
1098 size_t push_utf8_allocate(char **dest, const char *src)
1099 {
1100         size_t src_len = strlen(src)+1;
1101
1102         *dest = NULL;
1103         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1104 }
1105
1106 /**
1107  Copy a string from a ucs2 source to a unix char* destination.
1108  Flags can have:
1109   STR_TERMINATE means the string in src is null terminated.
1110   STR_NOALIGN   means don't try to align.
1111  if STR_TERMINATE is set then src_len is ignored if it is -1.
1112  src_len is the length of the source area in bytes
1113  Return the number of bytes occupied by the string in src.
1114  The resulting string in "dest" is always null terminated.
1115 **/
1116
1117 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1118 {
1119         size_t ret;
1120
1121         if (dest_len == (size_t)-1)
1122                 dest_len = sizeof(pstring);
1123
1124         if (ucs2_align(base_ptr, src, flags)) {
1125                 src = (const void *)((const char *)src + 1);
1126                 if (src_len != (size_t)-1)
1127                         src_len--;
1128         }
1129
1130         if (flags & STR_TERMINATE) {
1131                 /* src_len -1 is the default for null terminated strings. */
1132                 if (src_len != (size_t)-1) {
1133                         size_t len = strnlen_w(src, src_len/2);
1134                         if (len < src_len/2)
1135                                 len++;
1136                         src_len = len*2;
1137                 }
1138         }
1139
1140         /* ucs2 is always a multiple of 2 bytes */
1141         if (src_len != (size_t)-1)
1142                 src_len &= ~1;
1143
1144         ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1145         if (ret == (size_t)-1) {
1146                 return 0;
1147         }
1148
1149         if (src_len == (size_t)-1)
1150                 src_len = ret*2;
1151
1152         if (dest_len)
1153                 dest[MIN(ret, dest_len-1)] = 0;
1154         else
1155                 dest[0] = 0;
1156
1157         return src_len;
1158 }
1159
1160 size_t pull_ucs2_pstring(char *dest, const void *src)
1161 {
1162         return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1163 }
1164
1165 size_t pull_ucs2_fstring(char *dest, const void *src)
1166 {
1167         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1168 }
1169
1170 /**
1171  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1172  *
1173  * @param dest always set at least to NULL
1174  *
1175  * @returns The number of bytes occupied by the string in the destination
1176  **/
1177
1178 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1179 {
1180         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1181         *dest = NULL;
1182         return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1183 }
1184
1185 /**
1186  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1187  *
1188  * @param dest always set at least to NULL
1189  *
1190  * @returns The number of bytes occupied by the string in the destination
1191  **/
1192
1193 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1194 {
1195         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1196         *dest = NULL;
1197         return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1198 }
1199
1200 /**
1201  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1202  *
1203  * @param dest always set at least to NULL
1204  *
1205  * @returns The number of bytes occupied by the string in the destination
1206  **/
1207
1208 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1209 {
1210         size_t src_len = strlen(src)+1;
1211         *dest = NULL;
1212         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1213 }
1214
1215 /**
1216  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1217  *
1218  * @param dest always set at least to NULL
1219  *
1220  * @returns The number of bytes occupied by the string in the destination
1221  **/
1222
1223 size_t pull_utf8_allocate(char **dest, const char *src)
1224 {
1225         size_t src_len = strlen(src)+1;
1226         *dest = NULL;
1227         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1228 }
1229
1230 /**
1231  Copy a string from a char* src to a unicode or ascii
1232  dos codepage destination choosing unicode or ascii based on the
1233  flags in the SMB buffer starting at base_ptr.
1234  Return the number of bytes occupied by the string in the destination.
1235  flags can have:
1236   STR_TERMINATE means include the null termination.
1237   STR_UPPER     means uppercase in the destination.
1238   STR_ASCII     use ascii even with unicode packet.
1239   STR_NOALIGN   means don't do alignment.
1240  dest_len is the maximum length allowed in the destination. If dest_len
1241  is -1 then no maxiumum is used.
1242 **/
1243
1244 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1245 {
1246 #ifdef DEVELOPER
1247         /* We really need to zero fill here, not clobber
1248          * region, as we want to ensure that valgrind thinks
1249          * all of the outgoing buffer has been written to
1250          * so a send() or write() won't trap an error.
1251          * JRA.
1252          */
1253 #if 0
1254         if (dest_len != (size_t)-1)
1255                 clobber_region(function, line, dest, dest_len);
1256 #else
1257         if (dest_len != (size_t)-1)
1258                 memset(dest, '\0', dest_len);
1259 #endif
1260 #endif
1261
1262         if (!(flags & STR_ASCII) && \
1263             ((flags & STR_UNICODE || \
1264               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1265                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1266         }
1267         return push_ascii(dest, src, dest_len, flags);
1268 }
1269
1270
1271 /**
1272  Copy a string from a unicode or ascii source (depending on
1273  the packet flags) to a char* destination.
1274  Flags can have:
1275   STR_TERMINATE means the string in src is null terminated.
1276   STR_UNICODE   means to force as unicode.
1277   STR_ASCII     use ascii even with unicode packet.
1278   STR_NOALIGN   means don't do alignment.
1279  if STR_TERMINATE is set then src_len is ignored is it is -1
1280  src_len is the length of the source area in bytes.
1281  Return the number of bytes occupied by the string in src.
1282  The resulting string in "dest" is always null terminated.
1283 **/
1284
1285 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1286 {
1287 #ifdef DEVELOPER
1288         if (dest_len != (size_t)-1)
1289                 clobber_region(function, line, dest, dest_len);
1290 #endif
1291
1292         if (!(flags & STR_ASCII) && \
1293             ((flags & STR_UNICODE || \
1294               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1295                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1296         }
1297         return pull_ascii(dest, src, dest_len, src_len, flags);
1298 }
1299
1300 size_t align_string(const void *base_ptr, const char *p, int flags)
1301 {
1302         if (!(flags & STR_ASCII) && \
1303             ((flags & STR_UNICODE || \
1304               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1305                 return ucs2_align(base_ptr, p, flags);
1306         }
1307         return 0;
1308 }
1309
1310 /****************************************************************
1311  Calculate the size (in bytes) of the next multibyte character in
1312  our internal character set. Note that p must be pointing to a
1313  valid mb char, not within one.
1314 ****************************************************************/
1315
1316 size_t next_mb_char_size(const char *s)
1317 {
1318         size_t i;
1319
1320         if (!(*s & 0x80))
1321                 return 1; /* ascii. */
1322
1323         conv_silent = True;
1324         for ( i = 1; i <=4; i++ ) {
1325                 smb_ucs2_t uc;
1326                 if (convert_string(CH_UNIX, CH_UCS2, s, i, &uc, 2, False) == 2) {
1327 #if 0 /* JRATEST */
1328                         DEBUG(10,("next_mb_char_size: size %u at string %s\n",
1329                                 (unsigned int)i, s));
1330 #endif
1331                         conv_silent = False;
1332                         return i;
1333                 }
1334         }
1335         /* We're hosed - we don't know how big this is... */
1336         DEBUG(10,("next_mb_char_size: unknown size at string %s\n", s));
1337         conv_silent = False;
1338         return 1;
1339 }