source3/lib/charcnv.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Character set conversion Extensions
   4    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   5    Copyright (C) Andrew Tridgell 2001
   6    Copyright (C) Simo Sorce 2001
   7    Copyright (C) Martin Pool 2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22
  23 */
  24 #include "includes.h"
  25
  26 /**
  27  * @file
  28  *
  29  * @brief Character-set conversion routines built on our iconv.
  30  *
  31  * @note Samba's internal character set (at least in the 3.0 series)
  32  * is always the same as the one for the Unix filesystem.  It is
  33  * <b>not</b> necessarily UTF-8 and may be different on machines that
  34  * need i18n filenames to be compatible with Unix software.  It does
  35  * have to be a superset of ASCII.  All multibyte sequences must start
  36  * with a byte with the high bit set.
  37  *
  38  * @sa lib/iconv.c
  39  */
  40
  41
  42 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
  43 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
  44
  45 /**
  46  * Return the name of a charset to give to iconv().
  47  **/
  48 static const char *charset_name(charset_t ch)
  49 {
  50         const char *ret = NULL;
  51
  52         if (ch == CH_UCS2) ret = "UCS-2LE";
  53         else if (ch == CH_UNIX) ret = lp_unix_charset();
  54         else if (ch == CH_DOS) ret = lp_dos_charset();
  55         else if (ch == CH_DISPLAY) ret = lp_display_charset();
  56         else if (ch == CH_UTF8) ret = "UTF8";
  57
  58 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
  59         if (ret && !strcmp(ret, "LOCALE")) {
  60                 const char *ln = NULL;
  61
  62 #ifdef HAVE_SETLOCALE
  63                 setlocale(LC_ALL, "");
  64 #endif
  65                 ln = nl_langinfo(CODESET);
  66                 if (ln) {
  67                         /* Check whether the charset name is supported
  68                            by iconv */
  69                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
  70                         if (handle == (smb_iconv_t) -1) {
  71                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
  72                                 ln = NULL;
  73                         } else {
  74                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
  75                                 smb_iconv_close(handle);
  76                         }
  77                 }
  78                 ret = ln;
  79         }
  80 #endif
  81
  82         if (!ret || !*ret) ret = "ASCII";
  83         return ret;
  84 }
  85
  86 void lazy_initialize_conv(void)
  87 {
  88         static int initialized = False;
  89
  90         if (!initialized) {
  91                 initialized = True;
  92                 load_case_tables();
  93                 init_iconv();
  94         }
  95 }
  96
  97 /**
  98  * Initialize iconv conversion descriptors.
  99  *
 100  * This is called the first time it is needed, and also called again
 101  * every time the configuration is reloaded, because the charset or
 102  * codepage might have changed.
 103  **/
 104 void init_iconv(void)
 105 {
 106         int c1, c2;
 107         BOOL did_reload = False;
 108
 109         /* so that charset_name() works we need to get the UNIX<->UCS2 going
 110            first */
 111         if (!conv_handles[CH_UNIX][CH_UCS2])
 112                 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
 113
 114         if (!conv_handles[CH_UCS2][CH_UNIX])
 115                 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
 116
 117         for (c1=0;c1<NUM_CHARSETS;c1++) {
 118                 for (c2=0;c2<NUM_CHARSETS;c2++) {
 119                         const char *n1 = charset_name((charset_t)c1);
 120                         const char *n2 = charset_name((charset_t)c2);
 121                         if (conv_handles[c1][c2] &&
 122                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
 123                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
 124                                 continue;
 125
 126                         did_reload = True;
 127
 128                         if (conv_handles[c1][c2])
 129                                 smb_iconv_close(conv_handles[c1][c2]);
 130
 131                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
 132                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
 133                                 DEBUG(0,("Conversion from %s to %s not supported\n",
 134                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
 135                                 conv_handles[c1][c2] = NULL;
 136                         }
 137                 }
 138         }
 139
 140         if (did_reload) {
 141                 /* XXX: Does this really get called every time the dos
 142                  * codepage changes? */
 143                 /* XXX: Is the did_reload test too strict? */
 144                 conv_silent = True;
 145                 init_doschar_table();
 146                 init_valid_table();
 147                 conv_silent = False;
 148         }
 149 }
 150
 151 /**
 152  * Convert string from one encoding to another, making error checking etc
 153  * Slow path version - uses (slow) iconv.
 154  *
 155  * @param src pointer to source string (multibyte or singlebyte)
 156  * @param srclen length of the source string in bytes
 157  * @param dest pointer to destination string (multibyte or singlebyte)
 158  * @param destlen maximal length allowed for string
 159  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 160  * @returns the number of bytes occupied in the destination
 161  *
 162  * Ensure the srclen contains the terminating zero.
 163  *
 164  **/
 165
 166 static size_t convert_string_internal(charset_t from, charset_t to,
 167                       void const *src, size_t srclen,
 168                       void *dest, size_t destlen, BOOL allow_bad_conv)
 169 {
 170         size_t i_len, o_len;
 171         size_t retval;
 172         const char* inbuf = (const char*)src;
 173         char* outbuf = (char*)dest;
 174         smb_iconv_t descriptor;
 175
 176         lazy_initialize_conv();
 177
 178         descriptor = conv_handles[from][to];
 179
 180         if (srclen == (size_t)-1) {
 181                 if (from == CH_UCS2) {
 182                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
 183                 } else {
 184                         srclen = strlen((const char *)src)+1;
 185                 }
 186         }
 187
 188
 189         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 190                 if (!conv_silent)
 191                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
 192                 return (size_t)-1;
 193         }
 194
 195         i_len=srclen;
 196         o_len=destlen;
 197
 198  again:
 199
 200         retval = smb_iconv(descriptor, (char **)&inbuf, &i_len, &outbuf, &o_len);
 201         if(retval==(size_t)-1) {
 202                 const char *reason="unknown error";
 203                 switch(errno) {
 204                         case EINVAL:
 205                                 reason="Incomplete multibyte sequence";
 206                                 if (!conv_silent)
 207                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 208                                 if (allow_bad_conv)
 209                                         goto use_as_is;
 210                                 break;
 211                         case E2BIG:
 212                                 reason="No more room";
 213                                 if (!conv_silent)
 214                                         DEBUG(3, ("convert_string_internal: Required %lu, available %lu\n",
 215                                                 (unsigned long)srclen, (unsigned long)destlen));
 216                                 /* we are not sure we need srclen bytes,
 217                                   may be more, may be less.
 218                                   We only know we need more than destlen
 219                                   bytes ---simo */
 220                                break;
 221                         case EILSEQ:
 222                                 reason="Illegal multibyte sequence";
 223                                 if (!conv_silent)
 224                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 225                                 if (allow_bad_conv)
 226                                         goto use_as_is;
 227                                 break;
 228                         default:
 229                                 if (!conv_silent)
 230                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
 231                                 break;
 232                 }
 233                 /* smb_panic(reason); */
 234         }
 235         return destlen-o_len;
 236
 237  use_as_is:
 238
 239         /*
 240          * Conversion not supported. This is actually an error, but there are so
 241          * many misconfigured iconv systems and smb.conf's out there we can't just
 242          * fail. Do a very bad conversion instead.... JRA.
 243          */
 244
 245         {
 246                 if (o_len == 0 || i_len == 0)
 247                         return destlen - o_len;
 248
 249                 if (from == CH_UCS2 && to != CH_UCS2) {
 250                         /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
 251                         if (i_len < 2)
 252                                 return destlen - o_len;
 253                         if (i_len >= 2) {
 254                                 *outbuf = inbuf[0];
 255
 256                                 outbuf++;
 257                                 o_len--;
 258
 259                                 inbuf += 2;
 260                                 i_len -= 2;
 261                         }
 262
 263                         if (o_len == 0 || i_len == 0)
 264                                 return destlen - o_len;
 265
 266                         /* Keep trying with the next char... */
 267                         goto again;
 268
 269                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 270                         /* Can't convert to ucs2 - just widen by adding zero. */
 271                         if (o_len < 2)
 272                                 return destlen - o_len;
 273
 274                         outbuf[0] = inbuf[0];
 275                         outbuf[1] = '\0';
 276
 277                         inbuf++;
 278                         i_len--;
 279
 280                         outbuf += 2;
 281                         o_len -= 2;
 282
 283                         if (o_len == 0 || i_len == 0)
 284                                 return destlen - o_len;
 285
 286                         /* Keep trying with the next char... */
 287                         goto again;
 288
 289                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 290                         /* Failed multibyte to multibyte. Just copy 1 char and
 291                                 try again. */
 292                         outbuf[0] = inbuf[0];
 293
 294                         inbuf++;
 295                         i_len--;
 296
 297                         outbuf++;
 298                         o_len--;
 299
 300                         if (o_len == 0 || i_len == 0)
 301                                 return destlen - o_len;
 302
 303                         /* Keep trying with the next char... */
 304                         goto again;
 305
 306                 } else {
 307                         /* Keep compiler happy.... */
 308                         return destlen - o_len;
 309                 }
 310         }
 311 }
 312
 313 /**
 314  * Convert string from one encoding to another, making error checking etc
 315  * Fast path version - handles ASCII first.
 316  *
 317  * @param src pointer to source string (multibyte or singlebyte)
 318  * @param srclen length of the source string in bytes, or -1 for nul terminated.
 319  * @param dest pointer to destination string (multibyte or singlebyte)
 320  * @param destlen maximal length allowed for string - *NEVER* -1.
 321  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
 322  * @returns the number of bytes occupied in the destination
 323  *
 324  * Ensure the srclen contains the terminating zero.
 325  *
 326  * This function has been hand-tuned to provide a fast path.
 327  * Don't change unless you really know what you are doing. JRA.
 328  **/
 329
 330 size_t convert_string(charset_t from, charset_t to,
 331                       void const *src, size_t srclen,
 332                       void *dest, size_t destlen, BOOL allow_bad_conv)
 333 {
 334         /*
 335          * NB. We deliberately don't do a strlen here if srclen == -1.
 336          * This is very expensive over millions of calls and is taken
 337          * care of in the slow path in convert_string_internal. JRA.
 338          */
 339
 340 #ifdef DEVELOPER
 341         SMB_ASSERT(destlen != (size_t)-1);
 342 #endif
 343
 344         if (srclen == 0)
 345                 return 0;
 346
 347         if (from != CH_UCS2 && to != CH_UCS2) {
 348                 const unsigned char *p = (const unsigned char *)src;
 349                 unsigned char *q = (unsigned char *)dest;
 350                 size_t slen = srclen;
 351                 size_t dlen = destlen;
 352                 unsigned char lastp;
 353                 size_t retval = 0;
 354
 355                 /* If all characters are ascii, fast path here. */
 356                 while (slen && dlen) {
 357                         if ((lastp = *p) <= 0x7f) {
 358                                 *q++ = *p++;
 359                                 if (slen != (size_t)-1) {
 360                                         slen--;
 361                                 }
 362                                 dlen--;
 363                                 retval++;
 364                                 if (!lastp)
 365                                         break;
 366                         } else {
 367 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 368                                 goto general_case;
 369 #else
 370                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 371 #endif
 372                         }
 373                 }
 374                 return retval;
 375         } else if (from == CH_UCS2 && to != CH_UCS2) {
 376                 const unsigned char *p = (const unsigned char *)src;
 377                 unsigned char *q = (unsigned char *)dest;
 378                 size_t retval = 0;
 379                 size_t slen = srclen;
 380                 size_t dlen = destlen;
 381                 unsigned char lastp;
 382
 383                 /* If all characters are ascii, fast path here. */
 384                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
 385                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
 386                                 *q++ = *p;
 387                                 if (slen != (size_t)-1) {
 388                                         slen -= 2;
 389                                 }
 390                                 p += 2;
 391                                 dlen--;
 392                                 retval++;
 393                                 if (!lastp)
 394                                         break;
 395                         } else {
 396 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 397                                 goto general_case;
 398 #else
 399                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 400 #endif
 401                         }
 402                 }
 403                 return retval;
 404         } else if (from != CH_UCS2 && to == CH_UCS2) {
 405                 const unsigned char *p = (const unsigned char *)src;
 406                 unsigned char *q = (unsigned char *)dest;
 407                 size_t retval = 0;
 408                 size_t slen = srclen;
 409                 size_t dlen = destlen;
 410                 unsigned char lastp;
 411
 412                 /* If all characters are ascii, fast path here. */
 413                 while (slen && (dlen >= 2)) {
 414                         if ((lastp = *p) <= 0x7F) {
 415                                 *q++ = *p++;
 416                                 *q++ = '\0';
 417                                 if (slen != (size_t)-1) {
 418                                         slen--;
 419                                 }
 420                                 dlen -= 2;
 421                                 retval += 2;
 422                                 if (!lastp)
 423                                         break;
 424                         } else {
 425 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 426                                 goto general_case;
 427 #else
 428                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
 429 #endif
 430                         }
 431                 }
 432                 return retval;
 433         }
 434
 435 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
 436   general_case:
 437 #endif
 438         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
 439 }
 440
 441 /**
 442  * Convert between character sets, allocating a new buffer for the result.
 443  *
 444  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
 445  * @param srclen length of source buffer.
 446  * @param dest always set at least to NULL
 447  * @note -1 is not accepted for srclen.
 448  *
 449  * @returns Size in bytes of the converted string; or -1 in case of error.
 450  *
 451  * Ensure the srclen contains the terminating zero.
 452  *
 453  * I hate the goto's in this function. It's embarressing.....
 454  * There has to be a cleaner way to do this. JRA.
 455  **/
 456
 457 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
 458                                void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
 459 {
 460         size_t i_len, o_len, destlen = MAX(srclen, 512);
 461         size_t retval;
 462         const char *inbuf = (const char *)src;
 463         char *outbuf = NULL, *ob = NULL;
 464         smb_iconv_t descriptor;
 465
 466         *dest = NULL;
 467
 468         if (src == NULL || srclen == (size_t)-1)
 469                 return (size_t)-1;
 470         if (srclen == 0)
 471                 return 0;
 472
 473         lazy_initialize_conv();
 474
 475         descriptor = conv_handles[from][to];
 476
 477         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 478                 if (!conv_silent)
 479                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
 480                 if (allow_bad_conv)
 481                         goto use_as_is;
 482                 return (size_t)-1;
 483         }
 484
 485   convert:
 486
 487         if ((destlen*2) < destlen) {
 488                 /* wrapped ! abort. */
 489                 if (!conv_silent)
 490                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
 491                 if (!ctx)
 492                         SAFE_FREE(outbuf);
 493                 return (size_t)-1;
 494         } else {
 495                 destlen = destlen * 2;
 496         }
 497
 498         if (ctx)
 499                 ob = (char *)talloc_realloc(ctx, ob, destlen);
 500         else
 501                 ob = (char *)Realloc(ob, destlen);
 502
 503         if (!ob) {
 504                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
 505                 if (!ctx)
 506                         SAFE_FREE(outbuf);
 507                 return (size_t)-1;
 508         } else {
 509                 outbuf = ob;
 510         }
 511         i_len = srclen;
 512         o_len = destlen;
 513
 514  again:
 515
 516         retval = smb_iconv(descriptor,
 517                            (char **)&inbuf, &i_len,
 518                            &outbuf, &o_len);
 519         if(retval == (size_t)-1)                {
 520                 const char *reason="unknown error";
 521                 switch(errno) {
 522                         case EINVAL:
 523                                 reason="Incomplete multibyte sequence";
 524                                 if (!conv_silent)
 525                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 526                                 if (allow_bad_conv)
 527                                         goto use_as_is;
 528                                 break;
 529                         case E2BIG:
 530                                 goto convert;
 531                         case EILSEQ:
 532                                 reason="Illegal multibyte sequence";
 533                                 if (!conv_silent)
 534                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
 535                                 if (allow_bad_conv)
 536                                         goto use_as_is;
 537                                 break;
 538                 }
 539                 if (!conv_silent)
 540                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
 541                 /* smb_panic(reason); */
 542                 return (size_t)-1;
 543         }
 544
 545   out:
 546
 547         destlen = destlen - o_len;
 548         if (ctx)
 549                 *dest = (char *)talloc_realloc(ctx,ob,destlen);
 550         else
 551                 *dest = (char *)Realloc(ob,destlen);
 552         if (destlen && !*dest) {
 553                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
 554                 if (!ctx)
 555                         SAFE_FREE(ob);
 556                 return (size_t)-1;
 557         }
 558
 559         return destlen;
 560
 561  use_as_is:
 562
 563         /*
 564          * Conversion not supported. This is actually an error, but there are so
 565          * many misconfigured iconv systems and smb.conf's out there we can't just
 566          * fail. Do a very bad conversion instead.... JRA.
 567          */
 568
 569         {
 570                 if (o_len == 0 || i_len == 0)
 571                         goto out;
 572
 573                 if (from == CH_UCS2 && to != CH_UCS2) {
 574                         /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
 575                         if (i_len < 2)
 576                                 goto out;
 577
 578                         if (i_len >= 2) {
 579                                 *outbuf = inbuf[0];
 580
 581                                 outbuf++;
 582                                 o_len--;
 583
 584                                 inbuf += 2;
 585                                 i_len -= 2;
 586                         }
 587
 588                         if (o_len == 0 || i_len == 0)
 589                                 goto out;
 590
 591                         /* Keep trying with the next char... */
 592                         goto again;
 593
 594                 } else if (from != CH_UCS2 && to == CH_UCS2) {
 595                         /* Can't convert to ucs2 - just widen by adding zero. */
 596                         if (o_len < 2)
 597                                 goto out;
 598
 599                         outbuf[0] = inbuf[0];
 600                         outbuf[1] = '\0';
 601
 602                         inbuf++;
 603                         i_len--;
 604
 605                         outbuf += 2;
 606                         o_len -= 2;
 607
 608                         if (o_len == 0 || i_len == 0)
 609                                 goto out;
 610
 611                         /* Keep trying with the next char... */
 612                         goto again;
 613
 614                 } else if (from != CH_UCS2 && to != CH_UCS2) {
 615                         /* Failed multibyte to multibyte. Just copy 1 char and
 616                                 try again. */
 617                         outbuf[0] = inbuf[0];
 618
 619                         inbuf++;
 620                         i_len--;
 621
 622                         outbuf++;
 623                         o_len--;
 624
 625                         if (o_len == 0 || i_len == 0)
 626                                 goto out;
 627
 628                         /* Keep trying with the next char... */
 629                         goto again;
 630
 631                 } else {
 632                         /* Keep compiler happy.... */
 633                         goto out;
 634                 }
 635         }
 636 }
 637
 638 /**
 639  * Convert between character sets, allocating a new buffer using talloc for the result.
 640  *
 641  * @param srclen length of source buffer.
 642  * @param dest always set at least to NULL
 643  * @note -1 is not accepted for srclen.
 644  *
 645  * @returns Size in bytes of the converted string; or -1 in case of error.
 646  **/
 647 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 648                                 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
 649 {
 650         size_t dest_len;
 651
 652         *dest = NULL;
 653         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
 654         if (dest_len == (size_t)-1)
 655                 return (size_t)-1;
 656         if (*dest == NULL)
 657                 return (size_t)-1;
 658         return dest_len;
 659 }
 660
 661 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
 662 {
 663         size_t size;
 664         smb_ucs2_t *buffer;
 665
 666         size = push_ucs2_allocate(&buffer, src);
 667         if (size == (size_t)-1) {
 668                 smb_panic("failed to create UCS2 buffer");
 669         }
 670         if (!strupper_w(buffer) && (dest == src)) {
 671                 free(buffer);
 672                 return srclen;
 673         }
 674
 675         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
 676         free(buffer);
 677         return size;
 678 }
 679
 680 /**
 681  strdup() a unix string to upper case.
 682  Max size is pstring.
 683 **/
 684
 685 char *strdup_upper(const char *s)
 686 {
 687         pstring out_buffer;
 688         const unsigned char *p = (const unsigned char *)s;
 689         unsigned char *q = (unsigned char *)out_buffer;
 690
 691         /* this is quite a common operation, so we want it to be
 692            fast. We optimise for the ascii case, knowing that all our
 693            supported multi-byte character sets are ascii-compatible
 694            (ie. they match for the first 128 chars) */
 695
 696         while (1) {
 697                 if (*p & 0x80)
 698                         break;
 699                 *q++ = toupper(*p);
 700                 if (!*p)
 701                         break;
 702                 p++;
 703                 if (p - ( const unsigned char *)s >= sizeof(pstring))
 704                         break;
 705         }
 706
 707         if (*p) {
 708                 /* MB case. */
 709                 size_t size;
 710                 wpstring buffer;
 711                 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
 712                 if (size == (size_t)-1) {
 713                         return NULL;
 714                 }
 715
 716                 strupper_w(buffer);
 717
 718                 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
 719                 if (size == (size_t)-1) {
 720                         return NULL;
 721                 }
 722         }
 723
 724         return strdup(out_buffer);
 725 }
 726
 727 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
 728 {
 729         size_t size;
 730         smb_ucs2_t *buffer = NULL;
 731
 732         size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
 733                                        (void **) &buffer, True);
 734         if (size == (size_t)-1 || !buffer) {
 735                 smb_panic("failed to create UCS2 buffer");
 736         }
 737         if (!strlower_w(buffer) && (dest == src)) {
 738                 SAFE_FREE(buffer);
 739                 return srclen;
 740         }
 741         size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
 742         SAFE_FREE(buffer);
 743         return size;
 744 }
 745
 746 /**
 747  strdup() a unix string to lower case.
 748 **/
 749
 750 char *strdup_lower(const char *s)
 751 {
 752         size_t size;
 753         smb_ucs2_t *buffer = NULL;
 754         char *out_buffer;
 755
 756         size = push_ucs2_allocate(&buffer, s);
 757         if (size == -1 || !buffer) {
 758                 return NULL;
 759         }
 760
 761         strlower_w(buffer);
 762
 763         size = pull_ucs2_allocate(&out_buffer, buffer);
 764         SAFE_FREE(buffer);
 765
 766         if (size == (size_t)-1) {
 767                 return NULL;
 768         }
 769
 770         return out_buffer;
 771 }
 772
 773 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 774 {
 775         if (flags & (STR_NOALIGN|STR_ASCII))
 776                 return 0;
 777         return PTR_DIFF(p, base_ptr) & 1;
 778 }
 779
 780
 781 /**
 782  * Copy a string from a char* unix src to a dos codepage string destination.
 783  *
 784  * @return the number of bytes occupied by the string in the destination.
 785  *
 786  * @param flags can include
 787  * <dl>
 788  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 789  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 790  * </dl>
 791  *
 792  * @param dest_len the maximum length in bytes allowed in the
 793  * destination.  If @p dest_len is -1 then no maximum is used.
 794  **/
 795 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 796 {
 797         size_t src_len = strlen(src);
 798         pstring tmpbuf;
 799
 800         /* treat a pstring as "unlimited" length */
 801         if (dest_len == (size_t)-1)
 802                 dest_len = sizeof(pstring);
 803
 804         if (flags & STR_UPPER) {
 805                 pstrcpy(tmpbuf, src);
 806                 strupper_m(tmpbuf);
 807                 src = tmpbuf;
 808         }
 809
 810         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 811                 src_len++;
 812
 813         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
 814 }
 815
 816 size_t push_ascii_fstring(void *dest, const char *src)
 817 {
 818         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
 819 }
 820
 821 size_t push_ascii_pstring(void *dest, const char *src)
 822 {
 823         return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
 824 }
 825
 826 /********************************************************************
 827  Push an nstring - ensure null terminated. Written by
 828  moriyama@miraclelinux.com (MORIYAMA Masayuki).
 829 ********************************************************************/
 830
 831 size_t push_ascii_nstring(void *dest, const char *src)
 832 {
 833         size_t i, buffer_len, dest_len;
 834         smb_ucs2_t *buffer;
 835
 836         conv_silent = True;
 837         buffer_len = push_ucs2_allocate(&buffer, src);
 838         if (buffer_len == (size_t)-1) {
 839                 smb_panic("failed to create UCS2 buffer");
 840         }
 841
 842         dest_len = 0;
 843         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
 844                 unsigned char mb[10];
 845                 /* Convert one smb_ucs2_t character at a time. */
 846                 size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
 847                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
 848                         memcpy((char *)dest + dest_len, mb, mb_len);
 849                         dest_len += mb_len;
 850                 } else {
 851                         errno = E2BIG;
 852                         break;
 853                 }
 854         }
 855         ((char *)dest)[dest_len] = '\0';
 856
 857         SAFE_FREE(buffer);
 858         conv_silent = False;
 859         return dest_len;
 860 }
 861
 862 /**
 863  * Copy a string from a dos codepage source to a unix char* destination.
 864  *
 865  * The resulting string in "dest" is always null terminated.
 866  *
 867  * @param flags can have:
 868  * <dl>
 869  * <dt>STR_TERMINATE</dt>
 870  * <dd>STR_TERMINATE means the string in @p src
 871  * is null terminated, and src_len is ignored.</dd>
 872  * </dl>
 873  *
 874  * @param src_len is the length of the source area in bytes.
 875  * @returns the number of bytes occupied by the string in @p src.
 876  **/
 877 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 878 {
 879         size_t ret;
 880
 881         if (dest_len == (size_t)-1)
 882                 dest_len = sizeof(pstring);
 883
 884         if (flags & STR_TERMINATE) {
 885                 if (src_len == (size_t)-1) {
 886                         src_len = strlen(src) + 1;
 887                 } else {
 888                         size_t len = strnlen(src, src_len);
 889                         if (len < src_len)
 890                                 len++;
 891                         src_len = len;
 892                 }
 893         }
 894
 895         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
 896         if (ret == (size_t)-1) {
 897                 dest_len = 0;
 898         }
 899
 900         if (dest_len)
 901                 dest[MIN(ret, dest_len-1)] = 0;
 902         else
 903                 dest[0] = 0;
 904
 905         return src_len;
 906 }
 907
 908 size_t pull_ascii_pstring(char *dest, const void *src)
 909 {
 910         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
 911 }
 912
 913 size_t pull_ascii_fstring(char *dest, const void *src)
 914 {
 915         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
 916 }
 917
 918 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
 919
 920 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
 921 {
 922         return pull_ascii(dest, src, dest_len, sizeof(nstring), STR_TERMINATE);
 923 }
 924
 925 /**
 926  * Copy a string from a char* src to a unicode destination.
 927  *
 928  * @returns the number of bytes occupied by the string in the destination.
 929  *
 930  * @param flags can have:
 931  *
 932  * <dl>
 933  * <dt>STR_TERMINATE <dd>means include the null termination.
 934  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 935  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 936  * </dl>
 937  *
 938  * @param dest_len is the maximum length allowed in the
 939  * destination. If dest_len is -1 then no maxiumum is used.
 940  **/
 941
 942 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
 943 {
 944         size_t len=0;
 945         size_t src_len;
 946         size_t ret;
 947
 948         /* treat a pstring as "unlimited" length */
 949         if (dest_len == (size_t)-1)
 950                 dest_len = sizeof(pstring);
 951
 952         if (flags & STR_TERMINATE)
 953                 src_len = (size_t)-1;
 954         else
 955                 src_len = strlen(src);
 956
 957         if (ucs2_align(base_ptr, dest, flags)) {
 958                 *(char *)dest = 0;
 959                 dest = (void *)((char *)dest + 1);
 960                 if (dest_len)
 961                         dest_len--;
 962                 len++;
 963         }
 964
 965         /* ucs2 is always a multiple of 2 bytes */
 966         dest_len &= ~1;
 967
 968         ret =  convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
 969         if (ret == (size_t)-1) {
 970                 return 0;
 971         }
 972
 973         len += ret;
 974
 975         if (flags & STR_UPPER) {
 976                 smb_ucs2_t *dest_ucs2 = dest;
 977                 size_t i;
 978                 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
 979                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
 980                         if (v != dest_ucs2[i]) {
 981                                 dest_ucs2[i] = v;
 982                         }
 983                 }
 984         }
 985
 986         return len;
 987 }
 988
 989
 990 /**
 991  * Copy a string from a unix char* src to a UCS2 destination,
 992  * allocating a buffer using talloc().
 993  *
 994  * @param dest always set at least to NULL
 995  *
 996  * @returns The number of bytes occupied by the string in the destination
 997  *         or -1 in case of error.
 998  **/
 999 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1000 {
1001         size_t src_len = strlen(src)+1;
1002
1003         *dest = NULL;
1004         return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1005 }
1006
1007
1008 /**
1009  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1010  *
1011  * @param dest always set at least to NULL
1012  *
1013  * @returns The number of bytes occupied by the string in the destination
1014  *         or -1 in case of error.
1015  **/
1016
1017 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1018 {
1019         size_t src_len = strlen(src)+1;
1020
1021         *dest = NULL;
1022         return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1023 }
1024
1025 /**
1026  Copy a string from a char* src to a UTF-8 destination.
1027  Return the number of bytes occupied by the string in the destination
1028  Flags can have:
1029   STR_TERMINATE means include the null termination
1030   STR_UPPER     means uppercase in the destination
1031  dest_len is the maximum length allowed in the destination. If dest_len
1032  is -1 then no maxiumum is used.
1033 **/
1034
1035 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1036 {
1037         size_t src_len = strlen(src);
1038         pstring tmpbuf;
1039
1040         /* treat a pstring as "unlimited" length */
1041         if (dest_len == (size_t)-1)
1042                 dest_len = sizeof(pstring);
1043
1044         if (flags & STR_UPPER) {
1045                 pstrcpy(tmpbuf, src);
1046                 strupper_m(tmpbuf);
1047                 src = tmpbuf;
1048         }
1049
1050         if (flags & STR_TERMINATE)
1051                 src_len++;
1052
1053         return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1054 }
1055
1056 size_t push_utf8_fstring(void *dest, const char *src)
1057 {
1058         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1059 }
1060
1061 /**
1062  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1063  *
1064  * @param dest always set at least to NULL
1065  *
1066  * @returns The number of bytes occupied by the string in the destination
1067  **/
1068
1069 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1070 {
1071         size_t src_len = strlen(src)+1;
1072
1073         *dest = NULL;
1074         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1075 }
1076
1077 /**
1078  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1079  *
1080  * @param dest always set at least to NULL
1081  *
1082  * @returns The number of bytes occupied by the string in the destination
1083  **/
1084
1085 size_t push_utf8_allocate(char **dest, const char *src)
1086 {
1087         size_t src_len = strlen(src)+1;
1088
1089         *dest = NULL;
1090         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1091 }
1092
1093 /**
1094  Copy a string from a ucs2 source to a unix char* destination.
1095  Flags can have:
1096   STR_TERMINATE means the string in src is null terminated.
1097   STR_NOALIGN   means don't try to align.
1098  if STR_TERMINATE is set then src_len is ignored if it is -1.
1099  src_len is the length of the source area in bytes
1100  Return the number of bytes occupied by the string in src.
1101  The resulting string in "dest" is always null terminated.
1102 **/
1103
1104 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1105 {
1106         size_t ret;
1107
1108         if (dest_len == (size_t)-1)
1109                 dest_len = sizeof(pstring);
1110
1111         if (ucs2_align(base_ptr, src, flags)) {
1112                 src = (const void *)((const char *)src + 1);
1113                 if (src_len != (size_t)-1)
1114                         src_len--;
1115         }
1116
1117         if (flags & STR_TERMINATE) {
1118                 /* src_len -1 is the default for null terminated strings. */
1119                 if (src_len != (size_t)-1) {
1120                         size_t len = strnlen_w(src, src_len/2);
1121                         if (len < src_len/2)
1122                                 len++;
1123                         src_len = len*2;
1124                 }
1125         }
1126
1127         /* ucs2 is always a multiple of 2 bytes */
1128         if (src_len != (size_t)-1)
1129                 src_len &= ~1;
1130
1131         ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1132         if (ret == (size_t)-1) {
1133                 return 0;
1134         }
1135
1136         if (src_len == (size_t)-1)
1137                 src_len = ret*2;
1138
1139         if (dest_len)
1140                 dest[MIN(ret, dest_len-1)] = 0;
1141         else
1142                 dest[0] = 0;
1143
1144         return src_len;
1145 }
1146
1147 size_t pull_ucs2_pstring(char *dest, const void *src)
1148 {
1149         return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1150 }
1151
1152 size_t pull_ucs2_fstring(char *dest, const void *src)
1153 {
1154         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1155 }
1156
1157 /**
1158  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1159  *
1160  * @param dest always set at least to NULL
1161  *
1162  * @returns The number of bytes occupied by the string in the destination
1163  **/
1164
1165 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1166 {
1167         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1168         *dest = NULL;
1169         return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1170 }
1171
1172 /**
1173  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1174  *
1175  * @param dest always set at least to NULL
1176  *
1177  * @returns The number of bytes occupied by the string in the destination
1178  **/
1179
1180 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1181 {
1182         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1183         *dest = NULL;
1184         return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1185 }
1186
1187 /**
1188  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1189  *
1190  * @param dest always set at least to NULL
1191  *
1192  * @returns The number of bytes occupied by the string in the destination
1193  **/
1194
1195 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1196 {
1197         size_t src_len = strlen(src)+1;
1198         *dest = NULL;
1199         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1200 }
1201
1202 /**
1203  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1204  *
1205  * @param dest always set at least to NULL
1206  *
1207  * @returns The number of bytes occupied by the string in the destination
1208  **/
1209
1210 size_t pull_utf8_allocate(char **dest, const char *src)
1211 {
1212         size_t src_len = strlen(src)+1;
1213         *dest = NULL;
1214         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1215 }
1216
1217 /**
1218  Copy a string from a char* src to a unicode or ascii
1219  dos codepage destination choosing unicode or ascii based on the
1220  flags in the SMB buffer starting at base_ptr.
1221  Return the number of bytes occupied by the string in the destination.
1222  flags can have:
1223   STR_TERMINATE means include the null termination.
1224   STR_UPPER     means uppercase in the destination.
1225   STR_ASCII     use ascii even with unicode packet.
1226   STR_NOALIGN   means don't do alignment.
1227  dest_len is the maximum length allowed in the destination. If dest_len
1228  is -1 then no maxiumum is used.
1229 **/
1230
1231 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1232 {
1233 #ifdef DEVELOPER
1234         /* We really need to zero fill here, not clobber
1235          * region, as we want to ensure that valgrind thinks
1236          * all of the outgoing buffer has been written to
1237          * so a send() or write() won't trap an error.
1238          * JRA.
1239          */
1240 #if 0
1241         if (dest_len != (size_t)-1)
1242                 clobber_region(function, line, dest, dest_len);
1243 #else
1244         if (dest_len != (size_t)-1)
1245                 memset(dest, '\0', dest_len);
1246 #endif
1247 #endif
1248
1249         if (!(flags & STR_ASCII) && \
1250             ((flags & STR_UNICODE || \
1251               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1252                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1253         }
1254         return push_ascii(dest, src, dest_len, flags);
1255 }
1256
1257
1258 /**
1259  Copy a string from a unicode or ascii source (depending on
1260  the packet flags) to a char* destination.
1261  Flags can have:
1262   STR_TERMINATE means the string in src is null terminated.
1263   STR_UNICODE   means to force as unicode.
1264   STR_ASCII     use ascii even with unicode packet.
1265   STR_NOALIGN   means don't do alignment.
1266  if STR_TERMINATE is set then src_len is ignored is it is -1
1267  src_len is the length of the source area in bytes.
1268  Return the number of bytes occupied by the string in src.
1269  The resulting string in "dest" is always null terminated.
1270 **/
1271
1272 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1273 {
1274 #ifdef DEVELOPER
1275         if (dest_len != (size_t)-1)
1276                 clobber_region(function, line, dest, dest_len);
1277 #endif
1278
1279         if (!(flags & STR_ASCII) && \
1280             ((flags & STR_UNICODE || \
1281               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1282                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1283         }
1284         return pull_ascii(dest, src, dest_len, src_len, flags);
1285 }
1286
1287 size_t align_string(const void *base_ptr, const char *p, int flags)
1288 {
1289         if (!(flags & STR_ASCII) && \
1290             ((flags & STR_UNICODE || \
1291               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1292                 return ucs2_align(base_ptr, p, flags);
1293         }
1294         return 0;
1295 }