lib/util/charset/util_unistr.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4    Copyright (C) Andrew Tridgell 1992-2001
   5    Copyright (C) Simo Sorce 2001
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "includes.h"
  22 #include "system/locale.h"
  23
  24 /**
  25  Case insensitive string compararison
  26 **/
  27 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
  28 {
  29         codepoint_t c1=0, c2=0;
  30         size_t size1, size2;
  31         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
  32
  33         /* handle null ptr comparisons to simplify the use in qsort */
  34         if (s1 == s2) return 0;
  35         if (s1 == NULL) return -1;
  36         if (s2 == NULL) return 1;
  37
  38         while (*s1 && *s2) {
  39                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
  40                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
  41
  42                 s1 += size1;
  43                 s2 += size2;
  44
  45                 if (c1 == c2) {
  46                         continue;
  47                 }
  48
  49                 if (c1 == INVALID_CODEPOINT ||
  50                     c2 == INVALID_CODEPOINT) {
  51                         /* what else can we do?? */
  52                         return strcasecmp(s1, s2);
  53                 }
  54
  55                 if (toupper_m(c1) != toupper_m(c2)) {
  56                         return c1 - c2;
  57                 }
  58         }
  59
  60         return *s1 - *s2;
  61 }
  62
  63 /**
  64  Case insensitive string compararison, length limited
  65 **/
  66 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
  67 {
  68         codepoint_t c1=0, c2=0;
  69         size_t size1, size2;
  70         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
  71
  72         /* handle null ptr comparisons to simplify the use in qsort */
  73         if (s1 == s2) return 0;
  74         if (s1 == NULL) return -1;
  75         if (s2 == NULL) return 1;
  76
  77         while (*s1 && *s2 && n) {
  78                 n--;
  79
  80                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
  81                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
  82
  83                 s1 += size1;
  84                 s2 += size2;
  85
  86                 if (c1 == c2) {
  87                         continue;
  88                 }
  89
  90                 if (c1 == INVALID_CODEPOINT ||
  91                     c2 == INVALID_CODEPOINT) {
  92                         /* what else can we do?? */
  93                         return strcasecmp(s1, s2);
  94                 }
  95
  96                 if (toupper_m(c1) != toupper_m(c2)) {
  97                         return c1 - c2;
  98                 }
  99         }
 100
 101         if (n == 0) {
 102                 return 0;
 103         }
 104
 105         return *s1 - *s2;
 106 }
 107
 108 /**
 109  * Compare 2 strings.
 110  *
 111  * @note The comparison is case-insensitive.
 112  **/
 113 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
 114 {
 115         return strcasecmp_m(s1,s2) == 0;
 116 }
 117
 118 /**
 119  Compare 2 strings (case sensitive).
 120 **/
 121 _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
 122 {
 123         if (s1 == s2)
 124                 return true;
 125         if (!s1 || !s2)
 126                 return false;
 127
 128         return strcmp(s1,s2) == 0;
 129 }
 130
 131
 132 /**
 133  String replace.
 134  NOTE: oldc and newc must be 7 bit characters
 135 **/
 136 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
 137 {
 138         struct smb_iconv_convenience *ic = get_iconv_convenience();
 139         while (s && *s) {
 140                 size_t size;
 141                 codepoint_t c = next_codepoint_convenience(ic, s, &size);
 142                 if (c == oldc) {
 143                         *s = newc;
 144                 }
 145                 s += size;
 146         }
 147 }
 148
 149 /**
 150  Paranoid strcpy into a buffer of given length (includes terminating
 151  zero. Strips out all but 'a-Z0-9' and the character in other_safe_chars
 152  and replaces with '_'. Deliberately does *NOT* check for multibyte
 153  characters. Don't change it !
 154 **/
 155
 156 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
 157 {
 158         size_t len, i;
 159
 160         if (maxlength == 0) {
 161                 /* can't fit any bytes at all! */
 162                 return NULL;
 163         }
 164
 165         if (!dest) {
 166                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
 167                 return NULL;
 168         }
 169
 170         if (!src) {
 171                 *dest = 0;
 172                 return dest;
 173         }
 174
 175         len = strlen(src);
 176         if (len >= maxlength)
 177                 len = maxlength - 1;
 178
 179         if (!other_safe_chars)
 180                 other_safe_chars = "";
 181
 182         for(i = 0; i < len; i++) {
 183                 int val = (src[i] & 0xff);
 184                 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
 185                         dest[i] = src[i];
 186                 else
 187                         dest[i] = '_';
 188         }
 189
 190         dest[i] = '\0';
 191
 192         return dest;
 193 }
 194
 195 /**
 196  * Calculate the number of units (8 or 16-bit, depending on the
 197  * destination charset), that would be needed to convert the input
 198  * string which is expected to be in in src_charset encoding to the
 199  * destination charset (which should be a unicode charset).
 200  */
 201 _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
 202 {
 203         size_t count = 0;
 204         struct smb_iconv_convenience *ic = get_iconv_convenience();
 205
 206         if (!s) {
 207                 return 0;
 208         }
 209
 210         while (*s && !(((uint8_t)*s) & 0x80)) {
 211                 s++;
 212                 count++;
 213         }
 214
 215         if (!*s) {
 216                 return count;
 217         }
 218
 219         while (*s) {
 220                 size_t c_size;
 221                 codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
 222                 s += c_size;
 223
 224                 switch (dst_charset) {
 225                 case CH_UTF16LE:
 226                 case CH_UTF16BE:
 227                 case CH_UTF16MUNGED:
 228                         if (c < 0x10000) {
 229                                 count += 1;
 230                         } else {
 231                                 count += 2;
 232                         }
 233                         break;
 234                 case CH_UTF8:
 235                         /*
 236                          * this only checks ranges, and does not
 237                          * check for invalid codepoints
 238                          */
 239                         if (c < 0x80) {
 240                                 count += 1;
 241                         } else if (c < 0x800) {
 242                                 count += 2;
 243                         } else if (c < 0x1000) {
 244                                 count += 3;
 245                         } else {
 246                                 count += 4;
 247                         }
 248                         break;
 249                 default:
 250                         /*
 251                          * non-unicode encoding:
 252                          * assume that each codepoint fits into
 253                          * one unit in the destination encoding.
 254                          */
 255                         count += 1;
 256                 }
 257         }
 258
 259         return count;
 260 }
 261
 262 _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
 263                                   const charset_t dst_charset)
 264 {
 265         if (!s) {
 266                 return 0;
 267         }
 268         return strlen_m_ext(s, src_charset, dst_charset) + 1;
 269 }
 270
 271 /**
 272  * Calculate the number of 16-bit units that would be needed to convert
 273  * the input string which is expected to be in CH_UNIX encoding to UTF16.
 274  *
 275  * This will be the same as the number of bytes in a string for single
 276  * byte strings, but will be different for multibyte.
 277  */
 278 _PUBLIC_ size_t strlen_m(const char *s)
 279 {
 280         return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
 281 }
 282
 283 /**
 284    Work out the number of multibyte chars in a string, including the NULL
 285    terminator.
 286 **/
 287 _PUBLIC_ size_t strlen_m_term(const char *s)
 288 {
 289         if (!s) {
 290                 return 0;
 291         }
 292
 293         return strlen_m(s) + 1;
 294 }
 295
 296 /*
 297  * Weird helper routine for the winreg pipe: If nothing is around, return 0,
 298  * if a string is there, include the terminator.
 299  */
 300
 301 _PUBLIC_ size_t strlen_m_term_null(const char *s)
 302 {
 303         size_t len;
 304         if (!s) {
 305                 return 0;
 306         }
 307         len = strlen_m(s);
 308         if (len == 0) {
 309                 return 0;
 310         }
 311
 312         return len+1;
 313 }
 314
 315 /**
 316  Strchr and strrchr_m are a bit complex on general multi-byte strings.
 317 **/
 318 _PUBLIC_ char *strchr_m(const char *s, char c)
 319 {
 320         struct smb_iconv_convenience *ic = get_iconv_convenience();
 321         if (s == NULL) {
 322                 return NULL;
 323         }
 324         /* characters below 0x3F are guaranteed to not appear in
 325            non-initial position in multi-byte charsets */
 326         if ((c & 0xC0) == 0) {
 327                 return strchr(s, c);
 328         }
 329
 330         while (*s) {
 331                 size_t size;
 332                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 333                 if (c2 == c) {
 334                         return discard_const_p(char, s);
 335                 }
 336                 s += size;
 337         }
 338
 339         return NULL;
 340 }
 341
 342 /**
 343  * Multibyte-character version of strrchr
 344  */
 345 _PUBLIC_ char *strrchr_m(const char *s, char c)
 346 {
 347         struct smb_iconv_convenience *ic = get_iconv_convenience();
 348         char *ret = NULL;
 349
 350         if (s == NULL) {
 351                 return NULL;
 352         }
 353
 354         /* characters below 0x3F are guaranteed to not appear in
 355            non-initial position in multi-byte charsets */
 356         if ((c & 0xC0) == 0) {
 357                 return strrchr(s, c);
 358         }
 359
 360         while (*s) {
 361                 size_t size;
 362                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 363                 if (c2 == c) {
 364                         ret = discard_const_p(char, s);
 365                 }
 366                 s += size;
 367         }
 368
 369         return ret;
 370 }
 371
 372 /**
 373   return True if any (multi-byte) character is lower case
 374 */
 375 _PUBLIC_ bool strhaslower(const char *string)
 376 {
 377         struct smb_iconv_convenience *ic = get_iconv_convenience();
 378         while (*string) {
 379                 size_t c_size;
 380                 codepoint_t s;
 381                 codepoint_t t;
 382
 383                 s = next_codepoint_convenience(ic, string, &c_size);
 384                 string += c_size;
 385
 386                 t = toupper_m(s);
 387
 388                 if (s != t) {
 389                         return true; /* that means it has lower case chars */
 390                 }
 391         }
 392
 393         return false;
 394 }
 395
 396 /**
 397   return True if any (multi-byte) character is upper case
 398 */
 399 _PUBLIC_ bool strhasupper(const char *string)
 400 {
 401         struct smb_iconv_convenience *ic = get_iconv_convenience();
 402         while (*string) {
 403                 size_t c_size;
 404                 codepoint_t s;
 405                 codepoint_t t;
 406
 407                 s = next_codepoint_convenience(ic, string, &c_size);
 408                 string += c_size;
 409
 410                 t = tolower_m(s);
 411
 412                 if (s != t) {
 413                         return true; /* that means it has upper case chars */
 414                 }
 415         }
 416
 417         return false;
 418 }
 419
 420 /**
 421  Convert a string to lower case, allocated with talloc
 422 **/
 423 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 424 {
 425         size_t size=0;
 426         char *dest;
 427         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 428
 429         if(src == NULL) {
 430                 return NULL;
 431         }
 432
 433         /* this takes advantage of the fact that upper/lower can't
 434            change the length of a character by more than 1 byte */
 435         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
 436         if (dest == NULL) {
 437                 return NULL;
 438         }
 439
 440         while (*src) {
 441                 size_t c_size;
 442                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 443                 src += c_size;
 444
 445                 c = tolower_m(c);
 446
 447                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 448                 if (c_size == -1) {
 449                         talloc_free(dest);
 450                         return NULL;
 451                 }
 452                 size += c_size;
 453         }
 454
 455         dest[size] = 0;
 456
 457         /* trim it so talloc_append_string() works */
 458         dest = talloc_realloc(ctx, dest, char, size+1);
 459
 460         talloc_set_name_const(dest, dest);
 461
 462         return dest;
 463 }
 464
 465 /**
 466  Convert a string to UPPER case, allocated with talloc
 467  source length limited to n bytes
 468 **/
 469 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
 470 {
 471         size_t size=0;
 472         char *dest;
 473         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 474
 475         if (!src) {
 476                 return NULL;
 477         }
 478
 479         /* this takes advantage of the fact that upper/lower can't
 480            change the length of a character by more than 1 byte */
 481         dest = talloc_array(ctx, char, 2*(n+1));
 482         if (dest == NULL) {
 483                 return NULL;
 484         }
 485
 486         while (n-- && *src) {
 487                 size_t c_size;
 488                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 489                 src += c_size;
 490
 491                 c = toupper_m(c);
 492
 493                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 494                 if (c_size == -1) {
 495                         talloc_free(dest);
 496                         return NULL;
 497                 }
 498                 size += c_size;
 499         }
 500
 501         dest[size] = 0;
 502
 503         /* trim it so talloc_append_string() works */
 504         dest = talloc_realloc(ctx, dest, char, size+1);
 505
 506         talloc_set_name_const(dest, dest);
 507
 508         return dest;
 509 }
 510
 511 /**
 512  Convert a string to UPPER case, allocated with talloc
 513 **/
 514 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 515 {
 516         return strupper_talloc_n(ctx, src, src?strlen(src):0);
 517 }
 518
 519 /**
 520  talloc_strdup() a unix string to upper case.
 521 **/
 522 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
 523 {
 524         return strupper_talloc(ctx, src);
 525 }
 526
 527 /**
 528  Convert a string to lower case.
 529 **/
 530 _PUBLIC_ void strlower_m(char *s)
 531 {
 532         char *d;
 533         struct smb_iconv_convenience *iconv_convenience;
 534
 535         /* this is quite a common operation, so we want it to be
 536            fast. We optimise for the ascii case, knowing that all our
 537            supported multi-byte character sets are ascii-compatible
 538            (ie. they match for the first 128 chars) */
 539         while (*s && !(((uint8_t)*s) & 0x80)) {
 540                 *s = tolower((uint8_t)*s);
 541                 s++;
 542         }
 543
 544         if (!*s)
 545                 return;
 546
 547         iconv_convenience = get_iconv_convenience();
 548
 549         d = s;
 550
 551         while (*s) {
 552                 size_t c_size, c_size2;
 553                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 554                 c_size2 = push_codepoint_convenience(iconv_convenience, d, tolower_m(c));
 555                 if (c_size2 > c_size) {
 556                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
 557                                  c, tolower_m(c), (int)c_size, (int)c_size2));
 558                         smb_panic("codepoint expansion in strlower_m\n");
 559                 }
 560                 s += c_size;
 561                 d += c_size2;
 562         }
 563         *d = 0;
 564 }
 565
 566 /**
 567  Convert a string to UPPER case.
 568 **/
 569 _PUBLIC_ void strupper_m(char *s)
 570 {
 571         char *d;
 572         struct smb_iconv_convenience *iconv_convenience;
 573
 574         /* this is quite a common operation, so we want it to be
 575            fast. We optimise for the ascii case, knowing that all our
 576            supported multi-byte character sets are ascii-compatible
 577            (ie. they match for the first 128 chars) */
 578         while (*s && !(((uint8_t)*s) & 0x80)) {
 579                 *s = toupper((uint8_t)*s);
 580                 s++;
 581         }
 582
 583         if (!*s)
 584                 return;
 585
 586         iconv_convenience = get_iconv_convenience();
 587
 588         d = s;
 589
 590         while (*s) {
 591                 size_t c_size, c_size2;
 592                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 593                 c_size2 = push_codepoint_convenience(iconv_convenience, d, toupper_m(c));
 594                 if (c_size2 > c_size) {
 595                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
 596                                  c, toupper_m(c), (int)c_size, (int)c_size2));
 597                         smb_panic("codepoint expansion in strupper_m\n");
 598                 }
 599                 s += c_size;
 600                 d += c_size2;
 601         }
 602         *d = 0;
 603 }
 604
 605
 606 /**
 607  Find the number of 'c' chars in a string
 608 **/
 609 _PUBLIC_ size_t count_chars_m(const char *s, char c)
 610 {
 611         struct smb_iconv_convenience *ic = get_iconv_convenience();
 612         size_t count = 0;
 613
 614         while (*s) {
 615                 size_t size;
 616                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 617                 if (c2 == c) count++;
 618                 s += size;
 619         }
 620
 621         return count;
 622 }
 623
 624
 625 /**
 626  * Copy a string from a char* unix src to a dos codepage string destination.
 627  *
 628  * @return the number of bytes occupied by the string in the destination.
 629  *
 630  * @param flags can include
 631  * <dl>
 632  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 633  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 634  * </dl>
 635  *
 636  * @param dest_len the maximum length in bytes allowed in the
 637  * destination.  If @p dest_len is -1 then no maximum is used.
 638  **/
 639 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 640 {
 641         size_t src_len;
 642         ssize_t ret;
 643
 644         if (flags & STR_UPPER) {
 645                 char *tmpbuf = strupper_talloc(NULL, src);
 646                 if (tmpbuf == NULL) {
 647                         return -1;
 648                 }
 649                 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 650                 talloc_free(tmpbuf);
 651                 return ret;
 652         }
 653
 654         src_len = strlen(src);
 655
 656         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 657                 src_len++;
 658
 659         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, false);
 660 }
 661
 662 /**
 663  * Copy a string from a unix char* src to an ASCII destination,
 664  * allocating a buffer using talloc().
 665  *
 666  * @param dest always set at least to NULL
 667  *
 668  * @returns The number of bytes occupied by the string in the destination
 669  *         or -1 in case of error.
 670  **/
 671 _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 672 {
 673         size_t src_len = strlen(src)+1;
 674         *dest = NULL;
 675         return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
 676 }
 677
 678
 679 /**
 680  * Copy a string from a dos codepage source to a unix char* destination.
 681  *
 682  * The resulting string in "dest" is always null terminated.
 683  *
 684  * @param flags can have:
 685  * <dl>
 686  * <dt>STR_TERMINATE</dt>
 687  * <dd>STR_TERMINATE means the string in @p src
 688  * is null terminated, and src_len is ignored.</dd>
 689  * </dl>
 690  *
 691  * @param src_len is the length of the source area in bytes.
 692  * @returns the number of bytes occupied by the string in @p src.
 693  **/
 694 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 695 {
 696         size_t ret;
 697
 698         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 699                 if (src_len == (size_t)-1) {
 700                         src_len = strlen((const char *)src) + 1;
 701                 } else {
 702                         size_t len = strnlen((const char *)src, src_len);
 703                         if (len < src_len)
 704                                 len++;
 705                         src_len = len;
 706                 }
 707         }
 708
 709         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, false);
 710
 711         if (dest_len)
 712                 dest[MIN(ret, dest_len-1)] = 0;
 713
 714         return src_len;
 715 }
 716
 717 /**
 718  * Copy a string from a char* src to a unicode destination.
 719  *
 720  * @returns the number of bytes occupied by the string in the destination.
 721  *
 722  * @param flags can have:
 723  *
 724  * <dl>
 725  * <dt>STR_TERMINATE <dd>means include the null termination.
 726  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 727  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 728  * </dl>
 729  *
 730  * @param dest_len is the maximum length allowed in the
 731  * destination. If dest_len is -1 then no maxiumum is used.
 732  **/
 733 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
 734 {
 735         size_t len=0;
 736         size_t src_len = strlen(src);
 737         size_t ret;
 738
 739         if (flags & STR_UPPER) {
 740                 char *tmpbuf = strupper_talloc(NULL, src);
 741                 if (tmpbuf == NULL) {
 742                         return -1;
 743                 }
 744                 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 745                 talloc_free(tmpbuf);
 746                 return ret;
 747         }
 748
 749         if (flags & STR_TERMINATE)
 750                 src_len++;
 751
 752         if (ucs2_align(NULL, dest, flags)) {
 753                 *(char *)dest = 0;
 754                 dest = (void *)((char *)dest + 1);
 755                 if (dest_len) dest_len--;
 756                 len++;
 757         }
 758
 759         /* ucs2 is always a multiple of 2 bytes */
 760         dest_len &= ~1;
 761
 762         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, false);
 763         if (ret == (size_t)-1) {
 764                 return 0;
 765         }
 766
 767         len += ret;
 768
 769         return len;
 770 }
 771
 772
 773 /**
 774  * Copy a string from a unix char* src to a UCS2 destination,
 775  * allocating a buffer using talloc().
 776  *
 777  * @param dest always set at least to NULL
 778  *
 779  * @returns The number of bytes occupied by the string in the destination
 780  *         or -1 in case of error.
 781  **/
 782 _PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
 783 {
 784         size_t src_len = strlen(src)+1;
 785         *dest = NULL;
 786         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
 787 }
 788
 789
 790 /**
 791  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
 792  *
 793  * @param dest always set at least to NULL
 794  *
 795  * @returns The number of bytes occupied by the string in the destination
 796  **/
 797
 798 _PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 799 {
 800         size_t src_len = strlen(src)+1;
 801         *dest = NULL;
 802         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
 803 }
 804
 805 /**
 806  Copy a string from a ucs2 source to a unix char* destination.
 807  Flags can have:
 808   STR_TERMINATE means the string in src is null terminated.
 809   STR_NOALIGN   means don't try to align.
 810  if STR_TERMINATE is set then src_len is ignored if it is -1.
 811  src_len is the length of the source area in bytes
 812  Return the number of bytes occupied by the string in src.
 813  The resulting string in "dest" is always null terminated.
 814 **/
 815
 816 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 817 {
 818         size_t ret;
 819
 820         if (ucs2_align(NULL, src, flags)) {
 821                 src = (const void *)((const char *)src + 1);
 822                 if (src_len > 0)
 823                         src_len--;
 824         }
 825
 826         if (flags & STR_TERMINATE) {
 827                 if (src_len == (size_t)-1) {
 828                         src_len = utf16_len(src);
 829                 } else {
 830                         src_len = utf16_len_n(src, src_len);
 831                 }
 832         }
 833
 834         /* ucs2 is always a multiple of 2 bytes */
 835         if (src_len != (size_t)-1)
 836                 src_len &= ~1;
 837
 838         ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, false);
 839         if (dest_len)
 840                 dest[MIN(ret, dest_len-1)] = 0;
 841
 842         return src_len;
 843 }
 844
 845 /**
 846  * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
 847  *
 848  * @param dest always set at least to NULL
 849  *
 850  * @returns The number of bytes occupied by the string in the destination
 851  **/
 852
 853 _PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 854 {
 855         size_t src_len = strlen(src)+1;
 856         *dest = NULL;
 857         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 858 }
 859
 860 /**
 861  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
 862  *
 863  * @param dest always set at least to NULL
 864  *
 865  * @returns The number of bytes occupied by the string in the destination
 866  **/
 867
 868 _PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
 869 {
 870         size_t src_len = utf16_len(src);
 871         *dest = NULL;
 872         return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 873 }
 874
 875 /**
 876  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
 877  *
 878  * @param dest always set at least to NULL
 879  *
 880  * @returns The number of bytes occupied by the string in the destination
 881  **/
 882
 883 _PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 884 {
 885         size_t src_len = strlen(src)+1;
 886         *dest = NULL;
 887         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 888 }
 889
 890 /**
 891  Copy a string from a char* src to a unicode or ascii
 892  dos codepage destination choosing unicode or ascii based on the
 893  flags in the SMB buffer starting at base_ptr.
 894  Return the number of bytes occupied by the string in the destination.
 895  flags can have:
 896   STR_TERMINATE means include the null termination.
 897   STR_UPPER     means uppercase in the destination.
 898   STR_ASCII     use ascii even with unicode packet.
 899   STR_NOALIGN   means don't do alignment.
 900  dest_len is the maximum length allowed in the destination. If dest_len
 901  is -1 then no maxiumum is used.
 902 **/
 903
 904 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
 905 {
 906         if (flags & STR_ASCII) {
 907                 return push_ascii(dest, src, dest_len, flags);
 908         } else if (flags & STR_UNICODE) {
 909                 return push_ucs2(dest, src, dest_len, flags);
 910         } else {
 911                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
 912                 return -1;
 913         }
 914 }
 915
 916
 917 /**
 918  Copy a string from a unicode or ascii source (depending on
 919  the packet flags) to a char* destination.
 920  Flags can have:
 921   STR_TERMINATE means the string in src is null terminated.
 922   STR_UNICODE   means to force as unicode.
 923   STR_ASCII     use ascii even with unicode packet.
 924   STR_NOALIGN   means don't do alignment.
 925  if STR_TERMINATE is set then src_len is ignored is it is -1
 926  src_len is the length of the source area in bytes.
 927  Return the number of bytes occupied by the string in src.
 928  The resulting string in "dest" is always null terminated.
 929 **/
 930
 931 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 932 {
 933         if (flags & STR_ASCII) {
 934                 return pull_ascii(dest, src, dest_len, src_len, flags);
 935         } else if (flags & STR_UNICODE) {
 936                 return pull_ucs2(dest, src, dest_len, src_len, flags);
 937         } else {
 938                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
 939                 return -1;
 940         }
 941 }
 942
 943
 944 /**
 945  * Convert string from one encoding to another, making error checking etc
 946  *
 947  * @param src pointer to source string (multibyte or singlebyte)
 948  * @param srclen length of the source string in bytes
 949  * @param dest pointer to destination string (multibyte or singlebyte)
 950  * @param destlen maximal length allowed for string
 951  * @returns the number of bytes occupied in the destination
 952  **/
 953 _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
 954                                 void const *src, size_t srclen,
 955                                 void *dest, size_t destlen,
 956                                 bool allow_badcharcnv)
 957 {
 958         size_t ret;
 959         if (!convert_string_convenience(get_iconv_convenience(), from, to,
 960                                                                           src, srclen,
 961                                                                           dest, destlen, &ret,
 962                                                                           allow_badcharcnv))
 963                 return -1;
 964         return ret;
 965 }
 966
 967 /**
 968  * Convert between character sets, allocating a new buffer using talloc for the result.
 969  *
 970  * @param srclen length of source buffer.
 971  * @param dest always set at least to NULL
 972  * @param converted_size Size in bytes of the converted string
 973  * @note -1 is not accepted for srclen.
 974  *
 975  * @returns boolean indication whether the conversion succeeded
 976  **/
 977
 978 _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
 979                                        charset_t from, charset_t to,
 980                                        void const *src, size_t srclen,
 981                                        void *dest, size_t *converted_size,
 982                                            bool allow_badcharcnv)
 983 {
 984         return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
 985                                                                                          from, to, src, srclen, dest,
 986                                                                                          converted_size,
 987                                                                                          allow_badcharcnv);
 988 }
 989