lib/util/charset/util_unistr.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4    Copyright (C) Andrew Tridgell 1992-2001
   5    Copyright (C) Simo Sorce 2001
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "includes.h"
  22 #include "system/locale.h"
  23
  24 struct smb_iconv_convenience *global_iconv_convenience = NULL;
  25
  26 static inline struct smb_iconv_convenience *get_iconv_convenience(void)
  27 {
  28         if (global_iconv_convenience == NULL)
  29                 global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(),
  30                                                                         "ASCII", "UTF-8", true, NULL);
  31         return global_iconv_convenience;
  32 }
  33
  34 /**
  35  Case insensitive string compararison
  36 **/
  37 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
  38 {
  39         codepoint_t c1=0, c2=0;
  40         size_t size1, size2;
  41         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
  42
  43         /* handle null ptr comparisons to simplify the use in qsort */
  44         if (s1 == s2) return 0;
  45         if (s1 == NULL) return -1;
  46         if (s2 == NULL) return 1;
  47
  48         while (*s1 && *s2) {
  49                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
  50                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
  51
  52                 s1 += size1;
  53                 s2 += size2;
  54
  55                 if (c1 == c2) {
  56                         continue;
  57                 }
  58
  59                 if (c1 == INVALID_CODEPOINT ||
  60                     c2 == INVALID_CODEPOINT) {
  61                         /* what else can we do?? */
  62                         return strcasecmp(s1, s2);
  63                 }
  64
  65                 if (toupper_m(c1) != toupper_m(c2)) {
  66                         return c1 - c2;
  67                 }
  68         }
  69
  70         return *s1 - *s2;
  71 }
  72
  73 /**
  74  * Get the next token from a string, return False if none found.
  75  * Handles double-quotes.
  76  *
  77  * Based on a routine by GJC@VILLAGE.COM.
  78  * Extensively modified by Andrew.Tridgell@anu.edu.au
  79  **/
  80 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
  81 {
  82         const char *s;
  83         bool quoted;
  84         size_t len=1;
  85
  86         if (!ptr)
  87                 return false;
  88
  89         s = *ptr;
  90
  91         /* default to simple separators */
  92         if (!sep)
  93                 sep = " \t\n\r";
  94
  95         /* find the first non sep char */
  96         while (*s && strchr_m(sep,*s))
  97                 s++;
  98
  99         /* nothing left? */
 100         if (!*s)
 101                 return false;
 102
 103         /* copy over the token */
 104         for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
 105                 if (*s == '\"') {
 106                         quoted = !quoted;
 107                 } else {
 108                         len++;
 109                         *buff++ = *s;
 110                 }
 111         }
 112
 113         *ptr = (*s) ? s+1 : s;
 114         *buff = 0;
 115
 116         return true;
 117 }
 118
 119 /**
 120  Case insensitive string compararison, length limited
 121 **/
 122 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
 123 {
 124         codepoint_t c1=0, c2=0;
 125         size_t size1, size2;
 126         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 127
 128         /* handle null ptr comparisons to simplify the use in qsort */
 129         if (s1 == s2) return 0;
 130         if (s1 == NULL) return -1;
 131         if (s2 == NULL) return 1;
 132
 133         while (*s1 && *s2 && n) {
 134                 n--;
 135
 136                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
 137                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
 138
 139                 s1 += size1;
 140                 s2 += size2;
 141
 142                 if (c1 == c2) {
 143                         continue;
 144                 }
 145
 146                 if (c1 == INVALID_CODEPOINT ||
 147                     c2 == INVALID_CODEPOINT) {
 148                         /* what else can we do?? */
 149                         return strcasecmp(s1, s2);
 150                 }
 151
 152                 if (toupper_m(c1) != toupper_m(c2)) {
 153                         return c1 - c2;
 154                 }
 155         }
 156
 157         if (n == 0) {
 158                 return 0;
 159         }
 160
 161         return *s1 - *s2;
 162 }
 163
 164 /**
 165  * Compare 2 strings.
 166  *
 167  * @note The comparison is case-insensitive.
 168  **/
 169 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
 170 {
 171         return strcasecmp_m(s1,s2) == 0;
 172 }
 173
 174 /**
 175  Compare 2 strings (case sensitive).
 176 **/
 177 _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
 178 {
 179         if (s1 == s2)
 180                 return true;
 181         if (!s1 || !s2)
 182                 return false;
 183
 184         return strcmp(s1,s2) == 0;
 185 }
 186
 187
 188 /**
 189  String replace.
 190  NOTE: oldc and newc must be 7 bit characters
 191 **/
 192 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
 193 {
 194         struct smb_iconv_convenience *ic = get_iconv_convenience();
 195         while (s && *s) {
 196                 size_t size;
 197                 codepoint_t c = next_codepoint_convenience(ic, s, &size);
 198                 if (c == oldc) {
 199                         *s = newc;
 200                 }
 201                 s += size;
 202         }
 203 }
 204
 205 /**
 206  Paranoid strcpy into a buffer of given length (includes terminating
 207  zero. Strips out all but 'a-Z0-9' and the character in other_safe_chars
 208  and replaces with '_'. Deliberately does *NOT* check for multibyte
 209  characters. Don't change it !
 210 **/
 211
 212 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
 213 {
 214         size_t len, i;
 215
 216         if (maxlength == 0) {
 217                 /* can't fit any bytes at all! */
 218                 return NULL;
 219         }
 220
 221         if (!dest) {
 222                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
 223                 return NULL;
 224         }
 225
 226         if (!src) {
 227                 *dest = 0;
 228                 return dest;
 229         }
 230
 231         len = strlen(src);
 232         if (len >= maxlength)
 233                 len = maxlength - 1;
 234
 235         if (!other_safe_chars)
 236                 other_safe_chars = "";
 237
 238         for(i = 0; i < len; i++) {
 239                 int val = (src[i] & 0xff);
 240                 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
 241                         dest[i] = src[i];
 242                 else
 243                         dest[i] = '_';
 244         }
 245
 246         dest[i] = '\0';
 247
 248         return dest;
 249 }
 250
 251 /**
 252  Count the number of UCS2 characters in a string. Normally this will
 253  be the same as the number of bytes in a string for single byte strings,
 254  but will be different for multibyte.
 255 **/
 256 _PUBLIC_ size_t strlen_m(const char *s)
 257 {
 258         size_t count = 0;
 259         struct smb_iconv_convenience *ic = get_iconv_convenience();
 260
 261         if (!s) {
 262                 return 0;
 263         }
 264
 265         while (*s && !(((uint8_t)*s) & 0x80)) {
 266                 s++;
 267                 count++;
 268         }
 269
 270         if (!*s) {
 271                 return count;
 272         }
 273
 274         while (*s) {
 275                 size_t c_size;
 276                 codepoint_t c = next_codepoint_convenience(ic, s, &c_size);
 277                 if (c < 0x10000) {
 278                         count += 1;
 279                 } else {
 280                         count += 2;
 281                 }
 282                 s += c_size;
 283         }
 284
 285         return count;
 286 }
 287
 288 /**
 289    Work out the number of multibyte chars in a string, including the NULL
 290    terminator.
 291 **/
 292 _PUBLIC_ size_t strlen_m_term(const char *s)
 293 {
 294         if (!s) {
 295                 return 0;
 296         }
 297
 298         return strlen_m(s) + 1;
 299 }
 300
 301 /*
 302  * Weird helper routine for the winreg pipe: If nothing is around, return 0,
 303  * if a string is there, include the terminator.
 304  */
 305
 306 _PUBLIC_ size_t strlen_m_term_null(const char *s)
 307 {
 308         size_t len;
 309         if (!s) {
 310                 return 0;
 311         }
 312         len = strlen_m(s);
 313         if (len == 0) {
 314                 return 0;
 315         }
 316
 317         return len+1;
 318 }
 319
 320 /**
 321  Strchr and strrchr_m are a bit complex on general multi-byte strings.
 322 **/
 323 _PUBLIC_ char *strchr_m(const char *s, char c)
 324 {
 325         struct smb_iconv_convenience *ic = get_iconv_convenience();
 326         if (s == NULL) {
 327                 return NULL;
 328         }
 329         /* characters below 0x3F are guaranteed to not appear in
 330            non-initial position in multi-byte charsets */
 331         if ((c & 0xC0) == 0) {
 332                 return strchr(s, c);
 333         }
 334
 335         while (*s) {
 336                 size_t size;
 337                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 338                 if (c2 == c) {
 339                         return discard_const_p(char, s);
 340                 }
 341                 s += size;
 342         }
 343
 344         return NULL;
 345 }
 346
 347 /**
 348  * Multibyte-character version of strrchr
 349  */
 350 _PUBLIC_ char *strrchr_m(const char *s, char c)
 351 {
 352         struct smb_iconv_convenience *ic = get_iconv_convenience();
 353         char *ret = NULL;
 354
 355         if (s == NULL) {
 356                 return NULL;
 357         }
 358
 359         /* characters below 0x3F are guaranteed to not appear in
 360            non-initial position in multi-byte charsets */
 361         if ((c & 0xC0) == 0) {
 362                 return strrchr(s, c);
 363         }
 364
 365         while (*s) {
 366                 size_t size;
 367                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 368                 if (c2 == c) {
 369                         ret = discard_const_p(char, s);
 370                 }
 371                 s += size;
 372         }
 373
 374         return ret;
 375 }
 376
 377 /**
 378   return True if any (multi-byte) character is lower case
 379 */
 380 _PUBLIC_ bool strhaslower(const char *string)
 381 {
 382         struct smb_iconv_convenience *ic = get_iconv_convenience();
 383         while (*string) {
 384                 size_t c_size;
 385                 codepoint_t s;
 386                 codepoint_t t;
 387
 388                 s = next_codepoint_convenience(ic, string, &c_size);
 389                 string += c_size;
 390
 391                 t = toupper_m(s);
 392
 393                 if (s != t) {
 394                         return true; /* that means it has lower case chars */
 395                 }
 396         }
 397
 398         return false;
 399 }
 400
 401 /**
 402   return True if any (multi-byte) character is upper case
 403 */
 404 _PUBLIC_ bool strhasupper(const char *string)
 405 {
 406         struct smb_iconv_convenience *ic = get_iconv_convenience();
 407         while (*string) {
 408                 size_t c_size;
 409                 codepoint_t s;
 410                 codepoint_t t;
 411
 412                 s = next_codepoint_convenience(ic, string, &c_size);
 413                 string += c_size;
 414
 415                 t = tolower_m(s);
 416
 417                 if (s != t) {
 418                         return true; /* that means it has upper case chars */
 419                 }
 420         }
 421
 422         return false;
 423 }
 424
 425 /**
 426  Convert a string to lower case, allocated with talloc
 427 **/
 428 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 429 {
 430         size_t size=0;
 431         char *dest;
 432         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 433
 434         if(src == NULL) {
 435                 return NULL;
 436         }
 437
 438         /* this takes advantage of the fact that upper/lower can't
 439            change the length of a character by more than 1 byte */
 440         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
 441         if (dest == NULL) {
 442                 return NULL;
 443         }
 444
 445         while (*src) {
 446                 size_t c_size;
 447                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 448                 src += c_size;
 449
 450                 c = tolower_m(c);
 451
 452                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 453                 if (c_size == -1) {
 454                         talloc_free(dest);
 455                         return NULL;
 456                 }
 457                 size += c_size;
 458         }
 459
 460         dest[size] = 0;
 461
 462         /* trim it so talloc_append_string() works */
 463         dest = talloc_realloc(ctx, dest, char, size+1);
 464
 465         talloc_set_name_const(dest, dest);
 466
 467         return dest;
 468 }
 469
 470 /**
 471  Convert a string to UPPER case, allocated with talloc
 472  source length limited to n bytes
 473 **/
 474 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
 475 {
 476         size_t size=0;
 477         char *dest;
 478         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 479
 480         if (!src) {
 481                 return NULL;
 482         }
 483
 484         /* this takes advantage of the fact that upper/lower can't
 485            change the length of a character by more than 1 byte */
 486         dest = talloc_array(ctx, char, 2*(n+1));
 487         if (dest == NULL) {
 488                 return NULL;
 489         }
 490
 491         while (n-- && *src) {
 492                 size_t c_size;
 493                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 494                 src += c_size;
 495
 496                 c = toupper_m(c);
 497
 498                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 499                 if (c_size == -1) {
 500                         talloc_free(dest);
 501                         return NULL;
 502                 }
 503                 size += c_size;
 504         }
 505
 506         dest[size] = 0;
 507
 508         /* trim it so talloc_append_string() works */
 509         dest = talloc_realloc(ctx, dest, char, size+1);
 510
 511         talloc_set_name_const(dest, dest);
 512
 513         return dest;
 514 }
 515
 516 /**
 517  Convert a string to UPPER case, allocated with talloc
 518 **/
 519 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 520 {
 521         return strupper_talloc_n(ctx, src, src?strlen(src):0);
 522 }
 523
 524 /**
 525  talloc_strdup() a unix string to upper case.
 526 **/
 527 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
 528 {
 529         return strupper_talloc(ctx, src);
 530 }
 531
 532 /**
 533  Convert a string to lower case.
 534 **/
 535 _PUBLIC_ void strlower_m(char *s)
 536 {
 537         char *d;
 538         struct smb_iconv_convenience *iconv_convenience;
 539
 540         /* this is quite a common operation, so we want it to be
 541            fast. We optimise for the ascii case, knowing that all our
 542            supported multi-byte character sets are ascii-compatible
 543            (ie. they match for the first 128 chars) */
 544         while (*s && !(((uint8_t)*s) & 0x80)) {
 545                 *s = tolower((uint8_t)*s);
 546                 s++;
 547         }
 548
 549         if (!*s)
 550                 return;
 551
 552         iconv_convenience = get_iconv_convenience();
 553
 554         d = s;
 555
 556         while (*s) {
 557                 size_t c_size, c_size2;
 558                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 559                 c_size2 = push_codepoint_convenience(iconv_convenience, d, tolower_m(c));
 560                 if (c_size2 > c_size) {
 561                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
 562                                  c, tolower_m(c), (int)c_size, (int)c_size2));
 563                         smb_panic("codepoint expansion in strlower_m\n");
 564                 }
 565                 s += c_size;
 566                 d += c_size2;
 567         }
 568         *d = 0;
 569 }
 570
 571 /**
 572  Convert a string to UPPER case.
 573 **/
 574 _PUBLIC_ void strupper_m(char *s)
 575 {
 576         char *d;
 577         struct smb_iconv_convenience *iconv_convenience;
 578
 579         /* this is quite a common operation, so we want it to be
 580            fast. We optimise for the ascii case, knowing that all our
 581            supported multi-byte character sets are ascii-compatible
 582            (ie. they match for the first 128 chars) */
 583         while (*s && !(((uint8_t)*s) & 0x80)) {
 584                 *s = toupper((uint8_t)*s);
 585                 s++;
 586         }
 587
 588         if (!*s)
 589                 return;
 590
 591         iconv_convenience = get_iconv_convenience();
 592
 593         d = s;
 594
 595         while (*s) {
 596                 size_t c_size, c_size2;
 597                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 598                 c_size2 = push_codepoint_convenience(iconv_convenience, d, toupper_m(c));
 599                 if (c_size2 > c_size) {
 600                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
 601                                  c, toupper_m(c), (int)c_size, (int)c_size2));
 602                         smb_panic("codepoint expansion in strupper_m\n");
 603                 }
 604                 s += c_size;
 605                 d += c_size2;
 606         }
 607         *d = 0;
 608 }
 609
 610
 611 /**
 612  Find the number of 'c' chars in a string
 613 **/
 614 _PUBLIC_ size_t count_chars_m(const char *s, char c)
 615 {
 616         struct smb_iconv_convenience *ic = get_iconv_convenience();
 617         size_t count = 0;
 618
 619         while (*s) {
 620                 size_t size;
 621                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 622                 if (c2 == c) count++;
 623                 s += size;
 624         }
 625
 626         return count;
 627 }
 628
 629
 630 /**
 631  * Copy a string from a char* unix src to a dos codepage string destination.
 632  *
 633  * @return the number of bytes occupied by the string in the destination.
 634  *
 635  * @param flags can include
 636  * <dl>
 637  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 638  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 639  * </dl>
 640  *
 641  * @param dest_len the maximum length in bytes allowed in the
 642  * destination.  If @p dest_len is -1 then no maximum is used.
 643  **/
 644 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 645 {
 646         size_t src_len;
 647         ssize_t ret;
 648
 649         if (flags & STR_UPPER) {
 650                 char *tmpbuf = strupper_talloc(NULL, src);
 651                 if (tmpbuf == NULL) {
 652                         return -1;
 653                 }
 654                 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 655                 talloc_free(tmpbuf);
 656                 return ret;
 657         }
 658
 659         src_len = strlen(src);
 660
 661         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 662                 src_len++;
 663
 664         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, false);
 665 }
 666
 667 /**
 668  * Copy a string from a unix char* src to an ASCII destination,
 669  * allocating a buffer using talloc().
 670  *
 671  * @param dest always set at least to NULL
 672  *
 673  * @returns The number of bytes occupied by the string in the destination
 674  *         or -1 in case of error.
 675  **/
 676 _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 677 {
 678         size_t src_len = strlen(src)+1;
 679         *dest = NULL;
 680         return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
 681 }
 682
 683
 684 /**
 685  * Copy a string from a dos codepage source to a unix char* destination.
 686  *
 687  * The resulting string in "dest" is always null terminated.
 688  *
 689  * @param flags can have:
 690  * <dl>
 691  * <dt>STR_TERMINATE</dt>
 692  * <dd>STR_TERMINATE means the string in @p src
 693  * is null terminated, and src_len is ignored.</dd>
 694  * </dl>
 695  *
 696  * @param src_len is the length of the source area in bytes.
 697  * @returns the number of bytes occupied by the string in @p src.
 698  **/
 699 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 700 {
 701         size_t ret;
 702
 703         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 704                 if (src_len == (size_t)-1) {
 705                         src_len = strlen((const char *)src) + 1;
 706                 } else {
 707                         size_t len = strnlen((const char *)src, src_len);
 708                         if (len < src_len)
 709                                 len++;
 710                         src_len = len;
 711                 }
 712         }
 713
 714         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, false);
 715
 716         if (dest_len)
 717                 dest[MIN(ret, dest_len-1)] = 0;
 718
 719         return src_len;
 720 }
 721
 722 /**
 723  * Copy a string from a char* src to a unicode destination.
 724  *
 725  * @returns the number of bytes occupied by the string in the destination.
 726  *
 727  * @param flags can have:
 728  *
 729  * <dl>
 730  * <dt>STR_TERMINATE <dd>means include the null termination.
 731  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 732  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 733  * </dl>
 734  *
 735  * @param dest_len is the maximum length allowed in the
 736  * destination. If dest_len is -1 then no maxiumum is used.
 737  **/
 738 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
 739 {
 740         size_t len=0;
 741         size_t src_len = strlen(src);
 742         size_t ret;
 743
 744         if (flags & STR_UPPER) {
 745                 char *tmpbuf = strupper_talloc(NULL, src);
 746                 if (tmpbuf == NULL) {
 747                         return -1;
 748                 }
 749                 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 750                 talloc_free(tmpbuf);
 751                 return ret;
 752         }
 753
 754         if (flags & STR_TERMINATE)
 755                 src_len++;
 756
 757         if (ucs2_align(NULL, dest, flags)) {
 758                 *(char *)dest = 0;
 759                 dest = (void *)((char *)dest + 1);
 760                 if (dest_len) dest_len--;
 761                 len++;
 762         }
 763
 764         /* ucs2 is always a multiple of 2 bytes */
 765         dest_len &= ~1;
 766
 767         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, false);
 768         if (ret == (size_t)-1) {
 769                 return 0;
 770         }
 771
 772         len += ret;
 773
 774         return len;
 775 }
 776
 777
 778 /**
 779  * Copy a string from a unix char* src to a UCS2 destination,
 780  * allocating a buffer using talloc().
 781  *
 782  * @param dest always set at least to NULL
 783  *
 784  * @returns The number of bytes occupied by the string in the destination
 785  *         or -1 in case of error.
 786  **/
 787 _PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
 788 {
 789         size_t src_len = strlen(src)+1;
 790         *dest = NULL;
 791         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
 792 }
 793
 794
 795 /**
 796  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
 797  *
 798  * @param dest always set at least to NULL
 799  *
 800  * @returns The number of bytes occupied by the string in the destination
 801  **/
 802
 803 _PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 804 {
 805         size_t src_len = strlen(src)+1;
 806         *dest = NULL;
 807         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
 808 }
 809
 810 /**
 811  Copy a string from a ucs2 source to a unix char* destination.
 812  Flags can have:
 813   STR_TERMINATE means the string in src is null terminated.
 814   STR_NOALIGN   means don't try to align.
 815  if STR_TERMINATE is set then src_len is ignored if it is -1.
 816  src_len is the length of the source area in bytes
 817  Return the number of bytes occupied by the string in src.
 818  The resulting string in "dest" is always null terminated.
 819 **/
 820
 821 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 822 {
 823         size_t ret;
 824
 825         if (ucs2_align(NULL, src, flags)) {
 826                 src = (const void *)((const char *)src + 1);
 827                 if (src_len > 0)
 828                         src_len--;
 829         }
 830
 831         if (flags & STR_TERMINATE) {
 832                 if (src_len == (size_t)-1) {
 833                         src_len = utf16_len(src);
 834                 } else {
 835                         src_len = utf16_len_n(src, src_len);
 836                 }
 837         }
 838
 839         /* ucs2 is always a multiple of 2 bytes */
 840         if (src_len != (size_t)-1)
 841                 src_len &= ~1;
 842
 843         ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, false);
 844         if (dest_len)
 845                 dest[MIN(ret, dest_len-1)] = 0;
 846
 847         return src_len;
 848 }
 849
 850 /**
 851  * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
 852  *
 853  * @param dest always set at least to NULL
 854  *
 855  * @returns The number of bytes occupied by the string in the destination
 856  **/
 857
 858 _PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 859 {
 860         size_t src_len = strlen(src)+1;
 861         *dest = NULL;
 862         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 863 }
 864
 865 /**
 866  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
 867  *
 868  * @param dest always set at least to NULL
 869  *
 870  * @returns The number of bytes occupied by the string in the destination
 871  **/
 872
 873 _PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
 874 {
 875         size_t src_len = utf16_len(src);
 876         *dest = NULL;
 877         return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 878 }
 879
 880 /**
 881  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
 882  *
 883  * @param dest always set at least to NULL
 884  *
 885  * @returns The number of bytes occupied by the string in the destination
 886  **/
 887
 888 _PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 889 {
 890         size_t src_len = strlen(src)+1;
 891         *dest = NULL;
 892         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 893 }
 894
 895 /**
 896  Copy a string from a char* src to a unicode or ascii
 897  dos codepage destination choosing unicode or ascii based on the
 898  flags in the SMB buffer starting at base_ptr.
 899  Return the number of bytes occupied by the string in the destination.
 900  flags can have:
 901   STR_TERMINATE means include the null termination.
 902   STR_UPPER     means uppercase in the destination.
 903   STR_ASCII     use ascii even with unicode packet.
 904   STR_NOALIGN   means don't do alignment.
 905  dest_len is the maximum length allowed in the destination. If dest_len
 906  is -1 then no maxiumum is used.
 907 **/
 908
 909 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
 910 {
 911         if (flags & STR_ASCII) {
 912                 return push_ascii(dest, src, dest_len, flags);
 913         } else if (flags & STR_UNICODE) {
 914                 return push_ucs2(dest, src, dest_len, flags);
 915         } else {
 916                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
 917                 return -1;
 918         }
 919 }
 920
 921
 922 /**
 923  Copy a string from a unicode or ascii source (depending on
 924  the packet flags) to a char* destination.
 925  Flags can have:
 926   STR_TERMINATE means the string in src is null terminated.
 927   STR_UNICODE   means to force as unicode.
 928   STR_ASCII     use ascii even with unicode packet.
 929   STR_NOALIGN   means don't do alignment.
 930  if STR_TERMINATE is set then src_len is ignored is it is -1
 931  src_len is the length of the source area in bytes.
 932  Return the number of bytes occupied by the string in src.
 933  The resulting string in "dest" is always null terminated.
 934 **/
 935
 936 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 937 {
 938         if (flags & STR_ASCII) {
 939                 return pull_ascii(dest, src, dest_len, src_len, flags);
 940         } else if (flags & STR_UNICODE) {
 941                 return pull_ucs2(dest, src, dest_len, src_len, flags);
 942         } else {
 943                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
 944                 return -1;
 945         }
 946 }
 947
 948
 949 /**
 950  * Convert string from one encoding to another, making error checking etc
 951  *
 952  * @param src pointer to source string (multibyte or singlebyte)
 953  * @param srclen length of the source string in bytes
 954  * @param dest pointer to destination string (multibyte or singlebyte)
 955  * @param destlen maximal length allowed for string
 956  * @returns the number of bytes occupied in the destination
 957  **/
 958 _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
 959                                 void const *src, size_t srclen,
 960                                 void *dest, size_t destlen,
 961                                 bool allow_badcharcnv)
 962 {
 963         size_t ret;
 964         if (!convert_string_convenience(get_iconv_convenience(), from, to,
 965                                                                           src, srclen,
 966                                                                           dest, destlen, &ret,
 967                                                                           allow_badcharcnv))
 968                 return -1;
 969         return ret;
 970 }
 971
 972 /**
 973  * Convert between character sets, allocating a new buffer using talloc for the result.
 974  *
 975  * @param srclen length of source buffer.
 976  * @param dest always set at least to NULL
 977  * @param converted_size Size in bytes of the converted string
 978  * @note -1 is not accepted for srclen.
 979  *
 980  * @returns boolean indication whether the conversion succeeded
 981  **/
 982
 983 _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
 984                                        charset_t from, charset_t to,
 985                                        void const *src, size_t srclen,
 986                                        void *dest, size_t *converted_size,
 987                                            bool allow_badcharcnv)
 988 {
 989         return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
 990                                                                                          from, to, src, srclen, dest,
 991                                                                                          converted_size,
 992                                                                                          allow_badcharcnv);
 993 }
 994
 995
 996 _PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
 997 {
 998         return next_codepoint_convenience(get_iconv_convenience(), str, size);
 999 }
1000
1001 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1002 {
1003         return push_codepoint_convenience(get_iconv_convenience(), str, c);
1004 }