lib/util/charset/util_unistr.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4    Copyright (C) Andrew Tridgell 1992-2001
   5    Copyright (C) Simo Sorce 2001
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "includes.h"
  22 #include "system/locale.h"
  23
  24 struct smb_iconv_convenience *global_iconv_convenience = NULL;
  25
  26 static inline struct smb_iconv_convenience *get_iconv_convenience(void)
  27 {
  28         if (global_iconv_convenience == NULL)
  29                 global_iconv_convenience = smb_iconv_convenience_init(talloc_autofree_context(), "ASCII", "UTF-8", true);
  30         return global_iconv_convenience;
  31 }
  32
  33 /**
  34  Case insensitive string compararison
  35 **/
  36 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
  37 {
  38         codepoint_t c1=0, c2=0;
  39         size_t size1, size2;
  40         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
  41
  42         /* handle null ptr comparisons to simplify the use in qsort */
  43         if (s1 == s2) return 0;
  44         if (s1 == NULL) return -1;
  45         if (s2 == NULL) return 1;
  46
  47         while (*s1 && *s2) {
  48                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
  49                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
  50
  51                 s1 += size1;
  52                 s2 += size2;
  53
  54                 if (c1 == c2) {
  55                         continue;
  56                 }
  57
  58                 if (c1 == INVALID_CODEPOINT ||
  59                     c2 == INVALID_CODEPOINT) {
  60                         /* what else can we do?? */
  61                         return strcasecmp(s1, s2);
  62                 }
  63
  64                 if (toupper_m(c1) != toupper_m(c2)) {
  65                         return c1 - c2;
  66                 }
  67         }
  68
  69         return *s1 - *s2;
  70 }
  71
  72 /**
  73  * Get the next token from a string, return False if none found.
  74  * Handles double-quotes.
  75  *
  76  * Based on a routine by GJC@VILLAGE.COM.
  77  * Extensively modified by Andrew.Tridgell@anu.edu.au
  78  **/
  79 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
  80 {
  81         const char *s;
  82         bool quoted;
  83         size_t len=1;
  84
  85         if (!ptr)
  86                 return false;
  87
  88         s = *ptr;
  89
  90         /* default to simple separators */
  91         if (!sep)
  92                 sep = " \t\n\r";
  93
  94         /* find the first non sep char */
  95         while (*s && strchr_m(sep,*s))
  96                 s++;
  97
  98         /* nothing left? */
  99         if (!*s)
 100                 return false;
 101
 102         /* copy over the token */
 103         for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
 104                 if (*s == '\"') {
 105                         quoted = !quoted;
 106                 } else {
 107                         len++;
 108                         *buff++ = *s;
 109                 }
 110         }
 111
 112         *ptr = (*s) ? s+1 : s;
 113         *buff = 0;
 114
 115         return true;
 116 }
 117
 118 /**
 119  Case insensitive string compararison, length limited
 120 **/
 121 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
 122 {
 123         codepoint_t c1=0, c2=0;
 124         size_t size1, size2;
 125         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 126
 127         /* handle null ptr comparisons to simplify the use in qsort */
 128         if (s1 == s2) return 0;
 129         if (s1 == NULL) return -1;
 130         if (s2 == NULL) return 1;
 131
 132         while (*s1 && *s2 && n) {
 133                 n--;
 134
 135                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
 136                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
 137
 138                 s1 += size1;
 139                 s2 += size2;
 140
 141                 if (c1 == c2) {
 142                         continue;
 143                 }
 144
 145                 if (c1 == INVALID_CODEPOINT ||
 146                     c2 == INVALID_CODEPOINT) {
 147                         /* what else can we do?? */
 148                         return strcasecmp(s1, s2);
 149                 }
 150
 151                 if (toupper_m(c1) != toupper_m(c2)) {
 152                         return c1 - c2;
 153                 }
 154         }
 155
 156         if (n == 0) {
 157                 return 0;
 158         }
 159
 160         return *s1 - *s2;
 161 }
 162
 163 /**
 164  * Compare 2 strings.
 165  *
 166  * @note The comparison is case-insensitive.
 167  **/
 168 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
 169 {
 170         return strcasecmp_m(s1,s2) == 0;
 171 }
 172
 173 /**
 174  Compare 2 strings (case sensitive).
 175 **/
 176 _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
 177 {
 178         if (s1 == s2)
 179                 return true;
 180         if (!s1 || !s2)
 181                 return false;
 182
 183         return strcmp(s1,s2) == 0;
 184 }
 185
 186
 187 /**
 188  String replace.
 189  NOTE: oldc and newc must be 7 bit characters
 190 **/
 191 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
 192 {
 193         struct smb_iconv_convenience *ic = get_iconv_convenience();
 194         while (s && *s) {
 195                 size_t size;
 196                 codepoint_t c = next_codepoint_convenience(ic, s, &size);
 197                 if (c == oldc) {
 198                         *s = newc;
 199                 }
 200                 s += size;
 201         }
 202 }
 203
 204 /**
 205  Paranoid strcpy into a buffer of given length (includes terminating
 206  zero. Strips out all but 'a-Z0-9' and the character in other_safe_chars
 207  and replaces with '_'. Deliberately does *NOT* check for multibyte
 208  characters. Don't change it !
 209 **/
 210
 211 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
 212 {
 213         size_t len, i;
 214
 215         if (maxlength == 0) {
 216                 /* can't fit any bytes at all! */
 217                 return NULL;
 218         }
 219
 220         if (!dest) {
 221                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
 222                 return NULL;
 223         }
 224
 225         if (!src) {
 226                 *dest = 0;
 227                 return dest;
 228         }
 229
 230         len = strlen(src);
 231         if (len >= maxlength)
 232                 len = maxlength - 1;
 233
 234         if (!other_safe_chars)
 235                 other_safe_chars = "";
 236
 237         for(i = 0; i < len; i++) {
 238                 int val = (src[i] & 0xff);
 239                 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
 240                         dest[i] = src[i];
 241                 else
 242                         dest[i] = '_';
 243         }
 244
 245         dest[i] = '\0';
 246
 247         return dest;
 248 }
 249
 250 /**
 251  Count the number of UCS2 characters in a string. Normally this will
 252  be the same as the number of bytes in a string for single byte strings,
 253  but will be different for multibyte.
 254 **/
 255 _PUBLIC_ size_t strlen_m(const char *s)
 256 {
 257         size_t count = 0;
 258         struct smb_iconv_convenience *ic = get_iconv_convenience();
 259
 260         if (!s) {
 261                 return 0;
 262         }
 263
 264         while (*s && !(((uint8_t)*s) & 0x80)) {
 265                 s++;
 266                 count++;
 267         }
 268
 269         if (!*s) {
 270                 return count;
 271         }
 272
 273         while (*s) {
 274                 size_t c_size;
 275                 codepoint_t c = next_codepoint_convenience(ic, s, &c_size);
 276                 if (c < 0x10000) {
 277                         count += 1;
 278                 } else {
 279                         count += 2;
 280                 }
 281                 s += c_size;
 282         }
 283
 284         return count;
 285 }
 286
 287 /**
 288    Work out the number of multibyte chars in a string, including the NULL
 289    terminator.
 290 **/
 291 _PUBLIC_ size_t strlen_m_term(const char *s)
 292 {
 293         if (!s) {
 294                 return 0;
 295         }
 296
 297         return strlen_m(s) + 1;
 298 }
 299
 300 /*
 301  * Weird helper routine for the winreg pipe: If nothing is around, return 0,
 302  * if a string is there, include the terminator.
 303  */
 304
 305 _PUBLIC_ size_t strlen_m_term_null(const char *s)
 306 {
 307         size_t len;
 308         if (!s) {
 309                 return 0;
 310         }
 311         len = strlen_m(s);
 312         if (len == 0) {
 313                 return 0;
 314         }
 315
 316         return len+1;
 317 }
 318
 319 /**
 320  Strchr and strrchr_m are a bit complex on general multi-byte strings.
 321 **/
 322 _PUBLIC_ char *strchr_m(const char *s, char c)
 323 {
 324         struct smb_iconv_convenience *ic = get_iconv_convenience();
 325         if (s == NULL) {
 326                 return NULL;
 327         }
 328         /* characters below 0x3F are guaranteed to not appear in
 329            non-initial position in multi-byte charsets */
 330         if ((c & 0xC0) == 0) {
 331                 return strchr(s, c);
 332         }
 333
 334         while (*s) {
 335                 size_t size;
 336                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 337                 if (c2 == c) {
 338                         return discard_const_p(char, s);
 339                 }
 340                 s += size;
 341         }
 342
 343         return NULL;
 344 }
 345
 346 /**
 347  * Multibyte-character version of strrchr
 348  */
 349 _PUBLIC_ char *strrchr_m(const char *s, char c)
 350 {
 351         struct smb_iconv_convenience *ic = get_iconv_convenience();
 352         char *ret = NULL;
 353
 354         if (s == NULL) {
 355                 return NULL;
 356         }
 357
 358         /* characters below 0x3F are guaranteed to not appear in
 359            non-initial position in multi-byte charsets */
 360         if ((c & 0xC0) == 0) {
 361                 return strrchr(s, c);
 362         }
 363
 364         while (*s) {
 365                 size_t size;
 366                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 367                 if (c2 == c) {
 368                         ret = discard_const_p(char, s);
 369                 }
 370                 s += size;
 371         }
 372
 373         return ret;
 374 }
 375
 376 /**
 377   return True if any (multi-byte) character is lower case
 378 */
 379 _PUBLIC_ bool strhaslower(const char *string)
 380 {
 381         struct smb_iconv_convenience *ic = get_iconv_convenience();
 382         while (*string) {
 383                 size_t c_size;
 384                 codepoint_t s;
 385                 codepoint_t t;
 386
 387                 s = next_codepoint_convenience(ic, string, &c_size);
 388                 string += c_size;
 389
 390                 t = toupper_m(s);
 391
 392                 if (s != t) {
 393                         return true; /* that means it has lower case chars */
 394                 }
 395         }
 396
 397         return false;
 398 }
 399
 400 /**
 401   return True if any (multi-byte) character is upper case
 402 */
 403 _PUBLIC_ bool strhasupper(const char *string)
 404 {
 405         struct smb_iconv_convenience *ic = get_iconv_convenience();
 406         while (*string) {
 407                 size_t c_size;
 408                 codepoint_t s;
 409                 codepoint_t t;
 410
 411                 s = next_codepoint_convenience(ic, string, &c_size);
 412                 string += c_size;
 413
 414                 t = tolower_m(s);
 415
 416                 if (s != t) {
 417                         return true; /* that means it has upper case chars */
 418                 }
 419         }
 420
 421         return false;
 422 }
 423
 424 /**
 425  Convert a string to lower case, allocated with talloc
 426 **/
 427 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 428 {
 429         size_t size=0;
 430         char *dest;
 431         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 432
 433         /* this takes advantage of the fact that upper/lower can't
 434            change the length of a character by more than 1 byte */
 435         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
 436         if (dest == NULL) {
 437                 return NULL;
 438         }
 439
 440         while (*src) {
 441                 size_t c_size;
 442                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 443                 src += c_size;
 444
 445                 c = tolower_m(c);
 446
 447                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 448                 if (c_size == -1) {
 449                         talloc_free(dest);
 450                         return NULL;
 451                 }
 452                 size += c_size;
 453         }
 454
 455         dest[size] = 0;
 456
 457         /* trim it so talloc_append_string() works */
 458         dest = talloc_realloc(ctx, dest, char, size+1);
 459
 460         talloc_set_name_const(dest, dest);
 461
 462         return dest;
 463 }
 464
 465 /**
 466  Convert a string to UPPER case, allocated with talloc
 467  source length limited to n bytes
 468 **/
 469 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
 470 {
 471         size_t size=0;
 472         char *dest;
 473         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 474
 475         if (!src) {
 476                 return NULL;
 477         }
 478
 479         /* this takes advantage of the fact that upper/lower can't
 480            change the length of a character by more than 1 byte */
 481         dest = talloc_array(ctx, char, 2*(n+1));
 482         if (dest == NULL) {
 483                 return NULL;
 484         }
 485
 486         while (n-- && *src) {
 487                 size_t c_size;
 488                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 489                 src += c_size;
 490
 491                 c = toupper_m(c);
 492
 493                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 494                 if (c_size == -1) {
 495                         talloc_free(dest);
 496                         return NULL;
 497                 }
 498                 size += c_size;
 499         }
 500
 501         dest[size] = 0;
 502
 503         /* trim it so talloc_append_string() works */
 504         dest = talloc_realloc(ctx, dest, char, size+1);
 505
 506         talloc_set_name_const(dest, dest);
 507
 508         return dest;
 509 }
 510
 511 /**
 512  Convert a string to UPPER case, allocated with talloc
 513 **/
 514 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 515 {
 516         return strupper_talloc_n(ctx, src, src?strlen(src):0);
 517 }
 518
 519 /**
 520  talloc_strdup() a unix string to upper case.
 521 **/
 522 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
 523 {
 524         return strupper_talloc(ctx, src);
 525 }
 526
 527 /**
 528  Convert a string to lower case.
 529 **/
 530 _PUBLIC_ void strlower_m(char *s)
 531 {
 532         char *d;
 533         struct smb_iconv_convenience *iconv_convenience;
 534
 535         /* this is quite a common operation, so we want it to be
 536            fast. We optimise for the ascii case, knowing that all our
 537            supported multi-byte character sets are ascii-compatible
 538            (ie. they match for the first 128 chars) */
 539         while (*s && !(((uint8_t)*s) & 0x80)) {
 540                 *s = tolower((uint8_t)*s);
 541                 s++;
 542         }
 543
 544         if (!*s)
 545                 return;
 546
 547         iconv_convenience = get_iconv_convenience();
 548
 549         d = s;
 550
 551         while (*s) {
 552                 size_t c_size, c_size2;
 553                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 554                 c_size2 = push_codepoint_convenience(iconv_convenience, d, tolower_m(c));
 555                 if (c_size2 > c_size) {
 556                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
 557                                  c, tolower_m(c), (int)c_size, (int)c_size2));
 558                         smb_panic("codepoint expansion in strlower_m\n");
 559                 }
 560                 s += c_size;
 561                 d += c_size2;
 562         }
 563         *d = 0;
 564 }
 565
 566 /**
 567  Convert a string to UPPER case.
 568 **/
 569 _PUBLIC_ void strupper_m(char *s)
 570 {
 571         char *d;
 572         struct smb_iconv_convenience *iconv_convenience;
 573
 574         /* this is quite a common operation, so we want it to be
 575            fast. We optimise for the ascii case, knowing that all our
 576            supported multi-byte character sets are ascii-compatible
 577            (ie. they match for the first 128 chars) */
 578         while (*s && !(((uint8_t)*s) & 0x80)) {
 579                 *s = toupper((uint8_t)*s);
 580                 s++;
 581         }
 582
 583         if (!*s)
 584                 return;
 585
 586         iconv_convenience = get_iconv_convenience();
 587
 588         d = s;
 589
 590         while (*s) {
 591                 size_t c_size, c_size2;
 592                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 593                 c_size2 = push_codepoint_convenience(iconv_convenience, d, toupper_m(c));
 594                 if (c_size2 > c_size) {
 595                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
 596                                  c, toupper_m(c), (int)c_size, (int)c_size2));
 597                         smb_panic("codepoint expansion in strupper_m\n");
 598                 }
 599                 s += c_size;
 600                 d += c_size2;
 601         }
 602         *d = 0;
 603 }
 604
 605
 606 /**
 607  Find the number of 'c' chars in a string
 608 **/
 609 _PUBLIC_ size_t count_chars_m(const char *s, char c)
 610 {
 611         struct smb_iconv_convenience *ic = get_iconv_convenience();
 612         size_t count = 0;
 613
 614         while (*s) {
 615                 size_t size;
 616                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 617                 if (c2 == c) count++;
 618                 s += size;
 619         }
 620
 621         return count;
 622 }
 623
 624
 625 /**
 626  * Copy a string from a char* unix src to a dos codepage string destination.
 627  *
 628  * @return the number of bytes occupied by the string in the destination.
 629  *
 630  * @param flags can include
 631  * <dl>
 632  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 633  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 634  * </dl>
 635  *
 636  * @param dest_len the maximum length in bytes allowed in the
 637  * destination.  If @p dest_len is -1 then no maximum is used.
 638  **/
 639 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 640 {
 641         size_t src_len;
 642         ssize_t ret;
 643
 644         if (flags & STR_UPPER) {
 645                 char *tmpbuf = strupper_talloc(NULL, src);
 646                 if (tmpbuf == NULL) {
 647                         return -1;
 648                 }
 649                 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 650                 talloc_free(tmpbuf);
 651                 return ret;
 652         }
 653
 654         src_len = strlen(src);
 655
 656         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 657                 src_len++;
 658
 659         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, false);
 660 }
 661
 662 /**
 663  * Copy a string from a unix char* src to an ASCII destination,
 664  * allocating a buffer using talloc().
 665  *
 666  * @param dest always set at least to NULL
 667  *
 668  * @returns The number of bytes occupied by the string in the destination
 669  *         or -1 in case of error.
 670  **/
 671 _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 672 {
 673         size_t src_len = strlen(src)+1;
 674         *dest = NULL;
 675         return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
 676 }
 677
 678
 679 /**
 680  * Copy a string from a dos codepage source to a unix char* destination.
 681  *
 682  * The resulting string in "dest" is always null terminated.
 683  *
 684  * @param flags can have:
 685  * <dl>
 686  * <dt>STR_TERMINATE</dt>
 687  * <dd>STR_TERMINATE means the string in @p src
 688  * is null terminated, and src_len is ignored.</dd>
 689  * </dl>
 690  *
 691  * @param src_len is the length of the source area in bytes.
 692  * @returns the number of bytes occupied by the string in @p src.
 693  **/
 694 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 695 {
 696         size_t ret;
 697
 698         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 699                 if (src_len == (size_t)-1) {
 700                         src_len = strlen((const char *)src) + 1;
 701                 } else {
 702                         size_t len = strnlen((const char *)src, src_len);
 703                         if (len < src_len)
 704                                 len++;
 705                         src_len = len;
 706                 }
 707         }
 708
 709         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, false);
 710
 711         if (dest_len)
 712                 dest[MIN(ret, dest_len-1)] = 0;
 713
 714         return src_len;
 715 }
 716
 717 /**
 718  * Copy a string from a char* src to a unicode destination.
 719  *
 720  * @returns the number of bytes occupied by the string in the destination.
 721  *
 722  * @param flags can have:
 723  *
 724  * <dl>
 725  * <dt>STR_TERMINATE <dd>means include the null termination.
 726  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 727  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 728  * </dl>
 729  *
 730  * @param dest_len is the maximum length allowed in the
 731  * destination. If dest_len is -1 then no maxiumum is used.
 732  **/
 733 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
 734 {
 735         size_t len=0;
 736         size_t src_len = strlen(src);
 737         size_t ret;
 738
 739         if (flags & STR_UPPER) {
 740                 char *tmpbuf = strupper_talloc(NULL, src);
 741                 if (tmpbuf == NULL) {
 742                         return -1;
 743                 }
 744                 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 745                 talloc_free(tmpbuf);
 746                 return ret;
 747         }
 748
 749         if (flags & STR_TERMINATE)
 750                 src_len++;
 751
 752         if (ucs2_align(NULL, dest, flags)) {
 753                 *(char *)dest = 0;
 754                 dest = (void *)((char *)dest + 1);
 755                 if (dest_len) dest_len--;
 756                 len++;
 757         }
 758
 759         /* ucs2 is always a multiple of 2 bytes */
 760         dest_len &= ~1;
 761
 762         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, false);
 763         if (ret == (size_t)-1) {
 764                 return 0;
 765         }
 766
 767         len += ret;
 768
 769         return len;
 770 }
 771
 772
 773 /**
 774  * Copy a string from a unix char* src to a UCS2 destination,
 775  * allocating a buffer using talloc().
 776  *
 777  * @param dest always set at least to NULL
 778  *
 779  * @returns The number of bytes occupied by the string in the destination
 780  *         or -1 in case of error.
 781  **/
 782 _PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
 783 {
 784         size_t src_len = strlen(src)+1;
 785         *dest = NULL;
 786         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
 787 }
 788
 789
 790 /**
 791  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
 792  *
 793  * @param dest always set at least to NULL
 794  *
 795  * @returns The number of bytes occupied by the string in the destination
 796  **/
 797
 798 _PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 799 {
 800         size_t src_len = strlen(src)+1;
 801         *dest = NULL;
 802         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
 803 }
 804
 805 /**
 806  Copy a string from a ucs2 source to a unix char* destination.
 807  Flags can have:
 808   STR_TERMINATE means the string in src is null terminated.
 809   STR_NOALIGN   means don't try to align.
 810  if STR_TERMINATE is set then src_len is ignored if it is -1.
 811  src_len is the length of the source area in bytes
 812  Return the number of bytes occupied by the string in src.
 813  The resulting string in "dest" is always null terminated.
 814 **/
 815
 816 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 817 {
 818         size_t ret;
 819
 820         if (ucs2_align(NULL, src, flags)) {
 821                 src = (const void *)((const char *)src + 1);
 822                 if (src_len > 0)
 823                         src_len--;
 824         }
 825
 826         if (flags & STR_TERMINATE) {
 827                 if (src_len == (size_t)-1) {
 828                         src_len = utf16_len(src);
 829                 } else {
 830                         src_len = utf16_len_n(src, src_len);
 831                 }
 832         }
 833
 834         /* ucs2 is always a multiple of 2 bytes */
 835         if (src_len != (size_t)-1)
 836                 src_len &= ~1;
 837
 838         ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, false);
 839         if (dest_len)
 840                 dest[MIN(ret, dest_len-1)] = 0;
 841
 842         return src_len;
 843 }
 844
 845 /**
 846  * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
 847  *
 848  * @param dest always set at least to NULL
 849  *
 850  * @returns The number of bytes occupied by the string in the destination
 851  **/
 852
 853 _PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 854 {
 855         size_t src_len = strlen(src)+1;
 856         *dest = NULL;
 857         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 858 }
 859
 860 /**
 861  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
 862  *
 863  * @param dest always set at least to NULL
 864  *
 865  * @returns The number of bytes occupied by the string in the destination
 866  **/
 867
 868 _PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
 869 {
 870         size_t src_len = utf16_len(src);
 871         *dest = NULL;
 872         return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 873 }
 874
 875 /**
 876  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
 877  *
 878  * @param dest always set at least to NULL
 879  *
 880  * @returns The number of bytes occupied by the string in the destination
 881  **/
 882
 883 _PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 884 {
 885         size_t src_len = strlen(src)+1;
 886         *dest = NULL;
 887         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 888 }
 889
 890 /**
 891  Copy a string from a char* src to a unicode or ascii
 892  dos codepage destination choosing unicode or ascii based on the
 893  flags in the SMB buffer starting at base_ptr.
 894  Return the number of bytes occupied by the string in the destination.
 895  flags can have:
 896   STR_TERMINATE means include the null termination.
 897   STR_UPPER     means uppercase in the destination.
 898   STR_ASCII     use ascii even with unicode packet.
 899   STR_NOALIGN   means don't do alignment.
 900  dest_len is the maximum length allowed in the destination. If dest_len
 901  is -1 then no maxiumum is used.
 902 **/
 903
 904 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
 905 {
 906         if (flags & STR_ASCII) {
 907                 return push_ascii(dest, src, dest_len, flags);
 908         } else if (flags & STR_UNICODE) {
 909                 return push_ucs2(dest, src, dest_len, flags);
 910         } else {
 911                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
 912                 return -1;
 913         }
 914 }
 915
 916
 917 /**
 918  Copy a string from a unicode or ascii source (depending on
 919  the packet flags) to a char* destination.
 920  Flags can have:
 921   STR_TERMINATE means the string in src is null terminated.
 922   STR_UNICODE   means to force as unicode.
 923   STR_ASCII     use ascii even with unicode packet.
 924   STR_NOALIGN   means don't do alignment.
 925  if STR_TERMINATE is set then src_len is ignored is it is -1
 926  src_len is the length of the source area in bytes.
 927  Return the number of bytes occupied by the string in src.
 928  The resulting string in "dest" is always null terminated.
 929 **/
 930
 931 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 932 {
 933         if (flags & STR_ASCII) {
 934                 return pull_ascii(dest, src, dest_len, src_len, flags);
 935         } else if (flags & STR_UNICODE) {
 936                 return pull_ucs2(dest, src, dest_len, src_len, flags);
 937         } else {
 938                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
 939                 return -1;
 940         }
 941 }
 942
 943
 944 /**
 945  * Convert string from one encoding to another, making error checking etc
 946  *
 947  * @param src pointer to source string (multibyte or singlebyte)
 948  * @param srclen length of the source string in bytes
 949  * @param dest pointer to destination string (multibyte or singlebyte)
 950  * @param destlen maximal length allowed for string
 951  * @returns the number of bytes occupied in the destination
 952  **/
 953 _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
 954                                 void const *src, size_t srclen,
 955                                 void *dest, size_t destlen,
 956                                 bool allow_badcharcnv)
 957 {
 958         size_t ret;
 959         if (!convert_string_convenience(get_iconv_convenience(), from, to,
 960                                                                           src, srclen,
 961                                                                           dest, destlen, &ret,
 962                                                                           allow_badcharcnv))
 963                 return -1;
 964         return ret;
 965 }
 966
 967 /**
 968  * Convert between character sets, allocating a new buffer using talloc for the result.
 969  *
 970  * @param srclen length of source buffer.
 971  * @param dest always set at least to NULL
 972  * @param converted_size Size in bytes of the converted string
 973  * @note -1 is not accepted for srclen.
 974  *
 975  * @returns boolean indication whether the conversion succeeded
 976  **/
 977
 978 _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
 979                                        charset_t from, charset_t to,
 980                                        void const *src, size_t srclen,
 981                                        void *dest, size_t *converted_size,
 982                                            bool allow_badcharcnv)
 983 {
 984         return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
 985                                                                                          from, to, src, srclen, dest,
 986                                                                                          converted_size,
 987                                                                                          allow_badcharcnv);
 988 }
 989
 990
 991 _PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
 992 {
 993         return next_codepoint_convenience(get_iconv_convenience(), str, size);
 994 }
 995
 996 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
 997 {
 998         return push_codepoint_convenience(get_iconv_convenience(), str, c);
 999 }