lib/util/charset/util_unistr.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4    Copyright (C) Andrew Tridgell 1992-2001
   5    Copyright (C) Simo Sorce 2001
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 #include "includes.h"
  22 #include "system/locale.h"
  23
  24 struct smb_iconv_convenience *global_iconv_convenience = NULL;
  25
  26 static inline struct smb_iconv_convenience *get_iconv_convenience(void)
  27 {
  28         if (global_iconv_convenience == NULL)
  29                 global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(),
  30                                                                         "ASCII", "UTF-8", true, NULL);
  31         return global_iconv_convenience;
  32 }
  33
  34 /**
  35  Case insensitive string compararison
  36 **/
  37 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
  38 {
  39         codepoint_t c1=0, c2=0;
  40         size_t size1, size2;
  41         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
  42
  43         /* handle null ptr comparisons to simplify the use in qsort */
  44         if (s1 == s2) return 0;
  45         if (s1 == NULL) return -1;
  46         if (s2 == NULL) return 1;
  47
  48         while (*s1 && *s2) {
  49                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
  50                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
  51
  52                 s1 += size1;
  53                 s2 += size2;
  54
  55                 if (c1 == c2) {
  56                         continue;
  57                 }
  58
  59                 if (c1 == INVALID_CODEPOINT ||
  60                     c2 == INVALID_CODEPOINT) {
  61                         /* what else can we do?? */
  62                         return strcasecmp(s1, s2);
  63                 }
  64
  65                 if (toupper_m(c1) != toupper_m(c2)) {
  66                         return c1 - c2;
  67                 }
  68         }
  69
  70         return *s1 - *s2;
  71 }
  72
  73 /**
  74  * Get the next token from a string, return False if none found.
  75  * Handles double-quotes.
  76  *
  77  * Based on a routine by GJC@VILLAGE.COM.
  78  * Extensively modified by Andrew.Tridgell@anu.edu.au
  79  **/
  80 _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
  81 {
  82         const char *s;
  83         bool quoted;
  84         size_t len=1;
  85
  86         if (!ptr)
  87                 return false;
  88
  89         s = *ptr;
  90
  91         /* default to simple separators */
  92         if (!sep)
  93                 sep = " \t\n\r";
  94
  95         /* find the first non sep char */
  96         while (*s && strchr_m(sep,*s))
  97                 s++;
  98
  99         /* nothing left? */
 100         if (!*s)
 101                 return false;
 102
 103         /* copy over the token */
 104         for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
 105                 if (*s == '\"') {
 106                         quoted = !quoted;
 107                 } else {
 108                         len++;
 109                         *buff++ = *s;
 110                 }
 111         }
 112
 113         *ptr = (*s) ? s+1 : s;
 114         *buff = 0;
 115
 116         return true;
 117 }
 118
 119 /**
 120  Case insensitive string compararison, length limited
 121 **/
 122 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
 123 {
 124         codepoint_t c1=0, c2=0;
 125         size_t size1, size2;
 126         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 127
 128         /* handle null ptr comparisons to simplify the use in qsort */
 129         if (s1 == s2) return 0;
 130         if (s1 == NULL) return -1;
 131         if (s2 == NULL) return 1;
 132
 133         while (*s1 && *s2 && n) {
 134                 n--;
 135
 136                 c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
 137                 c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
 138
 139                 s1 += size1;
 140                 s2 += size2;
 141
 142                 if (c1 == c2) {
 143                         continue;
 144                 }
 145
 146                 if (c1 == INVALID_CODEPOINT ||
 147                     c2 == INVALID_CODEPOINT) {
 148                         /* what else can we do?? */
 149                         return strcasecmp(s1, s2);
 150                 }
 151
 152                 if (toupper_m(c1) != toupper_m(c2)) {
 153                         return c1 - c2;
 154                 }
 155         }
 156
 157         if (n == 0) {
 158                 return 0;
 159         }
 160
 161         return *s1 - *s2;
 162 }
 163
 164 /**
 165  * Compare 2 strings.
 166  *
 167  * @note The comparison is case-insensitive.
 168  **/
 169 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
 170 {
 171         return strcasecmp_m(s1,s2) == 0;
 172 }
 173
 174 /**
 175  Compare 2 strings (case sensitive).
 176 **/
 177 _PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
 178 {
 179         if (s1 == s2)
 180                 return true;
 181         if (!s1 || !s2)
 182                 return false;
 183
 184         return strcmp(s1,s2) == 0;
 185 }
 186
 187
 188 /**
 189  String replace.
 190  NOTE: oldc and newc must be 7 bit characters
 191 **/
 192 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
 193 {
 194         struct smb_iconv_convenience *ic = get_iconv_convenience();
 195         while (s && *s) {
 196                 size_t size;
 197                 codepoint_t c = next_codepoint_convenience(ic, s, &size);
 198                 if (c == oldc) {
 199                         *s = newc;
 200                 }
 201                 s += size;
 202         }
 203 }
 204
 205 /**
 206  Paranoid strcpy into a buffer of given length (includes terminating
 207  zero. Strips out all but 'a-Z0-9' and the character in other_safe_chars
 208  and replaces with '_'. Deliberately does *NOT* check for multibyte
 209  characters. Don't change it !
 210 **/
 211
 212 _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
 213 {
 214         size_t len, i;
 215
 216         if (maxlength == 0) {
 217                 /* can't fit any bytes at all! */
 218                 return NULL;
 219         }
 220
 221         if (!dest) {
 222                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
 223                 return NULL;
 224         }
 225
 226         if (!src) {
 227                 *dest = 0;
 228                 return dest;
 229         }
 230
 231         len = strlen(src);
 232         if (len >= maxlength)
 233                 len = maxlength - 1;
 234
 235         if (!other_safe_chars)
 236                 other_safe_chars = "";
 237
 238         for(i = 0; i < len; i++) {
 239                 int val = (src[i] & 0xff);
 240                 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
 241                         dest[i] = src[i];
 242                 else
 243                         dest[i] = '_';
 244         }
 245
 246         dest[i] = '\0';
 247
 248         return dest;
 249 }
 250
 251 /**
 252  * Calculate the number of units (8 or 16-bit, depending on the
 253  * destination charset), that would be needed to convert the input
 254  * string which is expected to be in in src_charset encoding to the
 255  * destination charset (which should be a unicode charset).
 256  */
 257 _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
 258 {
 259         size_t count = 0;
 260         struct smb_iconv_convenience *ic = get_iconv_convenience();
 261
 262         if (!s) {
 263                 return 0;
 264         }
 265
 266         while (*s && !(((uint8_t)*s) & 0x80)) {
 267                 s++;
 268                 count++;
 269         }
 270
 271         if (!*s) {
 272                 return count;
 273         }
 274
 275         while (*s) {
 276                 size_t c_size;
 277                 codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
 278                 s += c_size;
 279
 280                 switch (dst_charset) {
 281                 case CH_UTF16LE:
 282                 case CH_UTF16BE:
 283                 case CH_UTF16MUNGED:
 284                         if (c < 0x10000) {
 285                                 count += 1;
 286                         } else {
 287                                 count += 2;
 288                         }
 289                         break;
 290                 case CH_UTF8:
 291                         /*
 292                          * this only checks ranges, and does not
 293                          * check for invalid codepoints
 294                          */
 295                         if (c < 0x80) {
 296                                 count += 1;
 297                         } else if (c < 0x800) {
 298                                 count += 2;
 299                         } else if (c < 0x1000) {
 300                                 count += 3;
 301                         } else {
 302                                 count += 4;
 303                         }
 304                         break;
 305                 default:
 306                         /*
 307                          * non-unicode encoding:
 308                          * assume that each codepoint fits into
 309                          * one unit in the destination encoding.
 310                          */
 311                         count += 1;
 312                 }
 313         }
 314
 315         return count;
 316 }
 317
 318 _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
 319                                   const charset_t dst_charset)
 320 {
 321         if (!s) {
 322                 return 0;
 323         }
 324         return strlen_m_ext(s, src_charset, dst_charset) + 1;
 325 }
 326
 327
 328 /**
 329  Count the number of UCS2 characters in a string. Normally this will
 330  be the same as the number of bytes in a string for single byte strings,
 331  but will be different for multibyte.
 332 **/
 333 _PUBLIC_ size_t strlen_m(const char *s)
 334 {
 335         return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
 336 }
 337
 338 /**
 339    Work out the number of multibyte chars in a string, including the NULL
 340    terminator.
 341 **/
 342 _PUBLIC_ size_t strlen_m_term(const char *s)
 343 {
 344         if (!s) {
 345                 return 0;
 346         }
 347
 348         return strlen_m(s) + 1;
 349 }
 350
 351 /*
 352  * Weird helper routine for the winreg pipe: If nothing is around, return 0,
 353  * if a string is there, include the terminator.
 354  */
 355
 356 _PUBLIC_ size_t strlen_m_term_null(const char *s)
 357 {
 358         size_t len;
 359         if (!s) {
 360                 return 0;
 361         }
 362         len = strlen_m(s);
 363         if (len == 0) {
 364                 return 0;
 365         }
 366
 367         return len+1;
 368 }
 369
 370 /**
 371  Strchr and strrchr_m are a bit complex on general multi-byte strings.
 372 **/
 373 _PUBLIC_ char *strchr_m(const char *s, char c)
 374 {
 375         struct smb_iconv_convenience *ic = get_iconv_convenience();
 376         if (s == NULL) {
 377                 return NULL;
 378         }
 379         /* characters below 0x3F are guaranteed to not appear in
 380            non-initial position in multi-byte charsets */
 381         if ((c & 0xC0) == 0) {
 382                 return strchr(s, c);
 383         }
 384
 385         while (*s) {
 386                 size_t size;
 387                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 388                 if (c2 == c) {
 389                         return discard_const_p(char, s);
 390                 }
 391                 s += size;
 392         }
 393
 394         return NULL;
 395 }
 396
 397 /**
 398  * Multibyte-character version of strrchr
 399  */
 400 _PUBLIC_ char *strrchr_m(const char *s, char c)
 401 {
 402         struct smb_iconv_convenience *ic = get_iconv_convenience();
 403         char *ret = NULL;
 404
 405         if (s == NULL) {
 406                 return NULL;
 407         }
 408
 409         /* characters below 0x3F are guaranteed to not appear in
 410            non-initial position in multi-byte charsets */
 411         if ((c & 0xC0) == 0) {
 412                 return strrchr(s, c);
 413         }
 414
 415         while (*s) {
 416                 size_t size;
 417                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 418                 if (c2 == c) {
 419                         ret = discard_const_p(char, s);
 420                 }
 421                 s += size;
 422         }
 423
 424         return ret;
 425 }
 426
 427 /**
 428   return True if any (multi-byte) character is lower case
 429 */
 430 _PUBLIC_ bool strhaslower(const char *string)
 431 {
 432         struct smb_iconv_convenience *ic = get_iconv_convenience();
 433         while (*string) {
 434                 size_t c_size;
 435                 codepoint_t s;
 436                 codepoint_t t;
 437
 438                 s = next_codepoint_convenience(ic, string, &c_size);
 439                 string += c_size;
 440
 441                 t = toupper_m(s);
 442
 443                 if (s != t) {
 444                         return true; /* that means it has lower case chars */
 445                 }
 446         }
 447
 448         return false;
 449 }
 450
 451 /**
 452   return True if any (multi-byte) character is upper case
 453 */
 454 _PUBLIC_ bool strhasupper(const char *string)
 455 {
 456         struct smb_iconv_convenience *ic = get_iconv_convenience();
 457         while (*string) {
 458                 size_t c_size;
 459                 codepoint_t s;
 460                 codepoint_t t;
 461
 462                 s = next_codepoint_convenience(ic, string, &c_size);
 463                 string += c_size;
 464
 465                 t = tolower_m(s);
 466
 467                 if (s != t) {
 468                         return true; /* that means it has upper case chars */
 469                 }
 470         }
 471
 472         return false;
 473 }
 474
 475 /**
 476  Convert a string to lower case, allocated with talloc
 477 **/
 478 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 479 {
 480         size_t size=0;
 481         char *dest;
 482         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 483
 484         if(src == NULL) {
 485                 return NULL;
 486         }
 487
 488         /* this takes advantage of the fact that upper/lower can't
 489            change the length of a character by more than 1 byte */
 490         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
 491         if (dest == NULL) {
 492                 return NULL;
 493         }
 494
 495         while (*src) {
 496                 size_t c_size;
 497                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 498                 src += c_size;
 499
 500                 c = tolower_m(c);
 501
 502                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 503                 if (c_size == -1) {
 504                         talloc_free(dest);
 505                         return NULL;
 506                 }
 507                 size += c_size;
 508         }
 509
 510         dest[size] = 0;
 511
 512         /* trim it so talloc_append_string() works */
 513         dest = talloc_realloc(ctx, dest, char, size+1);
 514
 515         talloc_set_name_const(dest, dest);
 516
 517         return dest;
 518 }
 519
 520 /**
 521  Convert a string to UPPER case, allocated with talloc
 522  source length limited to n bytes
 523 **/
 524 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
 525 {
 526         size_t size=0;
 527         char *dest;
 528         struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
 529
 530         if (!src) {
 531                 return NULL;
 532         }
 533
 534         /* this takes advantage of the fact that upper/lower can't
 535            change the length of a character by more than 1 byte */
 536         dest = talloc_array(ctx, char, 2*(n+1));
 537         if (dest == NULL) {
 538                 return NULL;
 539         }
 540
 541         while (n-- && *src) {
 542                 size_t c_size;
 543                 codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
 544                 src += c_size;
 545
 546                 c = toupper_m(c);
 547
 548                 c_size = push_codepoint_convenience(iconv_convenience, dest+size, c);
 549                 if (c_size == -1) {
 550                         talloc_free(dest);
 551                         return NULL;
 552                 }
 553                 size += c_size;
 554         }
 555
 556         dest[size] = 0;
 557
 558         /* trim it so talloc_append_string() works */
 559         dest = talloc_realloc(ctx, dest, char, size+1);
 560
 561         talloc_set_name_const(dest, dest);
 562
 563         return dest;
 564 }
 565
 566 /**
 567  Convert a string to UPPER case, allocated with talloc
 568 **/
 569 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 570 {
 571         return strupper_talloc_n(ctx, src, src?strlen(src):0);
 572 }
 573
 574 /**
 575  talloc_strdup() a unix string to upper case.
 576 **/
 577 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
 578 {
 579         return strupper_talloc(ctx, src);
 580 }
 581
 582 /**
 583  Convert a string to lower case.
 584 **/
 585 _PUBLIC_ void strlower_m(char *s)
 586 {
 587         char *d;
 588         struct smb_iconv_convenience *iconv_convenience;
 589
 590         /* this is quite a common operation, so we want it to be
 591            fast. We optimise for the ascii case, knowing that all our
 592            supported multi-byte character sets are ascii-compatible
 593            (ie. they match for the first 128 chars) */
 594         while (*s && !(((uint8_t)*s) & 0x80)) {
 595                 *s = tolower((uint8_t)*s);
 596                 s++;
 597         }
 598
 599         if (!*s)
 600                 return;
 601
 602         iconv_convenience = get_iconv_convenience();
 603
 604         d = s;
 605
 606         while (*s) {
 607                 size_t c_size, c_size2;
 608                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 609                 c_size2 = push_codepoint_convenience(iconv_convenience, d, tolower_m(c));
 610                 if (c_size2 > c_size) {
 611                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
 612                                  c, tolower_m(c), (int)c_size, (int)c_size2));
 613                         smb_panic("codepoint expansion in strlower_m\n");
 614                 }
 615                 s += c_size;
 616                 d += c_size2;
 617         }
 618         *d = 0;
 619 }
 620
 621 /**
 622  Convert a string to UPPER case.
 623 **/
 624 _PUBLIC_ void strupper_m(char *s)
 625 {
 626         char *d;
 627         struct smb_iconv_convenience *iconv_convenience;
 628
 629         /* this is quite a common operation, so we want it to be
 630            fast. We optimise for the ascii case, knowing that all our
 631            supported multi-byte character sets are ascii-compatible
 632            (ie. they match for the first 128 chars) */
 633         while (*s && !(((uint8_t)*s) & 0x80)) {
 634                 *s = toupper((uint8_t)*s);
 635                 s++;
 636         }
 637
 638         if (!*s)
 639                 return;
 640
 641         iconv_convenience = get_iconv_convenience();
 642
 643         d = s;
 644
 645         while (*s) {
 646                 size_t c_size, c_size2;
 647                 codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
 648                 c_size2 = push_codepoint_convenience(iconv_convenience, d, toupper_m(c));
 649                 if (c_size2 > c_size) {
 650                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
 651                                  c, toupper_m(c), (int)c_size, (int)c_size2));
 652                         smb_panic("codepoint expansion in strupper_m\n");
 653                 }
 654                 s += c_size;
 655                 d += c_size2;
 656         }
 657         *d = 0;
 658 }
 659
 660
 661 /**
 662  Find the number of 'c' chars in a string
 663 **/
 664 _PUBLIC_ size_t count_chars_m(const char *s, char c)
 665 {
 666         struct smb_iconv_convenience *ic = get_iconv_convenience();
 667         size_t count = 0;
 668
 669         while (*s) {
 670                 size_t size;
 671                 codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
 672                 if (c2 == c) count++;
 673                 s += size;
 674         }
 675
 676         return count;
 677 }
 678
 679
 680 /**
 681  * Copy a string from a char* unix src to a dos codepage string destination.
 682  *
 683  * @return the number of bytes occupied by the string in the destination.
 684  *
 685  * @param flags can include
 686  * <dl>
 687  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
 688  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
 689  * </dl>
 690  *
 691  * @param dest_len the maximum length in bytes allowed in the
 692  * destination.  If @p dest_len is -1 then no maximum is used.
 693  **/
 694 static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 695 {
 696         size_t src_len;
 697         ssize_t ret;
 698
 699         if (flags & STR_UPPER) {
 700                 char *tmpbuf = strupper_talloc(NULL, src);
 701                 if (tmpbuf == NULL) {
 702                         return -1;
 703                 }
 704                 ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 705                 talloc_free(tmpbuf);
 706                 return ret;
 707         }
 708
 709         src_len = strlen(src);
 710
 711         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 712                 src_len++;
 713
 714         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, false);
 715 }
 716
 717 /**
 718  * Copy a string from a unix char* src to an ASCII destination,
 719  * allocating a buffer using talloc().
 720  *
 721  * @param dest always set at least to NULL
 722  *
 723  * @returns The number of bytes occupied by the string in the destination
 724  *         or -1 in case of error.
 725  **/
 726 _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 727 {
 728         size_t src_len = strlen(src)+1;
 729         *dest = NULL;
 730         return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
 731 }
 732
 733
 734 /**
 735  * Copy a string from a dos codepage source to a unix char* destination.
 736  *
 737  * The resulting string in "dest" is always null terminated.
 738  *
 739  * @param flags can have:
 740  * <dl>
 741  * <dt>STR_TERMINATE</dt>
 742  * <dd>STR_TERMINATE means the string in @p src
 743  * is null terminated, and src_len is ignored.</dd>
 744  * </dl>
 745  *
 746  * @param src_len is the length of the source area in bytes.
 747  * @returns the number of bytes occupied by the string in @p src.
 748  **/
 749 static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 750 {
 751         size_t ret;
 752
 753         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
 754                 if (src_len == (size_t)-1) {
 755                         src_len = strlen((const char *)src) + 1;
 756                 } else {
 757                         size_t len = strnlen((const char *)src, src_len);
 758                         if (len < src_len)
 759                                 len++;
 760                         src_len = len;
 761                 }
 762         }
 763
 764         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, false);
 765
 766         if (dest_len)
 767                 dest[MIN(ret, dest_len-1)] = 0;
 768
 769         return src_len;
 770 }
 771
 772 /**
 773  * Copy a string from a char* src to a unicode destination.
 774  *
 775  * @returns the number of bytes occupied by the string in the destination.
 776  *
 777  * @param flags can have:
 778  *
 779  * <dl>
 780  * <dt>STR_TERMINATE <dd>means include the null termination.
 781  * <dt>STR_UPPER     <dd>means uppercase in the destination.
 782  * <dt>STR_NOALIGN   <dd>means don't do alignment.
 783  * </dl>
 784  *
 785  * @param dest_len is the maximum length allowed in the
 786  * destination. If dest_len is -1 then no maxiumum is used.
 787  **/
 788 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
 789 {
 790         size_t len=0;
 791         size_t src_len = strlen(src);
 792         size_t ret;
 793
 794         if (flags & STR_UPPER) {
 795                 char *tmpbuf = strupper_talloc(NULL, src);
 796                 if (tmpbuf == NULL) {
 797                         return -1;
 798                 }
 799                 ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
 800                 talloc_free(tmpbuf);
 801                 return ret;
 802         }
 803
 804         if (flags & STR_TERMINATE)
 805                 src_len++;
 806
 807         if (ucs2_align(NULL, dest, flags)) {
 808                 *(char *)dest = 0;
 809                 dest = (void *)((char *)dest + 1);
 810                 if (dest_len) dest_len--;
 811                 len++;
 812         }
 813
 814         /* ucs2 is always a multiple of 2 bytes */
 815         dest_len &= ~1;
 816
 817         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, false);
 818         if (ret == (size_t)-1) {
 819                 return 0;
 820         }
 821
 822         len += ret;
 823
 824         return len;
 825 }
 826
 827
 828 /**
 829  * Copy a string from a unix char* src to a UCS2 destination,
 830  * allocating a buffer using talloc().
 831  *
 832  * @param dest always set at least to NULL
 833  *
 834  * @returns The number of bytes occupied by the string in the destination
 835  *         or -1 in case of error.
 836  **/
 837 _PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
 838 {
 839         size_t src_len = strlen(src)+1;
 840         *dest = NULL;
 841         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
 842 }
 843
 844
 845 /**
 846  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
 847  *
 848  * @param dest always set at least to NULL
 849  *
 850  * @returns The number of bytes occupied by the string in the destination
 851  **/
 852
 853 _PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 854 {
 855         size_t src_len = strlen(src)+1;
 856         *dest = NULL;
 857         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
 858 }
 859
 860 /**
 861  Copy a string from a ucs2 source to a unix char* destination.
 862  Flags can have:
 863   STR_TERMINATE means the string in src is null terminated.
 864   STR_NOALIGN   means don't try to align.
 865  if STR_TERMINATE is set then src_len is ignored if it is -1.
 866  src_len is the length of the source area in bytes
 867  Return the number of bytes occupied by the string in src.
 868  The resulting string in "dest" is always null terminated.
 869 **/
 870
 871 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 872 {
 873         size_t ret;
 874
 875         if (ucs2_align(NULL, src, flags)) {
 876                 src = (const void *)((const char *)src + 1);
 877                 if (src_len > 0)
 878                         src_len--;
 879         }
 880
 881         if (flags & STR_TERMINATE) {
 882                 if (src_len == (size_t)-1) {
 883                         src_len = utf16_len(src);
 884                 } else {
 885                         src_len = utf16_len_n(src, src_len);
 886                 }
 887         }
 888
 889         /* ucs2 is always a multiple of 2 bytes */
 890         if (src_len != (size_t)-1)
 891                 src_len &= ~1;
 892
 893         ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, false);
 894         if (dest_len)
 895                 dest[MIN(ret, dest_len-1)] = 0;
 896
 897         return src_len;
 898 }
 899
 900 /**
 901  * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
 902  *
 903  * @param dest always set at least to NULL
 904  *
 905  * @returns The number of bytes occupied by the string in the destination
 906  **/
 907
 908 _PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 909 {
 910         size_t src_len = strlen(src)+1;
 911         *dest = NULL;
 912         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 913 }
 914
 915 /**
 916  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
 917  *
 918  * @param dest always set at least to NULL
 919  *
 920  * @returns The number of bytes occupied by the string in the destination
 921  **/
 922
 923 _PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
 924 {
 925         size_t src_len = utf16_len(src);
 926         *dest = NULL;
 927         return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 928 }
 929
 930 /**
 931  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
 932  *
 933  * @param dest always set at least to NULL
 934  *
 935  * @returns The number of bytes occupied by the string in the destination
 936  **/
 937
 938 _PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 939 {
 940         size_t src_len = strlen(src)+1;
 941         *dest = NULL;
 942         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 943 }
 944
 945 /**
 946  Copy a string from a char* src to a unicode or ascii
 947  dos codepage destination choosing unicode or ascii based on the
 948  flags in the SMB buffer starting at base_ptr.
 949  Return the number of bytes occupied by the string in the destination.
 950  flags can have:
 951   STR_TERMINATE means include the null termination.
 952   STR_UPPER     means uppercase in the destination.
 953   STR_ASCII     use ascii even with unicode packet.
 954   STR_NOALIGN   means don't do alignment.
 955  dest_len is the maximum length allowed in the destination. If dest_len
 956  is -1 then no maxiumum is used.
 957 **/
 958
 959 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
 960 {
 961         if (flags & STR_ASCII) {
 962                 return push_ascii(dest, src, dest_len, flags);
 963         } else if (flags & STR_UNICODE) {
 964                 return push_ucs2(dest, src, dest_len, flags);
 965         } else {
 966                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
 967                 return -1;
 968         }
 969 }
 970
 971
 972 /**
 973  Copy a string from a unicode or ascii source (depending on
 974  the packet flags) to a char* destination.
 975  Flags can have:
 976   STR_TERMINATE means the string in src is null terminated.
 977   STR_UNICODE   means to force as unicode.
 978   STR_ASCII     use ascii even with unicode packet.
 979   STR_NOALIGN   means don't do alignment.
 980  if STR_TERMINATE is set then src_len is ignored is it is -1
 981  src_len is the length of the source area in bytes.
 982  Return the number of bytes occupied by the string in src.
 983  The resulting string in "dest" is always null terminated.
 984 **/
 985
 986 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
 987 {
 988         if (flags & STR_ASCII) {
 989                 return pull_ascii(dest, src, dest_len, src_len, flags);
 990         } else if (flags & STR_UNICODE) {
 991                 return pull_ucs2(dest, src, dest_len, src_len, flags);
 992         } else {
 993                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
 994                 return -1;
 995         }
 996 }
 997
 998
 999 /**
1000  * Convert string from one encoding to another, making error checking etc
1001  *
1002  * @param src pointer to source string (multibyte or singlebyte)
1003  * @param srclen length of the source string in bytes
1004  * @param dest pointer to destination string (multibyte or singlebyte)
1005  * @param destlen maximal length allowed for string
1006  * @returns the number of bytes occupied in the destination
1007  **/
1008 _PUBLIC_ size_t convert_string(charset_t from, charset_t to,
1009                                 void const *src, size_t srclen,
1010                                 void *dest, size_t destlen,
1011                                 bool allow_badcharcnv)
1012 {
1013         size_t ret;
1014         if (!convert_string_convenience(get_iconv_convenience(), from, to,
1015                                                                           src, srclen,
1016                                                                           dest, destlen, &ret,
1017                                                                           allow_badcharcnv))
1018                 return -1;
1019         return ret;
1020 }
1021
1022 /**
1023  * Convert between character sets, allocating a new buffer using talloc for the result.
1024  *
1025  * @param srclen length of source buffer.
1026  * @param dest always set at least to NULL
1027  * @param converted_size Size in bytes of the converted string
1028  * @note -1 is not accepted for srclen.
1029  *
1030  * @returns boolean indication whether the conversion succeeded
1031  **/
1032
1033 _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
1034                                        charset_t from, charset_t to,
1035                                        void const *src, size_t srclen,
1036                                        void *dest, size_t *converted_size,
1037                                            bool allow_badcharcnv)
1038 {
1039         return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
1040                                                                                          from, to, src, srclen, dest,
1041                                                                                          converted_size,
1042                                                                                          allow_badcharcnv);
1043 }
1044
1045 _PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
1046                                         size_t *size)
1047 {
1048         return next_codepoint_convenience_ext(get_iconv_convenience(), str,
1049                                               src_charset, size);
1050 }
1051
1052 _PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
1053 {
1054         return next_codepoint_convenience(get_iconv_convenience(), str, size);
1055 }
1056
1057 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1058 {
1059         return push_codepoint_convenience(get_iconv_convenience(), str, c);
1060 }