source3/lib/util_unistr.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4    Copyright (C) Andrew Tridgell 1992-2001
   5    Copyright (C) Simo Sorce 2001
   6    Copyright (C) Jeremy Allison 2005
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20 */
  21
  22 #include "includes.h"
  23
  24 #ifndef MAXUNI
  25 #define MAXUNI 1024
  26 #endif
  27
/* Lookup table indexed by a 16-bit UCS-2 value: isvalid83_w() treats a
   non-zero entry as "valid in an 8.3 name".  It is mapped on first use
   by init_valid_table() from the valid.dat file and released again by
   gfree_case_tables(). */
static uint8 *valid_table;
/* Reset to false by gfree_case_tables().
   NOTE(review): nothing in the visible code ever sets this to true -
   possibly vestigial, confirm before relying on it. */
static bool initialized;
  32
  33 /**
  34  * Destroy global objects allocated by load_case_tables()
  35  **/
  36 void gfree_case_tables(void)
  37 {
  38         if ( valid_table ) {
  39                 unmap_file(valid_table, 0x10000);
  40                 valid_table = NULL;
  41         }
  42         initialized = false;
  43 }
  44
  45 /**
  46  * Load the valid character map table from <tt>valid.dat</tt> or
  47  * create from the configured codepage.
  48  *
  49  * This function is called whenever the configuration is reloaded.
  50  * However, the valid character table is not changed if it's loaded
  51  * from a file, because we can't unmap files.
  52  **/
  53
  54 static void init_valid_table(void)
  55 {
  56         if (valid_table) {
  57                 return;
  58         }
  59
  60         valid_table = (uint8 *)map_file(data_path("valid.dat"), 0x10000);
  61         if (!valid_table) {
  62                 smb_panic("Could not load valid.dat file required for mangle method=hash");
  63                 return;
  64         }
  65 }
  66
  67 /*******************************************************************
  68  Write a string in (little-endian) unicode format. src is in
  69  the current DOS codepage. len is the length in bytes of the
  70  string pointed to by dst.
  71
  72  if null_terminate is True then null terminate the packet (adds 2 bytes)
  73
  74  the return value is the length in bytes consumed by the string, including the
  75  null termination if applied
  76 ********************************************************************/
  77
  78 size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate)
  79 {
  80         int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE
  81                                    : STR_UNICODE|STR_NOALIGN;
  82         return push_ucs2(NULL, dst, src, len, flags);
  83 }
  84
  85
  86 /*******************************************************************
  87  Skip past a unicode string, but not more than len. Always move
  88  past a terminating zero if found.
  89 ********************************************************************/
  90
  91 char *skip_unibuf(char *src, size_t len)
  92 {
  93         char *srcend = src + len;
  94
  95         while (src < srcend && SVAL(src,0)) {
  96                 src += 2;
  97         }
  98
  99         if(!SVAL(src,0)) {
 100                 src += 2;
 101         }
 102
 103         return src;
 104 }
 105
 106 /* Converts a string from internal samba format to unicode
 107  */
 108
 109 int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags)
 110 {
 111         return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
 112 }
 113
 114 /* Converts a string from internal samba format to unicode. Always terminates.
 115  * Actually just a wrapper round push_ucs2_talloc().
 116  */
 117
 118 int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
 119 {
 120         size_t size;
 121         if (push_ucs2_talloc(ctx, dest, src, &size))
 122                 return size;
 123         else
 124                 return -1;
 125 }
 126
 127 /*******************************************************************
 128  Determine if a character is valid in a 8.3 name.
 129 ********************************************************************/
 130
 131 bool isvalid83_w(smb_ucs2_t c)
 132 {
 133         init_valid_table();
 134         return valid_table[SVAL(&c,0)] != 0;
 135 }
 136
 137 /*******************************************************************
 138  Count the number of characters in a smb_ucs2_t string.
 139 ********************************************************************/
 140
 141 size_t strlen_w(const smb_ucs2_t *src)
 142 {
 143         size_t len;
 144         smb_ucs2_t c;
 145
 146         for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
 147                 ;
 148         }
 149
 150         return len;
 151 }
 152
 153 /*******************************************************************
 154  Count up to max number of characters in a smb_ucs2_t string.
 155 ********************************************************************/
 156
 157 size_t strnlen_w(const smb_ucs2_t *src, size_t max)
 158 {
 159         size_t len;
 160         smb_ucs2_t c;
 161
 162         for(len = 0; (len < max) && *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
 163                 ;
 164         }
 165
 166         return len;
 167 }
 168
 169 /*******************************************************************
 170  Wide strchr().
 171 ********************************************************************/
 172
 173 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
 174 {
 175         smb_ucs2_t cp;
 176         while (*(COPY_UCS2_CHAR(&cp,s))) {
 177                 if (c == cp) {
 178                         return (smb_ucs2_t *)s;
 179                 }
 180                 s++;
 181         }
 182         if (c == cp) {
 183                 return (smb_ucs2_t *)s;
 184         }
 185
 186         return NULL;
 187 }
 188
 189 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
 190 {
 191         return strchr_w(s, UCS2_CHAR(c));
 192 }
 193
 194 /*******************************************************************
 195  Wide strrchr().
 196 ********************************************************************/
 197
 198 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
 199 {
 200         smb_ucs2_t cp;
 201         const smb_ucs2_t *p = s;
 202         int len = strlen_w(s);
 203
 204         if (len == 0) {
 205                 return NULL;
 206         }
 207         p += (len - 1);
 208         do {
 209                 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
 210                         return (smb_ucs2_t *)p;
 211                 }
 212         } while (p-- != s);
 213         return NULL;
 214 }
 215
 216 /*******************************************************************
 217  Wide version of strrchr that returns after doing strrchr 'n' times.
 218 ********************************************************************/
 219
 220 smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n)
 221 {
 222         smb_ucs2_t cp;
 223         const smb_ucs2_t *p = s;
 224         int len = strlen_w(s);
 225
 226         if (len == 0 || !n) {
 227                 return NULL;
 228         }
 229         p += (len - 1);
 230         do {
 231                 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
 232                         n--;
 233                 }
 234
 235                 if (!n) {
 236                         return (smb_ucs2_t *)p;
 237                 }
 238         } while (p-- != s);
 239         return NULL;
 240 }
 241
 242 /*******************************************************************
 243  Wide strstr().
 244 ********************************************************************/
 245
 246 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
 247 {
 248         smb_ucs2_t *r;
 249         size_t inslen;
 250
 251         if (!s || !*s || !ins || !*ins) {
 252                 return NULL;
 253         }
 254
 255         inslen = strlen_w(ins);
 256         r = (smb_ucs2_t *)s;
 257
 258         while ((r = strchr_w(r, *ins))) {
 259                 if (strncmp_w(r, ins, inslen) == 0) {
 260                         return r;
 261                 }
 262                 r++;
 263         }
 264
 265         return NULL;
 266 }
 267
 268 /*******************************************************************
 269  Convert a string to lower case.
 270  return True if any char is converted
 271
 272  This is unsafe for any string involving a UTF16 character
 273 ********************************************************************/
 274
 275 bool strlower_w(smb_ucs2_t *s)
 276 {
 277         smb_ucs2_t cp;
 278         bool ret = False;
 279
 280         while (*(COPY_UCS2_CHAR(&cp,s))) {
 281                 smb_ucs2_t v = tolower_m(cp);
 282                 if (v != cp) {
 283                         COPY_UCS2_CHAR(s,&v);
 284                         ret = True;
 285                 }
 286                 s++;
 287         }
 288         return ret;
 289 }
 290
 291 /*******************************************************************
 292  Convert a string to upper case.
 293  return True if any char is converted
 294
 295  This is unsafe for any string involving a UTF16 character
 296 ********************************************************************/
 297
 298 bool strupper_w(smb_ucs2_t *s)
 299 {
 300         smb_ucs2_t cp;
 301         bool ret = False;
 302         while (*(COPY_UCS2_CHAR(&cp,s))) {
 303                 smb_ucs2_t v = toupper_m(cp);
 304                 if (v != cp) {
 305                         COPY_UCS2_CHAR(s,&v);
 306                         ret = True;
 307                 }
 308                 s++;
 309         }
 310         return ret;
 311 }
 312
 313 /*******************************************************************
 314  Convert a string to "normal" form.
 315 ********************************************************************/
 316
 317 void strnorm_w(smb_ucs2_t *s, int case_default)
 318 {
 319         if (case_default == CASE_UPPER) {
 320                 strupper_w(s);
 321         } else {
 322                 strlower_w(s);
 323         }
 324 }
 325
 326 int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
 327 {
 328         smb_ucs2_t cpa, cpb;
 329
 330         while ((*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
 331                 a++;
 332                 b++;
 333         }
 334         return (*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b)));
 335         /* warning: if *a != *b and both are not 0 we return a random
 336                 greater or lesser than 0 number not realted to which
 337                 string is longer */
 338 }
 339
 340 int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
 341 {
 342         smb_ucs2_t cpa, cpb;
 343         size_t n = 0;
 344
 345         while ((n < len) && (*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
 346                 a++;
 347                 b++;
 348                 n++;
 349         }
 350         return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0;
 351 }
 352
 353 /*******************************************************************
 354  Case insensitive string comparison.
 355 ********************************************************************/
 356
 357 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
 358 {
 359         smb_ucs2_t cpa, cpb;
 360
 361         while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) {
 362                 a++;
 363                 b++;
 364         }
 365         return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b))));
 366 }
 367
 368 /*******************************************************************
 369  Case insensitive string comparison, length limited.
 370 ********************************************************************/
 371
 372 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
 373 {
 374         smb_ucs2_t cpa, cpb;
 375         size_t n = 0;
 376
 377         while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) {
 378                 a++;
 379                 b++;
 380                 n++;
 381         }
 382         return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0;
 383 }
 384
 385 /*******************************************************************
 386  Compare 2 strings.
 387 ********************************************************************/
 388
 389 bool strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
 390 {
 391         if (s1 == s2) {
 392                 return(True);
 393         }
 394         if (!s1 || !s2) {
 395                 return(False);
 396         }
 397
 398         return(strcasecmp_w(s1,s2)==0);
 399 }
 400
 401 /*******************************************************************
 402  Compare 2 strings up to and including the nth char.
 403 ******************************************************************/
 404
 405 bool strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
 406 {
 407         if (s1 == s2) {
 408                 return(True);
 409         }
 410         if (!s1 || !s2 || !n) {
 411                 return(False);
 412         }
 413
 414         return(strncasecmp_w(s1,s2,n)==0);
 415 }
 416
 417 /*******************************************************************
 418  Duplicate string.
 419 ********************************************************************/
 420
 421 smb_ucs2_t *strdup_w(const smb_ucs2_t *src)
 422 {
 423         return strndup_w(src, 0);
 424 }
 425
 426 /* if len == 0 then duplicate the whole string */
 427
 428 smb_ucs2_t *strndup_w(const smb_ucs2_t *src, size_t len)
 429 {
 430         smb_ucs2_t *dest;
 431
 432         if (!len) {
 433                 len = strlen_w(src);
 434         }
 435         dest = SMB_MALLOC_ARRAY(smb_ucs2_t, len + 1);
 436         if (!dest) {
 437                 DEBUG(0,("strdup_w: out of memory!\n"));
 438                 return NULL;
 439         }
 440
 441         memcpy(dest, src, len * sizeof(smb_ucs2_t));
 442         dest[len] = 0;
 443         return dest;
 444 }
 445
 446 /*******************************************************************
 447  Copy a string with max len.
 448 ********************************************************************/
 449
 450 smb_ucs2_t *strncpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
 451 {
 452         smb_ucs2_t cp;
 453         size_t len;
 454
 455         if (!dest || !src) {
 456                 return NULL;
 457         }
 458
 459         for (len = 0; (*COPY_UCS2_CHAR(&cp,(src+len))) && (len < max); len++) {
 460                 cp = *COPY_UCS2_CHAR(dest+len,src+len);
 461         }
 462         cp = 0;
 463         for ( /*nothing*/ ; len < max; len++ ) {
 464                 cp = *COPY_UCS2_CHAR(dest+len,&cp);
 465         }
 466
 467         return dest;
 468 }
 469
 470 /*******************************************************************
 471  Append a string of len bytes and add a terminator.
 472 ********************************************************************/
 473
 474 smb_ucs2_t *strncat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
 475 {
 476         size_t start;
 477         size_t len;
 478         smb_ucs2_t z = 0;
 479
 480         if (!dest || !src) {
 481                 return NULL;
 482         }
 483
 484         start = strlen_w(dest);
 485         len = strnlen_w(src, max);
 486
 487         memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
 488         z = *COPY_UCS2_CHAR(dest+start+len,&z);
 489
 490         return dest;
 491 }
 492
 493 smb_ucs2_t *strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src)
 494 {
 495         size_t start;
 496         size_t len;
 497         smb_ucs2_t z = 0;
 498
 499         if (!dest || !src) {
 500                 return NULL;
 501         }
 502
 503         start = strlen_w(dest);
 504         len = strlen_w(src);
 505
 506         memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
 507         z = *COPY_UCS2_CHAR(dest+start+len,&z);
 508
 509         return dest;
 510 }
 511
 512
 513 /*******************************************************************
 514  Replace any occurence of oldc with newc in unicode string.
 515 ********************************************************************/
 516
 517 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
 518 {
 519         smb_ucs2_t cp;
 520
 521         for(;*(COPY_UCS2_CHAR(&cp,s));s++) {
 522                 if(cp==oldc) {
 523                         COPY_UCS2_CHAR(s,&newc);
 524                 }
 525         }
 526 }
 527
 528 /*******************************************************************
 529  Trim unicode string.
 530 ********************************************************************/
 531
 532 bool trim_string_w(smb_ucs2_t *s, const smb_ucs2_t *front,
 533                                   const smb_ucs2_t *back)
 534 {
 535         bool ret = False;
 536         size_t len, front_len, back_len;
 537
 538         if (!s) {
 539                 return False;
 540         }
 541
 542         len = strlen_w(s);
 543
 544         if (front && *front) {
 545                 front_len = strlen_w(front);
 546                 while (len && strncmp_w(s, front, front_len) == 0) {
 547                         memmove(s, (s + front_len), (len - front_len + 1) * sizeof(smb_ucs2_t));
 548                         len -= front_len;
 549                         ret = True;
 550                 }
 551         }
 552
 553         if (back && *back) {
 554                 back_len = strlen_w(back);
 555                 while (len && strncmp_w((s + (len - back_len)), back, back_len) == 0) {
 556                         s[len - back_len] = 0;
 557                         len -= back_len;
 558                         ret = True;
 559                 }
 560         }
 561
 562         return ret;
 563 }
 564
/*
  The *_wa() functions take a combination of 7 bit ascii
  and wide characters. They are used so that you can use string
  functions combining C string constants with ucs2 strings.

  The char* arguments must NOT be multibyte - to be completely sure
  of this only pass string constants */
 572
 573 int strcmp_wa(const smb_ucs2_t *a, const char *b)
 574 {
 575         smb_ucs2_t cp = 0;
 576
 577         while (*b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
 578                 a++;
 579                 b++;
 580         }
 581         return (*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b));
 582 }
 583
 584 int strncmp_wa(const smb_ucs2_t *a, const char *b, size_t len)
 585 {
 586         smb_ucs2_t cp = 0;
 587         size_t n = 0;
 588
 589         while ((n < len) && *b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
 590                 a++;
 591                 b++;
 592                 n++;
 593         }
 594         return (len - n)?(*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b)):0;
 595 }
 596
 597 smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
 598 {
 599         smb_ucs2_t cp;
 600
 601         while (*(COPY_UCS2_CHAR(&cp,s))) {
 602                 int i;
 603                 for (i=0; p[i] && cp != UCS2_CHAR(p[i]); i++)
 604                         ;
 605                 if (p[i]) {
 606                         return (smb_ucs2_t *)s;
 607                 }
 608                 s++;
 609         }
 610         return NULL;
 611 }
 612
 613 smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins)
 614 {
 615         smb_ucs2_t *r;
 616         size_t inslen;
 617
 618         if (!s || !ins) {
 619                 return NULL;
 620         }
 621
 622         inslen = strlen(ins);
 623         r = (smb_ucs2_t *)s;
 624
 625         while ((r = strchr_w(r, UCS2_CHAR(*ins)))) {
 626                 if (strncmp_wa(r, ins, inslen) == 0)
 627                         return r;
 628                 r++;
 629         }
 630
 631         return NULL;
 632 }
 633
 634 /*************************************************************
 635  ascii only toupper - saves the need for smbd to be in C locale.
 636 *************************************************************/
 637
 638 int toupper_ascii(int c)
 639 {
 640         smb_ucs2_t uc = toupper_m(UCS2_CHAR(c));
 641         return UCS2_TO_CHAR(uc);
 642 }
 643
 644 /*************************************************************
 645  ascii only tolower - saves the need for smbd to be in C locale.
 646 *************************************************************/
 647
 648 int tolower_ascii(int c)
 649 {
 650         smb_ucs2_t uc = tolower_m(UCS2_CHAR(c));
 651         return UCS2_TO_CHAR(uc);
 652 }
 653
 654 /*************************************************************
 655  ascii only isupper - saves the need for smbd to be in C locale.
 656 *************************************************************/
 657
 658 int isupper_ascii(int c)
 659 {
 660         return isupper_m(UCS2_CHAR(c));
 661 }
 662
 663 /*************************************************************
 664  ascii only islower - saves the need for smbd to be in C locale.
 665 *************************************************************/
 666
 667 int islower_ascii(int c)
 668 {
 669         return islower_m(UCS2_CHAR(c));
 670 }