source3/lib/util_unistr.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4    Copyright (C) Andrew Tridgell 1992-2001
   5    Copyright (C) Simo Sorce 2001
   6    Copyright (C) Jeremy Allison 2005
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20 */
  21
  22 #include "includes.h"
  23
  24 #ifndef MAXUNI
  25 #define MAXUNI 1024
  26 #endif
  27
   28 /* this table records which UCS2 characters are valid in 8.3 names.  It is
   29    mapped on first use from the valid.dat file (see init_valid_table()) */
  30 static uint8 *valid_table;
  31 static bool initialized;
  32
   33 /* Copy a smb_ucs2_t from a possibly unaligned buffer into dest, one byte at a time. Evaluates to dest (a pointer to the copy), not to the character itself */
  34 #define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((unsigned char *)(src))[0],\
  35                                 ((unsigned char *)(dest))[1] = ((unsigned char *)(src))[1], (dest))
  36
  37
  38 /* return an ascii version of a ucs2 character */
  39 #define UCS2_TO_CHAR(c) (((c) >> UCS2_SHIFT) & 0xff)
  40
  41
  42 /**
  43  * Destroy global objects allocated by load_case_tables()
  44  **/
  45 void gfree_case_tables(void)
  46 {
  47         if ( valid_table ) {
  48                 unmap_file(valid_table, 0x10000);
  49                 valid_table = NULL;
  50         }
  51         initialized = false;
  52 }
  53
  54 /**
  55  * Load the valid character map table from <tt>valid.dat</tt> or
  56  * create from the configured codepage.
  57  *
  58  * This function is called whenever the configuration is reloaded.
  59  * However, the valid character table is not changed if it's loaded
  60  * from a file, because we can't unmap files.
  61  **/
  62
  63 static void init_valid_table(void)
  64 {
  65         if (valid_table) {
  66                 return;
  67         }
  68
  69         valid_table = (uint8 *)map_file(data_path("valid.dat"), 0x10000);
  70         if (!valid_table) {
  71                 smb_panic("Could not load valid.dat file required for mangle method=hash");
  72                 return;
  73         }
  74 }
  75
  76 /*******************************************************************
  77  Write a string in (little-endian) unicode format. src is in
  78  the current DOS codepage. len is the length in bytes of the
  79  string pointed to by dst.
  80
  81  if null_terminate is True then null terminate the packet (adds 2 bytes)
  82
  83  the return value is the length in bytes consumed by the string, including the
  84  null termination if applied
  85 ********************************************************************/
  86
  87 size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate)
  88 {
  89         int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE
  90                                    : STR_UNICODE|STR_NOALIGN;
  91         return push_ucs2(NULL, dst, src, len, flags);
  92 }
  93
  94
  95 /*******************************************************************
  96  Skip past a unicode string, but not more than len. Always move
  97  past a terminating zero if found.
  98 ********************************************************************/
  99
 100 char *skip_unibuf(char *src, size_t len)
 101 {
 102         char *srcend = src + len;
 103
 104         while (src < srcend && SVAL(src,0)) {
 105                 src += 2;
 106         }
 107
 108         if(!SVAL(src,0)) {
 109                 src += 2;
 110         }
 111
 112         return src;
 113 }
 114
 115 /* Converts a string from internal samba format to unicode
 116  */
 117
 118 int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags)
 119 {
 120         return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
 121 }
 122
 123 /* Converts a string from internal samba format to unicode. Always terminates.
 124  * Actually just a wrapper round push_ucs2_talloc().
 125  */
 126
 127 int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
 128 {
 129         size_t size;
 130         if (push_ucs2_talloc(ctx, dest, src, &size))
 131                 return size;
 132         else
 133                 return -1;
 134 }
 135
 136 /*******************************************************************
 137  Determine if a character is valid in a 8.3 name.
 138 ********************************************************************/
 139
 140 bool isvalid83_w(smb_ucs2_t c)
 141 {
 142         init_valid_table();
 143         return valid_table[SVAL(&c,0)] != 0;
 144 }
 145
 146 /*******************************************************************
 147  Count the number of two-byte pairs in a UTF16 string.
 148 ********************************************************************/
 149
 150 size_t strlen_w(const smb_ucs2_t *src)
 151 {
 152         size_t len;
 153         smb_ucs2_t c;
 154
 155         for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
 156                 ;
 157         }
 158
 159         return len;
 160 }
 161
 162 /*******************************************************************
 163  Count up to max number of characters in a smb_ucs2_t string.
 164 ********************************************************************/
 165
 166 size_t strnlen_w(const smb_ucs2_t *src, size_t max)
 167 {
 168         size_t len;
 169         smb_ucs2_t c;
 170
 171         for(len = 0; (len < max) && *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
 172                 ;
 173         }
 174
 175         return len;
 176 }
 177
 178 /*******************************************************************
 179  Wide strchr().
 180 ********************************************************************/
 181
 182 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
 183 {
 184         smb_ucs2_t cp;
 185         while (*(COPY_UCS2_CHAR(&cp,s))) {
 186                 if (c == cp) {
 187                         return (smb_ucs2_t *)s;
 188                 }
 189                 s++;
 190         }
 191         if (c == cp) {
 192                 return (smb_ucs2_t *)s;
 193         }
 194
 195         return NULL;
 196 }
 197
 198 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
 199 {
 200         return strchr_w(s, UCS2_CHAR(c));
 201 }
 202
 203 /*******************************************************************
 204  Wide strrchr().
 205 ********************************************************************/
 206
 207 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
 208 {
 209         smb_ucs2_t cp;
 210         const smb_ucs2_t *p = s;
 211         int len = strlen_w(s);
 212
 213         if (len == 0) {
 214                 return NULL;
 215         }
 216         p += (len - 1);
 217         do {
 218                 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
 219                         return (smb_ucs2_t *)p;
 220                 }
 221         } while (p-- != s);
 222         return NULL;
 223 }
 224
 225 /*******************************************************************
 226  Wide version of strrchr that returns after doing strrchr 'n' times.
 227 ********************************************************************/
 228
 229 smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n)
 230 {
 231         smb_ucs2_t cp;
 232         const smb_ucs2_t *p = s;
 233         int len = strlen_w(s);
 234
 235         if (len == 0 || !n) {
 236                 return NULL;
 237         }
 238         p += (len - 1);
 239         do {
 240                 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
 241                         n--;
 242                 }
 243
 244                 if (!n) {
 245                         return (smb_ucs2_t *)p;
 246                 }
 247         } while (p-- != s);
 248         return NULL;
 249 }
 250
 251 /*******************************************************************
 252  Wide strstr().
 253 ********************************************************************/
 254
 255 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
 256 {
 257         smb_ucs2_t *r;
 258         size_t inslen;
 259
 260         if (!s || !*s || !ins || !*ins) {
 261                 return NULL;
 262         }
 263
 264         inslen = strlen_w(ins);
 265         r = (smb_ucs2_t *)s;
 266
 267         while ((r = strchr_w(r, *ins))) {
 268                 if (strncmp_w(r, ins, inslen) == 0) {
 269                         return r;
 270                 }
 271                 r++;
 272         }
 273
 274         return NULL;
 275 }
 276
 277 /*******************************************************************
 278  Convert a string to lower case.
 279  return True if any char is converted
 280
 281  This is unsafe for any string involving a UTF16 character
 282 ********************************************************************/
 283
 284 bool strlower_w(smb_ucs2_t *s)
 285 {
 286         smb_ucs2_t cp;
 287         bool ret = False;
 288
 289         while (*(COPY_UCS2_CHAR(&cp,s))) {
 290                 smb_ucs2_t v = tolower_m(cp);
 291                 if (v != cp) {
 292                         COPY_UCS2_CHAR(s,&v);
 293                         ret = True;
 294                 }
 295                 s++;
 296         }
 297         return ret;
 298 }
 299
 300 /*******************************************************************
 301  Convert a string to upper case.
 302  return True if any char is converted
 303
 304  This is unsafe for any string involving a UTF16 character
 305 ********************************************************************/
 306
 307 bool strupper_w(smb_ucs2_t *s)
 308 {
 309         smb_ucs2_t cp;
 310         bool ret = False;
 311         while (*(COPY_UCS2_CHAR(&cp,s))) {
 312                 smb_ucs2_t v = toupper_m(cp);
 313                 if (v != cp) {
 314                         COPY_UCS2_CHAR(s,&v);
 315                         ret = True;
 316                 }
 317                 s++;
 318         }
 319         return ret;
 320 }
 321
 322 /*******************************************************************
 323  Convert a string to "normal" form.
 324 ********************************************************************/
 325
 326 void strnorm_w(smb_ucs2_t *s, int case_default)
 327 {
 328         if (case_default == CASE_UPPER) {
 329                 strupper_w(s);
 330         } else {
 331                 strlower_w(s);
 332         }
 333 }
 334
 335 int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
 336 {
 337         smb_ucs2_t cpa, cpb;
 338
 339         while ((*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
 340                 a++;
 341                 b++;
 342         }
 343         return (*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b)));
 344         /* warning: if *a != *b and both are not 0 we return a random
 345                 greater or lesser than 0 number not realted to which
 346                 string is longer */
 347 }
 348
 349 int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
 350 {
 351         smb_ucs2_t cpa, cpb;
 352         size_t n = 0;
 353
 354         while ((n < len) && (*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
 355                 a++;
 356                 b++;
 357                 n++;
 358         }
 359         return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0;
 360 }
 361
 362 /*******************************************************************
 363  Case insensitive string comparison.
 364 ********************************************************************/
 365
 366 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
 367 {
 368         smb_ucs2_t cpa, cpb;
 369
 370         while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) {
 371                 a++;
 372                 b++;
 373         }
 374         return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b))));
 375 }
 376
 377 /*******************************************************************
 378  Case insensitive string comparison, length limited.
 379 ********************************************************************/
 380
 381 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
 382 {
 383         smb_ucs2_t cpa, cpb;
 384         size_t n = 0;
 385
 386         while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) {
 387                 a++;
 388                 b++;
 389                 n++;
 390         }
 391         return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0;
 392 }
 393
 394 /*******************************************************************
 395  Duplicate string.
 396 ********************************************************************/
 397
 398 smb_ucs2_t *strdup_w(const smb_ucs2_t *src)
 399 {
 400         smb_ucs2_t *dest;
 401         size_t len = strlen_w(src);
 402         dest = SMB_MALLOC_ARRAY(smb_ucs2_t, len + 1);
 403         if (!dest) {
 404                 DEBUG(0,("strdup_w: out of memory!\n"));
 405                 return NULL;
 406         }
 407
 408         memcpy(dest, src, len * sizeof(smb_ucs2_t));
 409         dest[len] = 0;
 410         return dest;
 411 }
 412 /*
 413   The *_wa() functions take a combination of 7 bit ascii
  414   and wide characters. They are used so that you can use string
 415   functions combining C string constants with ucs2 strings
 416
 417   The char* arguments must NOT be multibyte - to be completely sure
 418   of this only pass string constants */
 419
 420 int strcmp_wa(const smb_ucs2_t *a, const char *b)
 421 {
 422         smb_ucs2_t cp = 0;
 423
 424         while (*b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
 425                 a++;
 426                 b++;
 427         }
 428         return (*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b));
 429 }
 430
 431 /*************************************************************
 432  ascii only toupper - saves the need for smbd to be in C locale.
 433 *************************************************************/
 434
 435 int toupper_ascii(int c)
 436 {
 437         smb_ucs2_t uc = toupper_m(UCS2_CHAR(c));
 438         return UCS2_TO_CHAR(uc);
 439 }
 440
 441 /*************************************************************
 442  ascii only tolower - saves the need for smbd to be in C locale.
 443 *************************************************************/
 444
 445 int tolower_ascii(int c)
 446 {
 447         smb_ucs2_t uc = tolower_m(UCS2_CHAR(c));
 448         return UCS2_TO_CHAR(uc);
 449 }
 450
 451 /*************************************************************
 452  ascii only isupper - saves the need for smbd to be in C locale.
 453 *************************************************************/
 454
 455 int isupper_ascii(int c)
 456 {
 457         return isupper_m(UCS2_CHAR(c));
 458 }
 459
 460 /*************************************************************
 461  ascii only islower - saves the need for smbd to be in C locale.
 462 *************************************************************/
 463
 464 int islower_ascii(int c)
 465 {
 466         return islower_m(UCS2_CHAR(c));
 467 }