usr/src/common/smbsrv/smb_string.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright (c) 2017 by Delphix. All rights reserved.
  27  */
  28
  29 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
  30 #include <sys/types.h>
  31 #include <sys/sunddi.h>
  32 #else
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 #include <strings.h>
  37 #endif
  38 #include <sys/u8_textprep.h>
  39 #include <smbsrv/alloc.h>
  40 #include <sys/errno.h>
  41 #include <smbsrv/string.h>
  42 #include <smbsrv/cp_usascii.h>
  43 #include <smbsrv/cp_unicode.h>
  44
/* Number of elements in the a_unicode[] array. */
#define UNICODE_N_ENTRIES       (sizeof (a_unicode) / sizeof (a_unicode[0]))

/*
 * Global pointer to the current codepage: defaults to ASCII,
 * and a flag indicating whether the codepage is Unicode or ASCII.
 */
static const smb_codepage_t *current_codepage = usascii_codepage;
static boolean_t is_unicode = B_FALSE;

/*
 * The table returned by smb_unicode_init(), remembered by
 * smb_codepage_init() so that smb_codepage_fini() can free it.
 * NULL while no such table is held.
 */
static smb_codepage_t *unicode_codepage = NULL;

/* Builds the 16-bit codepage table; returns NULL on failure. */
static smb_codepage_t *smb_unicode_init(void);
  57
  58 /*
  59  * strsubst
  60  *
  61  * Scan a string replacing all occurrences of orgchar with newchar.
  62  * Returns a pointer to s, or null of s is null.
  63  */
  64 char *
  65 strsubst(char *s, char orgchar, char newchar)
  66 {
  67         char *p = s;
  68
  69         if (p == 0)
  70                 return (0);
  71
  72         while (*p) {
  73                 if (*p == orgchar)
  74                         *p = newchar;
  75                 ++p;
  76         }
  77
  78         return (s);
  79 }
  80
  81 /*
  82  * strcanon
  83  *
  84  * Normalize a string by reducing all the repeated characters in
  85  * buf as defined by class. For example;
  86  *
  87  *              char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt");
  88  *              strcanon(buf, "/\\");
  89  *
  90  * Would result in buf containing the following string:
  91  *
  92  *              /d1/d2/d3\d4\f1.txt
  93  *
  94  * This function modifies the contents of buf in place and returns
  95  * a pointer to buf.
  96  */
  97 char *
  98 strcanon(char *buf, const char *class)
  99 {
 100         char *p = buf;
 101         char *q = buf;
 102         char *r;
 103
 104         while (*p) {
 105                 *q++ = *p;
 106
 107                 if ((r = strchr(class, *p)) != 0) {
 108                         while (*p == *r)
 109                                 ++p;
 110                 } else
 111                         ++p;
 112         }
 113
 114         *q = '\0';
 115         return (buf);
 116 }
 117
 118 void
 119 smb_codepage_init(void)
 120 {
 121         smb_codepage_t *cp;
 122
 123         if (is_unicode)
 124                 return;
 125
 126         if ((cp = smb_unicode_init()) != NULL) {
 127                 current_codepage = cp;
 128                 unicode_codepage = cp;
 129                 is_unicode = B_TRUE;
 130         } else {
 131                 current_codepage = usascii_codepage;
 132                 is_unicode = B_FALSE;
 133         }
 134 }
 135
 136 void
 137 smb_codepage_fini(void)
 138 {
 139         if (unicode_codepage != NULL) {
 140                 MEM_FREE("unicode", unicode_codepage);
 141                 unicode_codepage = NULL;
 142                 current_codepage = NULL;
 143         }
 144 }
 145
 146 /*
 147  * Determine whether or not a character is an uppercase character.
 148  * This function operates on the current codepage table. Returns
 149  * non-zero if the character is uppercase. Otherwise returns zero.
 150  */
 151 int
 152 smb_isupper(int c)
 153 {
 154         uint16_t mask = is_unicode ? 0xffff : 0xff;
 155
 156         return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
 157 }
 158
 159 /*
 160  * Determine whether or not a character is an lowercase character.
 161  * This function operates on the current codepage table. Returns
 162  * non-zero if the character is lowercase. Otherwise returns zero.
 163  */
 164 int
 165 smb_islower(int c)
 166 {
 167         uint16_t mask = is_unicode ? 0xffff : 0xff;
 168
 169         return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
 170 }
 171
 172 /*
 173  * Convert individual characters to their uppercase equivalent value.
 174  * If the specified character is lowercase, the uppercase value will
 175  * be returned. Otherwise the original value will be returned.
 176  */
 177 int
 178 smb_toupper(int c)
 179 {
 180         uint16_t mask = is_unicode ? 0xffff : 0xff;
 181
 182         return (current_codepage[c & mask].upper);
 183 }
 184
 185 /*
 186  * Convert individual characters to their lowercase equivalent value.
 187  * If the specified character is uppercase, the lowercase value will
 188  * be returned. Otherwise the original value will be returned.
 189  */
 190 int
 191 smb_tolower(int c)
 192 {
 193         uint16_t mask = is_unicode ? 0xffff : 0xff;
 194
 195         return (current_codepage[c & mask].lower);
 196 }
 197
 198 /*
 199  * Convert a string to uppercase using the appropriate codepage. The
 200  * string is converted in place. A pointer to the string is returned.
 201  * There is an assumption here that uppercase and lowercase values
 202  * always result encode to the same length.
 203  */
 204 char *
 205 smb_strupr(char *s)
 206 {
 207         smb_wchar_t c;
 208         char *p = s;
 209
 210         while (*p) {
 211                 if (smb_isascii(*p)) {
 212                         *p = smb_toupper(*p);
 213                         p++;
 214                 } else {
 215                         if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
 216                                 return (0);
 217
 218                         if (c == 0)
 219                                 break;
 220
 221                         c = smb_toupper(c);
 222                         p += smb_wctomb(p, c);
 223                 }
 224         }
 225
 226         return (s);
 227 }
 228
 229 /*
 230  * Convert a string to lowercase using the appropriate codepage. The
 231  * string is converted in place. A pointer to the string is returned.
 232  * There is an assumption here that uppercase and lowercase values
 233  * always result encode to the same length.
 234  */
 235 char *
 236 smb_strlwr(char *s)
 237 {
 238         smb_wchar_t c;
 239         char *p = s;
 240
 241         while (*p) {
 242                 if (smb_isascii(*p)) {
 243                         *p = smb_tolower(*p);
 244                         p++;
 245                 } else {
 246                         if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
 247                                 return (0);
 248
 249                         if (c == 0)
 250                                 break;
 251
 252                         c = smb_tolower(c);
 253                         p += smb_wctomb(p, c);
 254                 }
 255         }
 256
 257         return (s);
 258 }
 259
 260 /*
 261  * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
 262  * -1 is returned if "s" is not a valid multi-byte string.
 263  */
 264 int
 265 smb_isstrlwr(const char *s)
 266 {
 267         smb_wchar_t c;
 268         int n;
 269         const char *p = s;
 270
 271         while (*p) {
 272                 if (smb_isascii(*p) && smb_isupper(*p))
 273                         return (0);
 274                 else {
 275                         if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
 276                                 return (-1);
 277
 278                         if (c == 0)
 279                                 break;
 280
 281                         if (smb_isupper(c))
 282                                 return (0);
 283
 284                         p += n;
 285                 }
 286         }
 287
 288         return (1);
 289 }
 290
 291 /*
 292  * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
 293  * -1 is returned if "s" is not a valid multi-byte string.
 294  */
 295 int
 296 smb_isstrupr(const char *s)
 297 {
 298         smb_wchar_t c;
 299         int n;
 300         const char *p = s;
 301
 302         while (*p) {
 303                 if (smb_isascii(*p) && smb_islower(*p))
 304                         return (0);
 305                 else {
 306                         if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
 307                                 return (-1);
 308
 309                         if (c == 0)
 310                                 break;
 311
 312                         if (smb_islower(c))
 313                                 return (0);
 314
 315                         p += n;
 316                 }
 317         }
 318
 319         return (1);
 320 }
 321
 322 /*
 323  * Compare the null-terminated strings s1 and s2 and return an integer
 324  * greater than, equal to or less than 0 dependent on whether s1 is
 325  * lexicographically greater than, equal to or less than s2 after
 326  * translation of each character to lowercase.  The original strings
 327  * are not modified.
 328  *
 329  * If n is non-zero, at most n bytes are compared.  Otherwise, the strings
 330  * are compared until a null terminator is encountered.
 331  *
 332  * Out:    0 if strings are equal
 333  *       < 0 if first string < second string
 334  *       > 0 if first string > second string
 335  */
 336 int
 337 smb_strcasecmp(const char *s1, const char *s2, size_t n)
 338 {
 339         int     err = 0;
 340         int     rc;
 341
 342         rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err);
 343         if (err != 0)
 344                 return (-1);
 345         return (rc);
 346 }
 347
 348 /*
 349  * First build a codepage based on cp_unicode.h.  Then build the unicode
 350  * codepage from this interim codepage by copying the entries over while
 351  * fixing them and filling in the gaps.
 352  */
 353 static smb_codepage_t *
 354 smb_unicode_init(void)
 355 {
 356         smb_codepage_t  *unicode;
 357         uint32_t        a = 0;
 358         uint32_t        b = 0;
 359
 360         unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16);
 361         if (unicode == NULL)
 362                 return (NULL);
 363
 364         while (b != 0xffff) {
 365                 /*
 366                  * If there is a gap in the standard,
 367                  * fill in the gap with no-case entries.
 368                  */
 369                 if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) {
 370                         unicode[b].ctype = CODEPAGE_ISNONE;
 371                         unicode[b].upper = (smb_wchar_t)b;
 372                         unicode[b].lower = (smb_wchar_t)b;
 373                         b++;
 374                         continue;
 375                 }
 376
 377                 /*
 378                  * Copy the entry and fixup as required.
 379                  */
 380                 switch (a_unicode[a].ctype) {
 381                 case CODEPAGE_ISNONE:
 382                         /*
 383                          * Replace 0xffff in upper/lower fields with its val.
 384                          */
 385                         unicode[b].ctype = CODEPAGE_ISNONE;
 386                         unicode[b].upper = (smb_wchar_t)b;
 387                         unicode[b].lower = (smb_wchar_t)b;
 388                         break;
 389                 case CODEPAGE_ISUPPER:
 390                         /*
 391                          * Some characters may have case yet not have
 392                          * case conversion.  Treat them as no-case.
 393                          */
 394                         if (a_unicode[a].lower == 0xffff) {
 395                                 unicode[b].ctype = CODEPAGE_ISNONE;
 396                                 unicode[b].upper = (smb_wchar_t)b;
 397                                 unicode[b].lower = (smb_wchar_t)b;
 398                         } else {
 399                                 unicode[b].ctype = CODEPAGE_ISUPPER;
 400                                 unicode[b].upper = (smb_wchar_t)b;
 401                                 unicode[b].lower = a_unicode[a].lower;
 402                         }
 403                         break;
 404                 case CODEPAGE_ISLOWER:
 405                         /*
 406                          * Some characters may have case yet not have
 407                          * case conversion.  Treat them as no-case.
 408                          */
 409                         if (a_unicode[a].upper == 0xffff) {
 410                                 unicode[b].ctype = CODEPAGE_ISNONE;
 411                                 unicode[b].upper = (smb_wchar_t)b;
 412                                 unicode[b].lower = (smb_wchar_t)b;
 413                         } else {
 414                                 unicode[b].ctype = CODEPAGE_ISLOWER;
 415                                 unicode[b].upper = a_unicode[a].upper;
 416                                 unicode[b].lower = (smb_wchar_t)b;
 417                         }
 418                         break;
 419                 default:
 420                         MEM_FREE("unicode", unicode);
 421                         return (NULL);
 422                 }
 423
 424                 a++;
 425                 b++;
 426         };
 427
 428         return (unicode);
 429 }
 430
 431 /*
 432  * Parse a UNC path (\\server\share\path) into its components.
 433  * Although a standard UNC path starts with two '\', in DFS
 434  * all UNC paths start with one '\'. So, this function only
 435  * checks for one.
 436  *
 437  * A valid UNC must at least contain two components i.e. server
 438  * and share. The path is parsed to:
 439  *
 440  * unc_server   server or domain name with no leading/trailing '\'
 441  * unc_share    share name with no leading/trailing '\'
 442  * unc_path     relative path to the share with no leading/trailing '\'
 443  *              it is valid for unc_path to be NULL.
 444  *
 445  * Upon successful return of this function, smb_unc_free()
 446  * MUST be called when returned 'unc' is no longer needed.
 447  *
 448  * Returns 0 on success, otherwise returns an errno code.
 449  */
 450 int
 451 smb_unc_init(const char *path, smb_unc_t *unc)
 452 {
 453         char *p;
 454
 455         if (path == NULL || unc == NULL || (*path != '\\' && *path != '/'))
 456                 return (EINVAL);
 457
 458         bzero(unc, sizeof (smb_unc_t));
 459
 460 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
 461         unc->unc_buf = smb_mem_strdup(path);
 462 #else
 463         if ((unc->unc_buf = strdup(path)) == NULL)
 464                 return (ENOMEM);
 465 #endif
 466
 467         (void) strsubst(unc->unc_buf, '\\', '/');
 468         (void) strcanon(unc->unc_buf, "/");
 469
 470         unc->unc_server = unc->unc_buf + 1;
 471         if (*unc->unc_server == '\0') {
 472                 smb_unc_free(unc);
 473                 return (EINVAL);
 474         }
 475
 476         if ((p = strchr(unc->unc_server, '/')) == NULL) {
 477                 smb_unc_free(unc);
 478                 return (EINVAL);
 479         }
 480
 481         *p++ = '\0';
 482         unc->unc_share = p;
 483
 484         if (*unc->unc_share == '\0') {
 485                 smb_unc_free(unc);
 486                 return (EINVAL);
 487         }
 488
 489         unc->unc_path = strchr(unc->unc_share, '/');
 490         if ((p = unc->unc_path) == NULL)
 491                 return (0);
 492
 493         unc->unc_path++;
 494         *p = '\0';
 495
 496         /* remove the last '/' if any */
 497         if ((p = strchr(unc->unc_path, '\0')) != NULL) {
 498                 if (*(--p) == '/')
 499                         *p = '\0';
 500         }
 501
 502         return (0);
 503 }
 504
 505 void
 506 smb_unc_free(smb_unc_t *unc)
 507 {
 508         if (unc == NULL)
 509                 return;
 510
 511 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
 512         smb_mem_free(unc->unc_buf);
 513 #else
 514         free(unc->unc_buf);
 515 #endif
 516         unc->unc_buf = NULL;
 517 }