Do not use the file system GET_REAL_FILENAME for mangled names
[Samba.git] / source3 / smbd / mangle_hash2.c
blob3a3939c51145d47c77bad555af27dd8d5ed43740
1 /*
2 Unix SMB/CIFS implementation.
3 new hash based name mangling implementation
4 Copyright (C) Andrew Tridgell 2002
5 Copyright (C) Simo Sorce 2002
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 this mangling scheme uses the following format
24 Annnn~n.AAA
26 where nnnnn is a base 36 hash, and A represents characters from the original string
28 The hash is taken of the leading part of the long filename, in uppercase
30 for simplicity, we only allow ascii characters in 8.3 names
33 /* hash algorithm changed to FNV1 by idra@samba.org (Simo Sorce).
34 * see http://www.isthe.com/chongo/tech/comp/fnv/index.html for a
35 * discussion on Fowler / Noll / Vo (FNV) Hash by one of its authors
39 ===============================================================================
40 NOTE NOTE NOTE!!!
42 This file deliberately uses non-multibyte string functions in many places. This
43 is *not* a mistake. This code is multi-byte safe, but it gets this property
44 through some very subtle knowledge of the way multi-byte strings are encoded
45 and the fact that this mangling algorithm only supports ascii characters in
46 8.3 names.
48 please don't convert this file to use the *_m() functions!!
49 ===============================================================================
53 #include "includes.h"
54 #include "smbd/globals.h"
/* Debug output for the mangling code.  While the condition below is 1,
   M_DEBUG simply forwards to DEBUG; change it to 0 to compile every
   M_DEBUG call away. */
#if 1
#define M_DEBUG(level, x) DEBUG(level, x)
#else
#define M_DEBUG(level, x)
#endif

/* Per-character property bits.  They are stored in char_flags[] and
   filled in by init_tables(). */
#define FLAG_BASECHAR 0x01	/* 0-9, a-z, A-Z */
#define FLAG_ASCII    0x02	/* character accepted in an 8.3 name */
#define FLAG_ILLEGAL  0x04	/* character that makes a long name illegal */
#define FLAG_WILDCARD 0x08	/* wildcard character */

/* The "possible" bits are a fast pre-filter for DOS reserved names: bit N
   is set on every character that occurs at position N of some reserved
   name. */
#define FLAG_POSSIBLE1 0x10
#define FLAG_POSSIBLE2 0x20
#define FLAG_POSSIBLE3 0x40
#define FLAG_POSSIBLE4 0x80

/* by default have a max of 4096 entries in the cache. */
#ifndef MANGLE_CACHE_SIZE
#define MANGLE_CACHE_SIZE 4096
#endif

/* FNV1 parameters.  The initial value is the fnv1 of the string:
   idra@samba.org 2002 */
#define FNV1_PRIME 0x01000193
#define FNV1_INIT 0xa6b93095

/* Non-zero when character c has any of the given flag bits set.  The
   cast keeps the table index non-negative for bytes >= 0x80. */
#define FLAG_CHECK(c, flag) (char_flags[(unsigned char)(c)] & (flag))
/* Digit alphabet for the hash part of an 8.3 name.  It must be exactly 36
   characters long; the array is sized to 36, so no terminating NUL is
   stored. */
static const char basechars[36] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* map a value in the range 0..35 to its base 36 digit */
#define base_forward(v) (basechars[(v)])
91 /* the list of reserved dos names - all of these are illegal */
92 static const char * const reserved_names[] =
93 { "AUX", "LOCK$", "CON", "COM1", "COM2", "COM3", "COM4",
94 "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };
96 /*
97 hash a string of the specified length. The string does not need to be
98 null terminated
100 this hash needs to be fast with a low collision rate (what hash doesn't?)
102 static unsigned int mangle_hash(const char *key, unsigned int length)
104 unsigned int value;
105 unsigned int i;
106 fstring str;
108 /* we have to uppercase here to ensure that the mangled name
109 doesn't depend on the case of the long name. Note that this
110 is the only place where we need to use a multi-byte string
111 function */
112 length = MIN(length,sizeof(fstring)-1);
113 strncpy(str, key, length);
114 str[length] = 0;
115 strupper_m(str);
117 /* the length of a multi-byte string can change after a strupper_m */
118 length = strlen(str);
120 /* Set the initial value from the key size. */
121 for (value = FNV1_INIT, i=0; i < length; i++) {
122 value *= (unsigned int)FNV1_PRIME;
123 value ^= (unsigned int)(str[i]);
126 /* note that we force it to a 31 bit hash, to keep within the limits
127 of the 36^6 mangle space */
128 return value & ~0x80000000;
132 insert an entry into the prefix cache. The string might not be null
133 terminated */
134 static void cache_insert(const char *prefix, int length, unsigned int hash)
136 char *str = SMB_STRNDUP(prefix, length);
138 if (str == NULL) {
139 return;
142 memcache_add(smbd_memcache(), MANGLE_HASH2_CACHE,
143 data_blob_const(&hash, sizeof(hash)),
144 data_blob_const(str, length+1));
145 SAFE_FREE(str);
149 lookup an entry in the prefix cache. Return NULL if not found.
151 static char *cache_lookup(TALLOC_CTX *mem_ctx, unsigned int hash)
153 DATA_BLOB value;
155 if (!memcache_lookup(smbd_memcache(), MANGLE_HASH2_CACHE,
156 data_blob_const(&hash, sizeof(hash)), &value)) {
157 return NULL;
160 SMB_ASSERT((value.length > 0)
161 && (value.data[value.length-1] == '\0'));
163 return talloc_strdup(mem_ctx, (char *)value.data);
168 determine if a string is possibly in a mangled format, ignoring
169 case
171 In this algorithm, mangled names use only pure ascii characters (no
172 multi-byte) so we can avoid doing a UCS2 conversion
174 static bool is_mangled_component(const char *name, size_t len)
176 unsigned int i;
178 M_DEBUG(10,("is_mangled_component %s (len %lu) ?\n", name, (unsigned long)len));
180 /* check the length */
181 if (len > 12 || len < 8)
182 return False;
184 /* the best distinguishing characteristic is the ~ */
185 if (name[6] != '~')
186 return False;
188 /* check extension */
189 if (len > 8) {
190 if (name[8] != '.')
191 return False;
192 for (i=9; name[i] && i < len; i++) {
193 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
194 return False;
199 /* check lead characters */
200 for (i=0;i<mangle_prefix;i++) {
201 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
202 return False;
206 /* check rest of hash */
207 if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
208 return False;
210 for (i=mangle_prefix;i<6;i++) {
211 if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) {
212 return False;
216 M_DEBUG(10,("is_mangled_component %s (len %lu) -> yes\n", name, (unsigned long)len));
218 return True;
224 determine if a string is possibly in a mangled format, ignoring
225 case
227 In this algorithm, mangled names use only pure ascii characters (no
228 multi-byte) so we can avoid doing a UCS2 conversion
230 NOTE! This interface must be able to handle a path with unix
231 directory separators. It should return true if any component is
232 mangled
234 static bool is_mangled(const char *name, const struct share_params *parm)
236 const char *p;
237 const char *s;
239 M_DEBUG(10,("is_mangled %s ?\n", name));
241 for (s=name; (p=strchr(s, '/')); s=p+1) {
242 if (is_mangled_component(s, PTR_DIFF(p, s))) {
243 return True;
247 /* and the last part ... */
248 return is_mangled_component(s,strlen(s));
253 see if a filename is an allowable 8.3 name.
255 we are only going to allow ascii characters in 8.3 names, as this
256 simplifies things greatly (it means that we know the string won't
257 get larger when converted from UNIX to DOS formats)
259 static bool is_8_3(const char *name, bool check_case, bool allow_wildcards, const struct share_params *p)
261 int len, i;
262 char *dot_p;
264 /* as a special case, the names '.' and '..' are allowable 8.3 names */
265 if (name[0] == '.') {
266 if (!name[1] || (name[1] == '.' && !name[2])) {
267 return True;
271 /* the simplest test is on the overall length of the
272 filename. Note that we deliberately use the ascii string
273 length (not the multi-byte one) as it is faster, and gives us
274 the result we need in this case. Using strlen_m would not
275 only be slower, it would be incorrect */
276 len = strlen(name);
277 if (len > 12)
278 return False;
280 /* find the '.'. Note that once again we use the non-multibyte
281 function */
282 dot_p = strchr(name, '.');
284 if (!dot_p) {
285 /* if the name doesn't contain a '.' then its length
286 must be less than 8 */
287 if (len > 8) {
288 return False;
290 } else {
291 int prefix_len, suffix_len;
293 /* if it does contain a dot then the prefix must be <=
294 8 and the suffix <= 3 in length */
295 prefix_len = PTR_DIFF(dot_p, name);
296 suffix_len = len - (prefix_len+1);
298 if (prefix_len > 8 || suffix_len > 3 || suffix_len == 0) {
299 return False;
302 /* a 8.3 name cannot contain more than 1 '.' */
303 if (strchr(dot_p+1, '.')) {
304 return False;
308 /* the length are all OK. Now check to see if the characters themselves are OK */
309 for (i=0; name[i]; i++) {
310 /* note that we may allow wildcard petterns! */
311 if (!FLAG_CHECK(name[i], FLAG_ASCII|(allow_wildcards ? FLAG_WILDCARD : 0)) && name[i] != '.') {
312 return False;
316 /* it is a good 8.3 name */
317 return True;
/*
   Reset the mangling cache on a smb.conf reload.  That only makes sense
   for mangling backends that take parameters from smb.conf; this backend
   has none, so there is nothing to do.
*/
static void mangle_reset(void)
{
	/* intentionally empty */
	return;
}
333 try to find a 8.3 name in the cache, and if found then
334 replace the string with the original long name.
336 static bool lookup_name_from_8_3(TALLOC_CTX *ctx,
337 const char *name,
338 char **pp_out, /* talloced on the given context. */
339 const struct share_params *p)
341 unsigned int hash, multiplier;
342 unsigned int i;
343 char *prefix;
344 char extension[4];
346 *pp_out = NULL;
348 /* make sure that this is a mangled name from this cache */
349 if (!is_mangled(name, p)) {
350 M_DEBUG(10,("lookup_name_from_8_3: %s -> not mangled\n", name));
351 return False;
354 /* we need to extract the hash from the 8.3 name */
355 hash = base_reverse[(unsigned char)name[7]];
356 for (multiplier=36, i=5;i>=mangle_prefix;i--) {
357 unsigned int v = base_reverse[(unsigned char)name[i]];
358 hash += multiplier * v;
359 multiplier *= 36;
362 /* now look in the prefix cache for that hash */
363 prefix = cache_lookup(ctx, hash);
364 if (!prefix) {
365 M_DEBUG(10,("lookup_name_from_8_3: %s -> %08X -> not found\n",
366 name, hash));
367 return False;
370 /* we found it - construct the full name */
371 if (name[8] == '.') {
372 strncpy(extension, name+9, 3);
373 extension[3] = 0;
374 } else {
375 extension[0] = 0;
378 if (extension[0]) {
379 M_DEBUG(10,("lookup_name_from_8_3: %s -> %s.%s\n",
380 name, prefix, extension));
381 *pp_out = talloc_asprintf(ctx, "%s.%s", prefix, extension);
382 } else {
383 M_DEBUG(10,("lookup_name_from_8_3: %s -> %s\n", name, prefix));
384 *pp_out = talloc_strdup(ctx, prefix);
387 TALLOC_FREE(prefix);
389 if (!*pp_out) {
390 M_DEBUG(0,("talloc_fail"));
391 return False;
394 return True;
398 look for a DOS reserved name
400 static bool is_reserved_name(const char *name)
402 if (FLAG_CHECK(name[0], FLAG_POSSIBLE1) &&
403 FLAG_CHECK(name[1], FLAG_POSSIBLE2) &&
404 FLAG_CHECK(name[2], FLAG_POSSIBLE3) &&
405 FLAG_CHECK(name[3], FLAG_POSSIBLE4)) {
406 /* a likely match, scan the lot */
407 int i;
408 for (i=0; reserved_names[i]; i++) {
409 int len = strlen(reserved_names[i]);
410 /* note that we match on COM1 as well as COM1.foo */
411 if (strnequal(name, reserved_names[i], len) &&
412 (name[len] == '.' || name[len] == 0)) {
413 return True;
418 return False;
422 See if a filename is a legal long filename.
423 A filename ending in a '.' is not legal unless it's "." or "..". JRA.
424 A filename ending in ' ' is not legal either. See bug id #2769.
427 static bool is_legal_name(const char *name)
429 const char *dot_pos = NULL;
430 bool alldots = True;
431 size_t numdots = 0;
433 while (*name) {
434 if (((unsigned int)name[0]) > 128 && (name[1] != 0)) {
435 /* Possible start of mb character. */
436 char mbc[2];
438 * Note that if CH_UNIX is utf8 a string may be 3
439 * bytes, but this is ok as mb utf8 characters don't
440 * contain embedded ascii bytes. We are really checking
441 * for mb UNIX asian characters like Japanese (SJIS) here.
442 * JRA.
444 if (convert_string(CH_UNIX, CH_UTF16LE, name, 2, mbc, 2, False) == 2) {
445 /* Was a good mb string. */
446 name += 2;
447 continue;
451 if (FLAG_CHECK(name[0], FLAG_ILLEGAL)) {
452 return False;
454 if (name[0] == '.') {
455 dot_pos = name;
456 numdots++;
457 } else {
458 alldots = False;
460 if ((name[0] == ' ') && (name[1] == '\0')) {
461 /* Can't end in ' ' */
462 return False;
464 name++;
467 if (dot_pos) {
468 if (alldots && (numdots == 1 || numdots == 2))
469 return True; /* . or .. is a valid name */
471 /* A valid long name cannot end in '.' */
472 if (dot_pos[1] == '\0')
473 return False;
475 return True;
/*
   A name has to be mangled when it is a DOS reserved name or when it is
   not a legal long filename.  `p` is not consulted.
*/
static bool must_mangle(const char *name,
			const struct share_params *p)
{
	return is_reserved_name(name) || !is_legal_name(name);
}
488 the main forward mapping function, which converts a long filename to
489 a 8.3 name
491 if cache83 is not set then we don't cache the result
494 static bool hash2_name_to_8_3(const char *name,
495 char new_name[13],
496 bool cache83,
497 int default_case,
498 const struct share_params *p)
500 char *dot_p;
501 char lead_chars[7];
502 char extension[4];
503 unsigned int extension_length, i;
504 unsigned int prefix_len;
505 unsigned int hash, v;
507 /* reserved names are handled specially */
508 if (!is_reserved_name(name)) {
509 /* if the name is already a valid 8.3 name then we don't need to
510 * change anything */
511 if (is_legal_name(name) && is_8_3(name, False, False, p)) {
512 safe_strcpy(new_name, name, 12);
513 return True;
517 /* find the '.' if any */
518 dot_p = strrchr(name, '.');
520 if (dot_p) {
521 /* if the extension contains any illegal characters or
522 is too long or zero length then we treat it as part
523 of the prefix */
524 for (i=0; i<4 && dot_p[i+1]; i++) {
525 if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
526 dot_p = NULL;
527 break;
530 if (i == 0 || i == 4) {
531 dot_p = NULL;
535 /* the leading characters in the mangled name is taken from
536 the first characters of the name, if they are ascii otherwise
537 '_' is used
539 for (i=0;i<mangle_prefix && name[i];i++) {
540 lead_chars[i] = name[i];
541 if (! FLAG_CHECK(lead_chars[i], FLAG_ASCII)) {
542 lead_chars[i] = '_';
544 lead_chars[i] = toupper_ascii(lead_chars[i]);
546 for (;i<mangle_prefix;i++) {
547 lead_chars[i] = '_';
550 /* the prefix is anything up to the first dot */
551 if (dot_p) {
552 prefix_len = PTR_DIFF(dot_p, name);
553 } else {
554 prefix_len = strlen(name);
557 /* the extension of the mangled name is taken from the first 3
558 ascii chars after the dot */
559 extension_length = 0;
560 if (dot_p) {
561 for (i=1; extension_length < 3 && dot_p[i]; i++) {
562 char c = dot_p[i];
563 if (FLAG_CHECK(c, FLAG_ASCII)) {
564 extension[extension_length++] =
565 toupper_ascii(c);
570 /* find the hash for this prefix */
571 v = hash = mangle_hash(name, prefix_len);
573 /* now form the mangled name. */
574 for (i=0;i<mangle_prefix;i++) {
575 new_name[i] = lead_chars[i];
577 new_name[7] = base_forward(v % 36);
578 new_name[6] = '~';
579 for (i=5; i>=mangle_prefix; i--) {
580 v = v / 36;
581 new_name[i] = base_forward(v % 36);
584 /* add the extension */
585 if (extension_length) {
586 new_name[8] = '.';
587 memcpy(&new_name[9], extension, extension_length);
588 new_name[9+extension_length] = 0;
589 } else {
590 new_name[8] = 0;
593 if (cache83) {
594 /* put it in the cache */
595 cache_insert(name, prefix_len, hash);
598 M_DEBUG(10,("hash2_name_to_8_3: %s -> %08X -> %s (cache=%d)\n",
599 name, hash, new_name, cache83));
601 return True;
604 /* initialise the flags table
606 we allow only a very restricted set of characters as 'ascii' in this
607 mangling backend. This isn't a significant problem as modern clients
608 use the 'long' filenames anyway, and those don't have these
609 restrictions.
611 static void init_tables(void)
613 int i;
615 memset(char_flags, 0, sizeof(char_flags));
617 for (i=1;i<128;i++) {
618 if (i <= 0x1f) {
619 /* Control characters. */
620 char_flags[i] |= FLAG_ILLEGAL;
623 if ((i >= '0' && i <= '9') ||
624 (i >= 'a' && i <= 'z') ||
625 (i >= 'A' && i <= 'Z')) {
626 char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR);
628 if (strchr("_-$~", i)) {
629 char_flags[i] |= FLAG_ASCII;
632 if (strchr("*\\/?<>|\":", i)) {
633 char_flags[i] |= FLAG_ILLEGAL;
636 if (strchr("*?\"<>", i)) {
637 char_flags[i] |= FLAG_WILDCARD;
641 memset(base_reverse, 0, sizeof(base_reverse));
642 for (i=0;i<36;i++) {
643 base_reverse[(unsigned char)base_forward(i)] = i;
646 /* fill in the reserved names flags. These are used as a very
647 fast filter for finding possible DOS reserved filenames */
648 for (i=0; reserved_names[i]; i++) {
649 unsigned char c1, c2, c3, c4;
651 c1 = (unsigned char)reserved_names[i][0];
652 c2 = (unsigned char)reserved_names[i][1];
653 c3 = (unsigned char)reserved_names[i][2];
654 c4 = (unsigned char)reserved_names[i][3];
656 char_flags[c1] |= FLAG_POSSIBLE1;
657 char_flags[c2] |= FLAG_POSSIBLE2;
658 char_flags[c3] |= FLAG_POSSIBLE3;
659 char_flags[c4] |= FLAG_POSSIBLE4;
660 char_flags[tolower_ascii(c1)] |= FLAG_POSSIBLE1;
661 char_flags[tolower_ascii(c2)] |= FLAG_POSSIBLE2;
662 char_flags[tolower_ascii(c3)] |= FLAG_POSSIBLE3;
663 char_flags[tolower_ascii(c4)] |= FLAG_POSSIBLE4;
665 char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
670 the following provides the abstraction layer to make it easier
671 to drop in an alternative mangling implementation */
672 static const struct mangle_fns mangle_hash2_fns = {
673 mangle_reset,
674 is_mangled,
675 must_mangle,
676 is_8_3,
677 lookup_name_from_8_3,
678 hash2_name_to_8_3
681 /* return the methods for this mangling implementation */
682 const struct mangle_fns *mangle_hash2_init(void)
684 /* the mangle prefix can only be in the mange 1 to 6 */
685 mangle_prefix = lp_mangle_prefix();
686 if (mangle_prefix > 6) {
687 mangle_prefix = 6;
689 if (mangle_prefix < 1) {
690 mangle_prefix = 1;
693 init_tables();
694 mangle_reset();
696 return &mangle_hash2_fns;
/* POSIX paths backend: there is no mangling state, so nothing to reset. */
static void posix_mangle_reset(void)
{
	return;
}
702 static bool posix_is_mangled(const char *s, const struct share_params *p)
704 return False;
707 static bool posix_must_mangle(const char *s, const struct share_params *p)
709 return False;
712 static bool posix_is_8_3(const char *fname,
713 bool check_case,
714 bool allow_wildcards,
715 const struct share_params *p)
717 return False;
720 static bool posix_lookup_name_from_8_3(TALLOC_CTX *ctx,
721 const char *in,
722 char **out, /* talloced on the given context. */
723 const struct share_params *p)
725 return False;
728 static bool posix_name_to_8_3(const char *in,
729 char out[13],
730 bool cache83,
731 int default_case,
732 const struct share_params *p)
734 memset(out, '\0', 13);
735 return True;
738 /* POSIX paths backend - no mangle. */
739 static const struct mangle_fns posix_mangle_fns = {
740 posix_mangle_reset,
741 posix_is_mangled,
742 posix_must_mangle,
743 posix_is_8_3,
744 posix_lookup_name_from_8_3,
745 posix_name_to_8_3
748 const struct mangle_fns *posix_mangle_init(void)
750 return &posix_mangle_fns;