make man
[Samba.git] / source / smbd / mangle_hash2.c
blob447118a6b1a2e78c4cbd32e1789989018ef575c0
1 /*
2 Unix SMB/CIFS implementation.
3 new hash based name mangling implementation
4 Copyright (C) Andrew Tridgell 2002
5 Copyright (C) Simo Sorce 2002
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 this mangling scheme uses the following format
25 Annnn~n.AAA
27 where nnnnn is a base 36 hash, and A represents characters from the original string
29 The hash is taken of the leading part of the long filename, in uppercase
31 for simplicity, we only allow ascii characters in 8.3 names
34 /* hash alghorithm changed to FNV1 by idra@samba.org (Simo Sorce).
35 * see http://www.isthe.com/chongo/tech/comp/fnv/index.html for a
36 * discussion on Fowler / Noll / Vo (FNV) Hash by one of it's authors
40 ===============================================================================
41 NOTE NOTE NOTE!!!
43 This file deliberately uses non-multibyte string functions in many places. This
44 is *not* a mistake. This code is multi-byte safe, but it gets this property
45 through some very subtle knowledge of the way multi-byte strings are encoded
46 and the fact that this mangling algorithm only supports ascii characters in
47 8.3 names.
49 please don't convert this file to use the *_m() functions!!
50 ===============================================================================
54 #include "includes.h"
56 /* these flags are used to mark characters in as having particular
57 properties */
58 #define FLAG_BASECHAR 1
59 #define FLAG_ASCII 2
60 #define FLAG_ILLEGAL 4
61 #define FLAG_WILDCARD 8
63 /* the "possible" flags are used as a fast way to find possible DOS
64 reserved filenames */
65 #define FLAG_POSSIBLE1 16
66 #define FLAG_POSSIBLE2 32
67 #define FLAG_POSSIBLE3 64
68 #define FLAG_POSSIBLE4 128
70 /* by default have a max of 4096 entries in the cache. */
71 #ifndef MANGLE_CACHE_SIZE
72 #define MANGLE_CACHE_SIZE 4096
73 #endif
75 #define FNV1_PRIME 0x01000193
76 /*the following number is a fnv1 of the string: idra@samba.org 2002 */
77 #define FNV1_INIT 0xa6b93095
79 /* these tables are used to provide fast tests for characters */
80 static unsigned char char_flags[256];
82 #define FLAG_CHECK(c, flag) (char_flags[(unsigned char)(c)] & (flag))
84 /* we will use a very simple direct mapped prefix cache. The big
85 advantage of this cache structure is speed and low memory usage
87 The cache is indexed by the low-order bits of the hash, and confirmed by
88 hashing the resulting cache entry to match the known hash
90 static char **prefix_cache;
91 static u32 *prefix_cache_hashes;
93 /* these are the characters we use in the 8.3 hash. Must be 36 chars long */
94 static const char *basechars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
95 static unsigned char base_reverse[256];
96 #define base_forward(v) basechars[v]
98 /* the list of reserved dos names - all of these are illegal */
99 static const char *reserved_names[] =
100 { "AUX", "LOCK$", "CON", "COM1", "COM2", "COM3", "COM4",
101 "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };
104 hash a string of the specified length. The string does not need to be
105 null terminated
107 this hash needs to be fast with a low collision rate (what hash doesn't?)
109 static u32 mangle_hash(const char *key, unsigned length)
111 u32 value;
112 u32 i;
113 fstring str;
115 /* we have to uppercase here to ensure that the mangled name
116 doesn't depend on the case of the long name. Note that this
117 is the only place where we need to use a multi-byte string
118 function */
119 strncpy(str, key, length);
120 str[length] = 0;
121 strupper(str);
123 /* the length of a multi-byte string can change after a strupper_m */
124 length = strlen(str);
126 /* Set the initial value from the key size. */
127 for (value = FNV1_INIT, i=0; i < length; i++) {
128 value *= (u32)FNV1_PRIME;
129 value ^= (u32)(str[i]);
132 /* note that we force it to a 31 bit hash, to keep within the limits
133 of the 36^6 mangle space */
134 return value & ~0x80000000;
138 initialise (ie. allocate) the prefix cache
140 static BOOL cache_init(void)
142 if (prefix_cache) return True;
144 prefix_cache = calloc(MANGLE_CACHE_SIZE, sizeof(char *));
145 if (!prefix_cache) return False;
147 prefix_cache_hashes = calloc(MANGLE_CACHE_SIZE, sizeof(u32));
148 if (!prefix_cache_hashes) return False;
150 return True;
154 insert an entry into the prefix cache. The string might not be null
155 terminated */
156 static void cache_insert(const char *prefix, int length, u32 hash)
158 int i = hash % MANGLE_CACHE_SIZE;
160 if (prefix_cache[i]) {
161 free(prefix_cache[i]);
164 prefix_cache[i] = strndup(prefix, length);
165 prefix_cache_hashes[i] = hash;
169 lookup an entry in the prefix cache. Return NULL if not found.
171 static const char *cache_lookup(u32 hash)
173 int i = hash % MANGLE_CACHE_SIZE;
175 if (!prefix_cache[i] || hash != prefix_cache_hashes[i]) {
176 return NULL;
179 /* yep, it matched */
180 return prefix_cache[i];
185 determine if a string is possibly in a mangled format, ignoring
186 case
188 In this algorithm, mangled names use only pure ascii characters (no
189 multi-byte) so we can avoid doing a UCS2 conversion
191 static BOOL is_mangled_component(const char *name)
193 int len, i;
195 DEBUG(10,("is_mangled_component %s ?\n", name));
197 /* check the length */
198 len = strlen(name);
199 if (len > 12 || len < 8) return False;
201 /* the best distinguishing characteristic is the ~ */
202 if (len > 7 && name[6] != '~') return False;
204 /* check extension */
205 if (len > 8) {
206 if (name[8] != '.') return False;
207 for (i=9; name[i]; i++) {
208 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
209 return False;
214 /* check first character */
215 if (! FLAG_CHECK(name[0], FLAG_ASCII)) {
216 return False;
219 /* check rest of hash */
220 if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
221 return False;
223 for (i=1;i<6;i++) {
224 if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) {
225 return False;
229 DEBUG(10,("is_mangled %s -> yes\n", name));
231 return True;
237 determine if a string is possibly in a mangled format, ignoring
238 case
240 In this algorithm, mangled names use only pure ascii characters (no
241 multi-byte) so we can avoid doing a UCS2 conversion
243 NOTE! This interface must be able to handle a path with unix
244 directory separators. It should return true if any component is
245 mangled
247 static BOOL is_mangled(const char *name)
249 const char *p;
250 const char *s;
252 DEBUG(10,("is_mangled %s ?\n", name));
254 for (s=name; (p=strchr(s, '/')); s=p+1) {
255 char *component = strndup(s, PTR_DIFF(p, s));
256 if (is_mangled_component(component)) {
257 free(component);
258 return True;
260 free(component);
263 /* and the last part ... */
264 return is_mangled_component(s);
269 see if a filename is an allowable 8.3 name.
271 we are only going to allow ascii characters in 8.3 names, as this
272 simplifies things greatly (it means that we know the string won't
273 get larger when converted from UNIX to DOS formats)
275 static BOOL is_8_3(const char *name, BOOL check_case, BOOL allow_wildcards)
277 int len, i;
278 char *dot_p;
280 /* as a special case, the names '.' and '..' are allowable 8.3 names */
281 if (name[0] == '.') {
282 if (!name[1] || (name[1] == '.' && !name[2])) {
283 return True;
287 /* the simplest test is on the overall length of the
288 filename. Note that we deliberately use the ascii string
289 length (not the multi-byte one) as it is faster, and gives us
290 the result we need in this case. Using strlen_m would not
291 only be slower, it would be incorrect */
292 len = strlen(name);
293 if (len > 12) return False;
295 /* find the '.'. Note that once again we use the non-multibyte
296 function */
297 dot_p = strchr(name, '.');
299 if (!dot_p) {
300 /* if the name doesn't contain a '.' then its length
301 must be less than 8 */
302 if (len > 8) {
303 return False;
305 } else {
306 int prefix_len, suffix_len;
308 /* if it does contain a dot then the prefix must be <=
309 8 and the suffix <= 3 in length */
310 prefix_len = PTR_DIFF(dot_p, name);
311 suffix_len = len - (prefix_len+1);
313 if (prefix_len > 8 || suffix_len > 3) {
314 return False;
317 /* a 8.3 name cannot contain more than 1 '.' */
318 if (strchr(dot_p+1, '.')) {
319 return False;
323 /* the length are all OK. Now check to see if the characters themselves are OK */
324 for (i=0; name[i]; i++) {
325 /* note that we may allow wildcard petterns! */
326 if (!FLAG_CHECK(name[i], FLAG_ASCII|(allow_wildcards ? FLAG_WILDCARD : 0)) && name[i] != '.') {
327 return False;
331 /* it is a good 8.3 name */
332 return True;
337 reset the mangling cache on a smb.conf reload. This only really makes sense for
338 mangling backends that have parameters in smb.conf, and as this backend doesn't
339 this is a NULL operation
341 static void mangle_reset(void)
343 /* noop */
348 try to find a 8.3 name in the cache, and if found then
349 replace the string with the original long name.
351 The filename must be able to hold at least sizeof(fstring)
353 static BOOL check_cache(char *name)
355 u32 hash, multiplier;
356 int i;
357 const char *prefix;
358 char extension[4];
360 /* make sure that this is a mangled name from this cache */
361 if (!is_mangled(name)) {
362 DEBUG(10,("check_cache: %s -> not mangled\n", name));
363 return False;
366 /* we need to extract the hash from the 8.3 name */
367 hash = base_reverse[(unsigned char)name[7]];
368 for (multiplier=36, i=5;i>=1;i--) {
369 u32 v = base_reverse[(unsigned char)name[i]];
370 hash += multiplier * v;
371 multiplier *= 36;
374 /* now look in the prefix cache for that hash */
375 prefix = cache_lookup(hash);
376 if (!prefix) {
377 DEBUG(10,("check_cache: %s -> %08X -> not found\n", name, hash));
378 return False;
381 /* we found it - construct the full name */
382 if (name[8] == '.') {
383 strncpy(extension, name+9, 3);
384 extension[3] = 0;
385 } else {
386 extension[0] = 0;
389 if (extension[0]) {
390 DEBUG(10,("check_cache: %s -> %s.%s\n", name, prefix, extension));
391 slprintf(name, sizeof(fstring), "%s.%s", prefix, extension);
392 } else {
393 DEBUG(10,("check_cache: %s -> %s\n", name, prefix));
394 fstrcpy(name, prefix);
397 return True;
402 look for a DOS reserved name
404 static BOOL is_reserved_name(const char *name)
406 if (FLAG_CHECK(name[0], FLAG_POSSIBLE1) &&
407 FLAG_CHECK(name[1], FLAG_POSSIBLE2) &&
408 FLAG_CHECK(name[2], FLAG_POSSIBLE3) &&
409 FLAG_CHECK(name[3], FLAG_POSSIBLE4)) {
410 /* a likely match, scan the lot */
411 int i;
412 for (i=0; reserved_names[i]; i++) {
413 int len = strlen(reserved_names[i]);
414 /* note that we match on COM1 as well as COM1.foo */
415 if (strncasecmp(name, reserved_names[i], len) == 0 &&
416 (name[len] == '.' || name[len] == 0)) {
417 return True;
422 return False;
426 See if a filename is a legal long filename.
427 A filename ending in a '.' is not legal unless it's "." or "..". JRA.
430 static BOOL is_legal_name(const char *name)
432 const char *dot_pos = NULL;
433 BOOL alldots = True;
434 size_t numdots = 0;
436 while (*name) {
437 if (FLAG_CHECK(name[0], FLAG_ILLEGAL)) {
438 return False;
440 if (name[0] == '.') {
441 dot_pos = name;
442 numdots++;
443 } else {
444 alldots = False;
446 name++;
449 if (dot_pos) {
450 if (alldots && (numdots == 1 || numdots == 2))
451 return True; /* . or .. is a valid name */
453 /* A valid long name cannot end in '.' */
454 if (dot_pos[1] == '\0')
455 return False;
458 return True;
462 the main forward mapping function, which converts a long filename to
463 a 8.3 name
465 if need83 is not set then we only do the mangling if the name is illegal
466 as a long name
468 if cache83 is not set then we don't cache the result
470 the name parameter must be able to hold 13 bytes
472 static void name_map(char *name, BOOL need83, BOOL cache83)
474 char *dot_p;
475 char lead_char;
476 char extension[4];
477 int extension_length, i;
478 int prefix_len;
479 u32 hash, v;
480 char new_name[13];
482 /* reserved names are handled specially */
483 if (!is_reserved_name(name)) {
484 /* if the name is already a valid 8.3 name then we don't need to
485 do anything */
486 if (is_8_3(name, False, False)) {
487 return;
490 /* if the caller doesn't strictly need 8.3 then just check for illegal
491 filenames */
492 if (!need83 && is_legal_name(name)) {
493 return;
497 /* find the '.' if any */
498 dot_p = strrchr(name, '.');
500 if (dot_p) {
501 /* if the extension contains any illegal characters or
502 is too long or zero length then we treat it as part
503 of the prefix */
504 for (i=0; i<4 && dot_p[i+1]; i++) {
505 if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
506 dot_p = NULL;
507 break;
510 if (i == 0 || i == 4) dot_p = NULL;
513 /* the leading character in the mangled name is taken from
514 the first character of the name, if it is ascii
515 otherwise '_' is used
517 lead_char = name[0];
518 if (! FLAG_CHECK(lead_char, FLAG_ASCII)) {
519 lead_char = '_';
521 lead_char = toupper(lead_char);
523 /* the prefix is anything up to the first dot */
524 if (dot_p) {
525 prefix_len = PTR_DIFF(dot_p, name);
526 } else {
527 prefix_len = strlen(name);
530 /* the extension of the mangled name is taken from the first 3
531 ascii chars after the dot */
532 extension_length = 0;
533 if (dot_p) {
534 for (i=1; extension_length < 3 && dot_p[i]; i++) {
535 char c = dot_p[i];
536 if (FLAG_CHECK(c, FLAG_ASCII)) {
537 extension[extension_length++] = toupper(c);
542 /* find the hash for this prefix */
543 v = hash = mangle_hash(name, prefix_len);
545 /* now form the mangled name. */
546 new_name[0] = lead_char;
547 new_name[7] = base_forward(v % 36);
548 new_name[6] = '~';
549 for (i=5; i>=1; i--) {
550 v = v / 36;
551 new_name[i] = base_forward(v % 36);
554 /* add the extension */
555 if (extension_length) {
556 new_name[8] = '.';
557 memcpy(&new_name[9], extension, extension_length);
558 new_name[9+extension_length] = 0;
559 } else {
560 new_name[8] = 0;
563 if (cache83) {
564 /* put it in the cache */
565 cache_insert(name, prefix_len, hash);
568 DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
569 name, hash, new_name, cache83));
571 /* and overwrite the old name */
572 fstrcpy(name, new_name);
574 /* all done, we've managed to mangle it */
578 /* initialise the flags table
580 we allow only a very restricted set of characters as 'ascii' in this
581 mangling backend. This isn't a significant problem as modern clients
582 use the 'long' filenames anyway, and those don't have these
583 restrictions.
585 static void init_tables(void)
587 int i;
589 memset(char_flags, 0, sizeof(char_flags));
591 for (i=0;i<128;i++) {
592 if ((i >= '0' && i <= '9') ||
593 (i >= 'a' && i <= 'z') ||
594 (i >= 'A' && i <= 'Z')) {
595 char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR);
597 if (strchr("_-$~", i)) {
598 char_flags[i] |= FLAG_ASCII;
601 if (strchr("*\\/?<>|\":", i)) {
602 char_flags[i] |= FLAG_ILLEGAL;
605 if (strchr("*?\"<>", i)) {
606 char_flags[i] |= FLAG_WILDCARD;
610 memset(base_reverse, 0, sizeof(base_reverse));
611 for (i=0;i<36;i++) {
612 base_reverse[(unsigned char)base_forward(i)] = i;
615 /* fill in the reserved names flags. These are used as a very
616 fast filter for finding possible DOS reserved filenames */
617 for (i=0; reserved_names[i]; i++) {
618 unsigned char c1, c2, c3, c4;
620 c1 = (unsigned char)reserved_names[i][0];
621 c2 = (unsigned char)reserved_names[i][1];
622 c3 = (unsigned char)reserved_names[i][2];
623 c4 = (unsigned char)reserved_names[i][3];
625 char_flags[c1] |= FLAG_POSSIBLE1;
626 char_flags[c2] |= FLAG_POSSIBLE2;
627 char_flags[c3] |= FLAG_POSSIBLE3;
628 char_flags[c4] |= FLAG_POSSIBLE4;
629 char_flags[tolower(c1)] |= FLAG_POSSIBLE1;
630 char_flags[tolower(c2)] |= FLAG_POSSIBLE2;
631 char_flags[tolower(c3)] |= FLAG_POSSIBLE3;
632 char_flags[tolower(c4)] |= FLAG_POSSIBLE4;
634 char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
640 the following provides the abstraction layer to make it easier
641 to drop in an alternative mangling implementation */
642 static struct mangle_fns mangle_fns = {
643 is_mangled,
644 is_8_3,
645 mangle_reset,
646 check_cache,
647 name_map
650 /* return the methods for this mangling implementation */
651 struct mangle_fns *mangle_hash2_init(void)
653 init_tables();
654 mangle_reset();
656 if (!cache_init()) {
657 return NULL;
660 return &mangle_fns;