s4-repl: add a debug to make it easier to monitor replication
[Samba/aatanasov.git] / source4 / ntvfs / posix / pvfs_shortname.c
blob530def9163063dde0787e86a656f2e69459718cb
1 /*
2 Unix SMB/CIFS implementation.
4 POSIX NTVFS backend - 8.3 name routines
6 Copyright (C) Andrew Tridgell 2004
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "includes.h"
23 #include "system/locale.h"
24 #include "vfs_posix.h"
25 #include "param/param.h"
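/*
  this mangling scheme uses the following format

  AAAAnn~n.AAA     (shown for the default mangle prefix of 4)

  where A represents characters taken from the original string and the
  n characters together form a base 36 hash. The first "mangle prefix"
  characters of the 8 character base come from the original name, the
  remaining characters before the '~' and the single character after it
  are hash digits. With a mangle prefix of 1 the name looks like
  Annnnn~n.AAA

  The hash is taken of the leading part of the long filename (everything
  before the extension)

  for simplicity, we only allow ascii characters in 8.3 names
*/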
/*
  ===============================================================================
  NOTE NOTE NOTE!!!

  This file deliberately uses non-multibyte string functions in many places. This
  is *not* a mistake. This code is multi-byte safe, but it gets this property
  through some very subtle knowledge of the way multi-byte strings are encoded
  and the fact that this mangling algorithm only supports ascii characters in
  8.3 names.

  please don't convert this file to use the *_m() functions!!
  ===============================================================================
*/
/* change the condition to 0 to compile the mangling debug messages out */
#if 1
#define M_DEBUG(level, x) DEBUG(level, x)
#else
#define M_DEBUG(level, x)
#endif

/* per-character property bits, stored in the char_flags[] table of the
   mangle context */
#define FLAG_BASECHAR  0x01
#define FLAG_ASCII     0x02
#define FLAG_ILLEGAL   0x04
#define FLAG_WILDCARD  0x08

/* the "possible" bits give a cheap pre-filter for DOS reserved
   filenames: bit N is set on every character that appears at position
   N of some reserved name */
#define FLAG_POSSIBLE1 0x10
#define FLAG_POSSIBLE2 0x20
#define FLAG_POSSIBLE3 0x40
#define FLAG_POSSIBLE4 0x80

/* number of leading characters copied from the long name when the
   "mangle:prefix" option is not set (or is out of range) */
#define DEFAULT_MANGLE_PREFIX 4

/* the digits of the base 36 hash, in value order */
#define MANGLE_BASECHARS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

/* test the property bits of one character; expects a variable called
   "ctx" (the mangle context) to be in scope at the point of use */
#define FLAG_CHECK(c, flag) (ctx->char_flags[(uint8_t)(c)] & (flag))

/* NULL terminated list of DOS reserved device names */
static const char *reserved_names[] = {
	"AUX", "CON",
	"COM1", "COM2", "COM3", "COM4",
	"LPT1", "LPT2", "LPT3",
	"NUL", "PRN",
	NULL
};
/*
  state of the 8.3 name mangling code; one instance is created by
  pvfs_mangle_init() and stored in pvfs->mangle_ctx
*/
struct pvfs_mangle_context {
	/* FLAG_* property bits for every possible byte value */
	uint8_t char_flags[256];

	/*
	  this determines how many characters are used from the original
	  filename in the 8.3 mangled name. A larger value leads to a weaker
	  hash and more collisions.  The largest possible value is 6.
	*/
	int mangle_prefix;

	/* hashes are reduced modulo this value so that they fit in the
	   hash digits of the mangled name */
	uint32_t mangle_modulus;

	/*
	  we use a very simple direct mapped prefix cache. The big
	  advantage of this cache structure is speed and low memory usage

	  The cache is indexed by (hash % cache_size). prefix_cache[] holds
	  the long-name prefix stored in each slot and prefix_cache_hashes[]
	  the full hash that the slot was stored under, which is compared
	  on lookup to confirm a hit
	*/
	uint32_t cache_size;
	char **prefix_cache;
	uint32_t *prefix_cache_hashes;

	/* this is used to reverse the base 36 mapping */
	uint8_t base_reverse[256];

	/* character set conversion handle, used when walking long names
	   one codepoint at a time */
	struct smb_iconv_convenience *iconv_convenience;
};
113 hash a string of the specified length. The string does not need to be
114 null terminated
116 this hash needs to be fast with a low collision rate (what hash doesn't?)
118 static uint32_t mangle_hash(struct pvfs_mangle_context *ctx,
119 const char *key, size_t length)
121 return pvfs_name_hash(key, length) % ctx->mangle_modulus;
125 insert an entry into the prefix cache. The string might not be null
126 terminated */
127 static void cache_insert(struct pvfs_mangle_context *ctx,
128 const char *prefix, int length, uint32_t hash)
130 int i = hash % ctx->cache_size;
132 if (ctx->prefix_cache[i]) {
133 talloc_free(ctx->prefix_cache[i]);
136 ctx->prefix_cache[i] = talloc_strndup(ctx->prefix_cache, prefix, length);
137 ctx->prefix_cache_hashes[i] = hash;
141 lookup an entry in the prefix cache. Return NULL if not found.
143 static const char *cache_lookup(struct pvfs_mangle_context *ctx, uint32_t hash)
145 int i = hash % ctx->cache_size;
148 if (!ctx->prefix_cache[i] || hash != ctx->prefix_cache_hashes[i]) {
149 return NULL;
152 /* yep, it matched */
153 return ctx->prefix_cache[i];
158 determine if a string is possibly in a mangled format, ignoring
159 case
161 In this algorithm, mangled names use only pure ascii characters (no
162 multi-byte) so we can avoid doing a UCS2 conversion
164 static bool is_mangled_component(struct pvfs_mangle_context *ctx,
165 const char *name, size_t len)
167 unsigned int i;
169 M_DEBUG(10,("is_mangled_component %s (len %u) ?\n", name, (unsigned int)len));
171 /* check the length */
172 if (len > 12 || len < 8)
173 return false;
175 /* the best distinguishing characteristic is the ~ */
176 if (name[6] != '~')
177 return false;
179 /* check extension */
180 if (len > 8) {
181 if (name[8] != '.')
182 return false;
183 for (i=9; name[i] && i < len; i++) {
184 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
185 return false;
190 /* check lead characters */
191 for (i=0;i<ctx->mangle_prefix;i++) {
192 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
193 return false;
197 /* check rest of hash */
198 if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
199 return false;
201 for (i=ctx->mangle_prefix;i<6;i++) {
202 if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) {
203 return false;
207 M_DEBUG(10,("is_mangled_component %s (len %u) -> yes\n", name, (unsigned int)len));
209 return true;
/*
  decide whether a path could contain a name produced by this mangling
  code, ignoring case

  In this algorithm, mangled names use only pure ascii characters (no
  multi-byte) so we can avoid doing a UCS2 conversion

  NOTE! This interface handles paths with unix directory separators.
  It returns true if any one '/' separated component looks mangled
*/
static bool is_mangled(struct pvfs_mangle_context *ctx, const char *name)
{
	const char *component = name;
	const char *slash;

	M_DEBUG(10,("is_mangled %s ?\n", name));

	/* every component that is followed by a '/' */
	while ((slash = strchr(component, '/')) != NULL) {
		if (is_mangled_component(ctx, component, PTR_DIFF(slash, component))) {
			return true;
		}
		component = slash + 1;
	}

	/* and the last part ... */
	return is_mangled_component(ctx, component, strlen(component));
}
244 see if a filename is an allowable 8.3 name.
246 we are only going to allow ascii characters in 8.3 names, as this
247 simplifies things greatly (it means that we know the string won't
248 get larger when converted from UNIX to DOS formats)
250 static bool is_8_3(struct pvfs_mangle_context *ctx,
251 const char *name, bool check_case, bool allow_wildcards)
253 int len, i;
254 char *dot_p;
256 /* as a special case, the names '.' and '..' are allowable 8.3 names */
257 if (name[0] == '.') {
258 if (!name[1] || (name[1] == '.' && !name[2])) {
259 return true;
263 /* the simplest test is on the overall length of the
264 filename. Note that we deliberately use the ascii string
265 length (not the multi-byte one) as it is faster, and gives us
266 the result we need in this case. Using strlen_m would not
267 only be slower, it would be incorrect */
268 len = strlen(name);
269 if (len > 12)
270 return false;
272 /* find the '.'. Note that once again we use the non-multibyte
273 function */
274 dot_p = strchr(name, '.');
276 if (!dot_p) {
277 /* if the name doesn't contain a '.' then its length
278 must be less than 8 */
279 if (len > 8) {
280 return false;
282 } else {
283 int prefix_len, suffix_len;
285 /* if it does contain a dot then the prefix must be <=
286 8 and the suffix <= 3 in length */
287 prefix_len = PTR_DIFF(dot_p, name);
288 suffix_len = len - (prefix_len+1);
290 if (prefix_len > 8 || suffix_len > 3 || suffix_len == 0) {
291 return false;
294 /* a 8.3 name cannot contain more than 1 '.' */
295 if (strchr(dot_p+1, '.')) {
296 return false;
300 /* the length are all OK. Now check to see if the characters themselves are OK */
301 for (i=0; name[i]; i++) {
302 /* note that we may allow wildcard petterns! */
303 if (!FLAG_CHECK(name[i], FLAG_ASCII|(allow_wildcards ? FLAG_WILDCARD : 0)) &&
304 name[i] != '.') {
305 return false;
309 /* it is a good 8.3 name */
310 return true;
315 try to find a 8.3 name in the cache, and if found then
316 return the original long name.
318 static char *check_cache(struct pvfs_mangle_context *ctx,
319 TALLOC_CTX *mem_ctx, const char *name)
321 uint32_t hash, multiplier;
322 unsigned int i;
323 const char *prefix;
324 char extension[4];
326 /* make sure that this is a mangled name from this cache */
327 if (!is_mangled(ctx, name)) {
328 M_DEBUG(10,("check_cache: %s -> not mangled\n", name));
329 return NULL;
332 /* we need to extract the hash from the 8.3 name */
333 hash = ctx->base_reverse[(unsigned char)name[7]];
334 for (multiplier=36, i=5;i>=ctx->mangle_prefix;i--) {
335 uint32_t v = ctx->base_reverse[(unsigned char)name[i]];
336 hash += multiplier * v;
337 multiplier *= 36;
340 /* now look in the prefix cache for that hash */
341 prefix = cache_lookup(ctx, hash);
342 if (!prefix) {
343 M_DEBUG(10,("check_cache: %s -> %08X -> not found\n", name, hash));
344 return NULL;
347 /* we found it - construct the full name */
348 if (name[8] == '.') {
349 strncpy(extension, name+9, 3);
350 extension[3] = 0;
351 } else {
352 extension[0] = 0;
355 if (extension[0]) {
356 return talloc_asprintf(mem_ctx, "%s.%s", prefix, extension);
359 return talloc_strdup(mem_ctx, prefix);
364 look for a DOS reserved name
366 static bool is_reserved_name(struct pvfs_mangle_context *ctx, const char *name)
368 if (FLAG_CHECK(name[0], FLAG_POSSIBLE1) &&
369 FLAG_CHECK(name[1], FLAG_POSSIBLE2) &&
370 FLAG_CHECK(name[2], FLAG_POSSIBLE3) &&
371 FLAG_CHECK(name[3], FLAG_POSSIBLE4)) {
372 /* a likely match, scan the lot */
373 int i;
374 for (i=0; reserved_names[i]; i++) {
375 if (strcasecmp(name, reserved_names[i]) == 0) {
376 return true;
381 return false;
386 See if a filename is a legal long filename.
387 A filename ending in a '.' is not legal unless it's "." or "..". JRA.
389 static bool is_legal_name(struct pvfs_mangle_context *ctx, const char *name)
391 while (*name) {
392 size_t c_size;
393 codepoint_t c = next_codepoint_convenience(ctx->iconv_convenience, name, &c_size);
394 if (c == INVALID_CODEPOINT) {
395 return false;
397 /* all high chars are OK */
398 if (c >= 128) {
399 name += c_size;
400 continue;
402 if (FLAG_CHECK(c, FLAG_ILLEGAL)) {
403 return false;
405 name += c_size;
408 return true;
412 the main forward mapping function, which converts a long filename to
413 a 8.3 name
415 if need83 is not set then we only do the mangling if the name is illegal
416 as a long name
418 if cache83 is not set then we don't cache the result
420 return NULL if we don't need to do any conversion
422 static char *name_map(struct pvfs_mangle_context *ctx,
423 const char *name, bool need83, bool cache83)
425 char *dot_p;
426 char lead_chars[7];
427 char extension[4];
428 unsigned int extension_length, i;
429 unsigned int prefix_len;
430 uint32_t hash, v;
431 char *new_name;
432 const char *basechars = MANGLE_BASECHARS;
434 /* reserved names are handled specially */
435 if (!is_reserved_name(ctx, name)) {
436 /* if the name is already a valid 8.3 name then we don't need to
437 do anything */
438 if (is_8_3(ctx, name, false, false)) {
439 return NULL;
442 /* if the caller doesn't strictly need 8.3 then just check for illegal
443 filenames */
444 if (!need83 && is_legal_name(ctx, name)) {
445 return NULL;
449 /* find the '.' if any */
450 dot_p = strrchr(name, '.');
452 if (dot_p) {
453 /* if the extension contains any illegal characters or
454 is too long or zero length then we treat it as part
455 of the prefix */
456 for (i=0; i<4 && dot_p[i+1]; i++) {
457 if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
458 dot_p = NULL;
459 break;
462 if (i == 0 || i == 4) dot_p = NULL;
465 /* the leading characters in the mangled name is taken from
466 the first characters of the name, if they are ascii otherwise
467 '_' is used
469 for (i=0;i<ctx->mangle_prefix && name[i];i++) {
470 lead_chars[i] = name[i];
471 if (! FLAG_CHECK(lead_chars[i], FLAG_ASCII)) {
472 lead_chars[i] = '_';
474 lead_chars[i] = toupper((unsigned char)lead_chars[i]);
476 for (;i<ctx->mangle_prefix;i++) {
477 lead_chars[i] = '_';
480 /* the prefix is anything up to the first dot */
481 if (dot_p) {
482 prefix_len = PTR_DIFF(dot_p, name);
483 } else {
484 prefix_len = strlen(name);
487 /* the extension of the mangled name is taken from the first 3
488 ascii chars after the dot */
489 extension_length = 0;
490 if (dot_p) {
491 for (i=1; extension_length < 3 && dot_p[i]; i++) {
492 unsigned char c = dot_p[i];
493 if (FLAG_CHECK(c, FLAG_ASCII)) {
494 extension[extension_length++] = toupper(c);
499 /* find the hash for this prefix */
500 v = hash = mangle_hash(ctx, name, prefix_len);
502 new_name = talloc_array(ctx, char, 13);
503 if (new_name == NULL) {
504 return NULL;
507 /* now form the mangled name. */
508 for (i=0;i<ctx->mangle_prefix;i++) {
509 new_name[i] = lead_chars[i];
511 new_name[7] = basechars[v % 36];
512 new_name[6] = '~';
513 for (i=5; i>=ctx->mangle_prefix; i--) {
514 v = v / 36;
515 new_name[i] = basechars[v % 36];
518 /* add the extension */
519 if (extension_length) {
520 new_name[8] = '.';
521 memcpy(&new_name[9], extension, extension_length);
522 new_name[9+extension_length] = 0;
523 } else {
524 new_name[8] = 0;
527 if (cache83) {
528 /* put it in the cache */
529 cache_insert(ctx, name, prefix_len, hash);
532 M_DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
533 name, hash, new_name, cache83));
535 return new_name;
539 /* initialise the flags table
541 we allow only a very restricted set of characters as 'ascii' in this
542 mangling backend. This isn't a significant problem as modern clients
543 use the 'long' filenames anyway, and those don't have these
544 restrictions.
546 static void init_tables(struct pvfs_mangle_context *ctx)
548 const char *basechars = MANGLE_BASECHARS;
549 int i;
550 /* the list of reserved dos names - all of these are illegal */
552 ZERO_STRUCT(ctx->char_flags);
554 for (i=1;i<128;i++) {
555 if ((i >= '0' && i <= '9') ||
556 (i >= 'a' && i <= 'z') ||
557 (i >= 'A' && i <= 'Z')) {
558 ctx->char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR);
560 if (strchr("_-$~", i)) {
561 ctx->char_flags[i] |= FLAG_ASCII;
564 if (strchr("*\\/?<>|\":", i)) {
565 ctx->char_flags[i] |= FLAG_ILLEGAL;
568 if (strchr("*?\"<>", i)) {
569 ctx->char_flags[i] |= FLAG_WILDCARD;
573 ZERO_STRUCT(ctx->base_reverse);
574 for (i=0;i<36;i++) {
575 ctx->base_reverse[(uint8_t)basechars[i]] = i;
578 /* fill in the reserved names flags. These are used as a very
579 fast filter for finding possible DOS reserved filenames */
580 for (i=0; reserved_names[i]; i++) {
581 unsigned char c1, c2, c3, c4;
583 c1 = (unsigned char)reserved_names[i][0];
584 c2 = (unsigned char)reserved_names[i][1];
585 c3 = (unsigned char)reserved_names[i][2];
586 c4 = (unsigned char)reserved_names[i][3];
588 ctx->char_flags[c1] |= FLAG_POSSIBLE1;
589 ctx->char_flags[c2] |= FLAG_POSSIBLE2;
590 ctx->char_flags[c3] |= FLAG_POSSIBLE3;
591 ctx->char_flags[c4] |= FLAG_POSSIBLE4;
592 ctx->char_flags[tolower(c1)] |= FLAG_POSSIBLE1;
593 ctx->char_flags[tolower(c2)] |= FLAG_POSSIBLE2;
594 ctx->char_flags[tolower(c3)] |= FLAG_POSSIBLE3;
595 ctx->char_flags[tolower(c4)] |= FLAG_POSSIBLE4;
597 ctx->char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
600 ctx->mangle_modulus = 1;
601 for (i=0;i<(7-ctx->mangle_prefix);i++) {
602 ctx->mangle_modulus *= 36;
607 initialise the mangling code
609 NTSTATUS pvfs_mangle_init(struct pvfs_state *pvfs)
611 struct pvfs_mangle_context *ctx;
613 ctx = talloc(pvfs, struct pvfs_mangle_context);
614 if (ctx == NULL) {
615 return NT_STATUS_NO_MEMORY;
618 ctx->iconv_convenience = lp_iconv_convenience(pvfs->ntvfs->ctx->lp_ctx);
620 /* by default have a max of 512 entries in the cache. */
621 ctx->cache_size = lp_parm_int(pvfs->ntvfs->ctx->lp_ctx, NULL, "mangle", "cachesize", 512);
623 ctx->prefix_cache = talloc_array(ctx, char *, ctx->cache_size);
624 if (ctx->prefix_cache == NULL) {
625 return NT_STATUS_NO_MEMORY;
627 ctx->prefix_cache_hashes = talloc_array(ctx, uint32_t, ctx->cache_size);
628 if (ctx->prefix_cache_hashes == NULL) {
629 return NT_STATUS_NO_MEMORY;
632 memset(ctx->prefix_cache, 0, sizeof(char *) * ctx->cache_size);
633 memset(ctx->prefix_cache_hashes, 0, sizeof(uint32_t) * ctx->cache_size);
635 ctx->mangle_prefix = lp_parm_int(pvfs->ntvfs->ctx->lp_ctx, NULL, "mangle", "prefix", -1);
636 if (ctx->mangle_prefix < 0 || ctx->mangle_prefix > 6) {
637 ctx->mangle_prefix = DEFAULT_MANGLE_PREFIX;
640 init_tables(ctx);
642 pvfs->mangle_ctx = ctx;
644 return NT_STATUS_OK;
649 return the short name for a component of a full name
651 char *pvfs_short_name_component(struct pvfs_state *pvfs, const char *name)
653 return name_map(pvfs->mangle_ctx, name, true, true);
658 return the short name for a given entry in a directory
660 const char *pvfs_short_name(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
661 struct pvfs_filename *name)
663 char *p = strrchr(name->full_name, '/');
664 char *ret = pvfs_short_name_component(pvfs, p+1);
665 if (ret == NULL) {
666 return p+1;
668 talloc_steal(mem_ctx, ret);
669 return ret;
673 lookup a mangled name, returning the original long name if present
674 in the cache
676 char *pvfs_mangled_lookup(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
677 const char *name)
679 return check_cache(pvfs->mangle_ctx, mem_ctx, name);
684 look for a DOS reserved name
686 bool pvfs_is_reserved_name(struct pvfs_state *pvfs, const char *name)
688 return is_reserved_name(pvfs->mangle_ctx, name);
693 see if a component of a filename could be a mangled name from our
694 mangling code
696 bool pvfs_is_mangled_component(struct pvfs_state *pvfs, const char *name)
698 return is_mangled_component(pvfs->mangle_ctx, name, strlen(name));