2 Unix SMB/CIFS implementation.
4 POSIX NTVFS backend - 8.3 name routines
6 Copyright (C) Andrew Tridgell 2004
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #include "system/locale.h"
24 #include "vfs_posix.h"
25 #include "param/param.h"
/*
  this mangling scheme uses the following format

  Annnn~n.AAA

  where nnnnn is a base 36 hash, and A represents characters from the original string

  The hash is taken of the leading part of the long filename, in uppercase

  for simplicity, we only allow ascii characters in 8.3 names
*/
/*
  ===============================================================================
  This file deliberately uses non-multibyte string functions in many places. This
  is *not* a mistake. This code is multi-byte safe, but it gets this property
  through some very subtle knowledge of the way multi-byte strings are encoded
  and the fact that this mangling algorithm only supports ascii characters in
  8.3 names.

  please don't convert this file to use the *_m() functions!!
  ===============================================================================
*/
/*
  M_DEBUG wraps DEBUG so that the trace output of the mangling code can
  be compiled out in one place.

  NOTE(review): the conditional that selects between the two definitions
  was lost from this copy of the file; the enabled form is assumed here.
  Confirm against the upstream source.
*/
#if 1
#define M_DEBUG(level, x) DEBUG(level, x)
#else
#define M_DEBUG(level, x)
#endif

/* these flags are used to mark characters as having particular
   properties. Each flag is a single bit in a char_flags[] entry */
#define FLAG_BASECHAR 1
#define FLAG_ASCII 2
#define FLAG_ILLEGAL 4
#define FLAG_WILDCARD 8

/* the "possible" flags are used as a fast way to find possible DOS
   reserved filenames: FLAG_POSSIBLEn marks a character that appears
   as the n-th character of at least one reserved name */
#define FLAG_POSSIBLE1 16
#define FLAG_POSSIBLE2 32
#define FLAG_POSSIBLE3 64
#define FLAG_POSSIBLE4 128

/* number of leading characters of the long name kept in the mangled
   name when the configured value is out of range */
#define DEFAULT_MANGLE_PREFIX 4

/* the digits used to print the hash in base 36 */
#define MANGLE_BASECHARS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

/* test a character against the flags table. This expects a variable
   called 'ctx' (a struct pvfs_mangle_context pointer) to be in scope.
   The character is cast to uint8_t so that bytes with the high bit set
   never produce a negative index */
#define FLAG_CHECK(c, flag) (ctx->char_flags[(uint8_t)(c)] & (flag))
/* DOS device names. The list is NULL terminated so that it can be
   walked without a separate element count */
static const char *reserved_names[] =
{ "AUX", "CON", "COM1", "COM2", "COM3", "COM4",
  "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };
/*
  all of the state used by the 8.3 mangling code
*/
struct pvfs_mangle_context {
	/* per-byte property bits (FLAG_*), indexed by character value */
	uint8_t char_flags[256];

	/*
	  this determines how many characters are used from the original
	  filename in the 8.3 mangled name. A larger value leads to a weaker
	  hash and more collisions.  The largest possible value is 6.
	*/
	int mangle_prefix;

	/* every hash is reduced modulo this value so that it fits in
	   the base 36 digits left over after the prefix */
	uint32_t mangle_modulus;

	/* we will use a very simple direct mapped prefix cache. The big
	   advantage of this cache structure is speed and low memory usage

	   The cache is indexed by the low-order bits of the hash, and confirmed by
	   hashing the resulting cache entry to match the known hash
	*/
	uint32_t cache_size;
	char **prefix_cache;
	uint32_t *prefix_cache_hashes;

	/* this is used to reverse the base 36 mapping */
	unsigned char base_reverse[256];

	struct smb_iconv_convenience *iconv_convenience;
};
113 hash a string of the specified length. The string does not need to be
116 this hash needs to be fast with a low collision rate (what hash doesn't?)
118 static uint32_t mangle_hash(struct pvfs_mangle_context
*ctx
,
119 const char *key
, size_t length
)
121 return pvfs_name_hash(key
, length
) % ctx
->mangle_modulus
;
125 insert an entry into the prefix cache. The string might not be null
127 static void cache_insert(struct pvfs_mangle_context
*ctx
,
128 const char *prefix
, int length
, uint32_t hash
)
130 int i
= hash
% ctx
->cache_size
;
132 if (ctx
->prefix_cache
[i
]) {
133 talloc_free(ctx
->prefix_cache
[i
]);
136 ctx
->prefix_cache
[i
] = talloc_strndup(ctx
->prefix_cache
, prefix
, length
);
137 ctx
->prefix_cache_hashes
[i
] = hash
;
141 lookup an entry in the prefix cache. Return NULL if not found.
143 static const char *cache_lookup(struct pvfs_mangle_context
*ctx
, uint32_t hash
)
145 int i
= hash
% ctx
->cache_size
;
148 if (!ctx
->prefix_cache
[i
] || hash
!= ctx
->prefix_cache_hashes
[i
]) {
152 /* yep, it matched */
153 return ctx
->prefix_cache
[i
];
158 determine if a string is possibly in a mangled format, ignoring
161 In this algorithm, mangled names use only pure ascii characters (no
162 multi-byte) so we can avoid doing a UCS2 conversion
164 static bool is_mangled_component(struct pvfs_mangle_context
*ctx
,
165 const char *name
, size_t len
)
169 M_DEBUG(10,("is_mangled_component %s (len %u) ?\n", name
, (unsigned int)len
));
171 /* check the length */
172 if (len
> 12 || len
< 8)
175 /* the best distinguishing characteristic is the ~ */
179 /* check extension */
183 for (i
=9; name
[i
] && i
< len
; i
++) {
184 if (! FLAG_CHECK(name
[i
], FLAG_ASCII
)) {
190 /* check lead characters */
191 for (i
=0;i
<ctx
->mangle_prefix
;i
++) {
192 if (! FLAG_CHECK(name
[i
], FLAG_ASCII
)) {
197 /* check rest of hash */
198 if (! FLAG_CHECK(name
[7], FLAG_BASECHAR
)) {
201 for (i
=ctx
->mangle_prefix
;i
<6;i
++) {
202 if (! FLAG_CHECK(name
[i
], FLAG_BASECHAR
)) {
207 M_DEBUG(10,("is_mangled_component %s (len %u) -> yes\n", name
, (unsigned int)len
));
/*
  determine if a string is possibly in a mangled format, ignoring
  case

  In this algorithm, mangled names use only pure ascii characters (no
  multi-byte) so we can avoid doing a UCS2 conversion

  NOTE! This interface must be able to handle a path with unix
  directory separators. It should return true if any component is
  mangled
*/
static bool is_mangled(struct pvfs_mangle_context *ctx, const char *name)
{
	const char *p;
	const char *s;

	M_DEBUG(10,("is_mangled %s ?\n", name));

	/* test each component that is followed by a '/' */
	for (s=name; (p=strchr(s, '/')); s=p+1) {
		if (is_mangled_component(ctx, s, PTR_DIFF(p, s))) {
			return true;
		}
	}

	/* and the last part ... */
	return is_mangled_component(ctx, s, strlen(s));
}
244 see if a filename is an allowable 8.3 name.
246 we are only going to allow ascii characters in 8.3 names, as this
247 simplifies things greatly (it means that we know the string won't
248 get larger when converted from UNIX to DOS formats)
250 static bool is_8_3(struct pvfs_mangle_context
*ctx
,
251 const char *name
, bool check_case
, bool allow_wildcards
)
256 /* as a special case, the names '.' and '..' are allowable 8.3 names */
257 if (name
[0] == '.') {
258 if (!name
[1] || (name
[1] == '.' && !name
[2])) {
263 /* the simplest test is on the overall length of the
264 filename. Note that we deliberately use the ascii string
265 length (not the multi-byte one) as it is faster, and gives us
266 the result we need in this case. Using strlen_m would not
267 only be slower, it would be incorrect */
272 /* find the '.'. Note that once again we use the non-multibyte
274 dot_p
= strchr(name
, '.');
277 /* if the name doesn't contain a '.' then its length
278 must be less than 8 */
283 int prefix_len
, suffix_len
;
285 /* if it does contain a dot then the prefix must be <=
286 8 and the suffix <= 3 in length */
287 prefix_len
= PTR_DIFF(dot_p
, name
);
288 suffix_len
= len
- (prefix_len
+1);
290 if (prefix_len
> 8 || suffix_len
> 3 || suffix_len
== 0) {
294 /* a 8.3 name cannot contain more than 1 '.' */
295 if (strchr(dot_p
+1, '.')) {
300 /* the length are all OK. Now check to see if the characters themselves are OK */
301 for (i
=0; name
[i
]; i
++) {
302 /* note that we may allow wildcard petterns! */
303 if (!FLAG_CHECK(name
[i
], FLAG_ASCII
|(allow_wildcards
? FLAG_WILDCARD
: 0)) &&
309 /* it is a good 8.3 name */
315 try to find a 8.3 name in the cache, and if found then
316 return the original long name.
318 static char *check_cache(struct pvfs_mangle_context
*ctx
,
319 TALLOC_CTX
*mem_ctx
, const char *name
)
321 uint32_t hash
, multiplier
;
326 /* make sure that this is a mangled name from this cache */
327 if (!is_mangled(ctx
, name
)) {
328 M_DEBUG(10,("check_cache: %s -> not mangled\n", name
));
332 /* we need to extract the hash from the 8.3 name */
333 hash
= ctx
->base_reverse
[(unsigned char)name
[7]];
334 for (multiplier
=36, i
=5;i
>=ctx
->mangle_prefix
;i
--) {
335 uint32_t v
= ctx
->base_reverse
[(unsigned char)name
[i
]];
336 hash
+= multiplier
* v
;
340 /* now look in the prefix cache for that hash */
341 prefix
= cache_lookup(ctx
, hash
);
343 M_DEBUG(10,("check_cache: %s -> %08X -> not found\n", name
, hash
));
347 /* we found it - construct the full name */
348 if (name
[8] == '.') {
349 strncpy(extension
, name
+9, 3);
356 return talloc_asprintf(mem_ctx
, "%s.%s", prefix
, extension
);
359 return talloc_strdup(mem_ctx
, prefix
);
364 look for a DOS reserved name
366 static bool is_reserved_name(struct pvfs_mangle_context
*ctx
, const char *name
)
368 if (FLAG_CHECK(name
[0], FLAG_POSSIBLE1
) &&
369 FLAG_CHECK(name
[1], FLAG_POSSIBLE2
) &&
370 FLAG_CHECK(name
[2], FLAG_POSSIBLE3
) &&
371 FLAG_CHECK(name
[3], FLAG_POSSIBLE4
)) {
372 /* a likely match, scan the lot */
374 for (i
=0; reserved_names
[i
]; i
++) {
375 if (strcasecmp(name
, reserved_names
[i
]) == 0) {
386 See if a filename is a legal long filename.
387 A filename ending in a '.' is not legal unless it's "." or "..". JRA.
389 static bool is_legal_name(struct pvfs_mangle_context
*ctx
, const char *name
)
393 codepoint_t c
= next_codepoint_convenience(ctx
->iconv_convenience
, name
, &c_size
);
394 if (c
== INVALID_CODEPOINT
) {
397 /* all high chars are OK */
402 if (FLAG_CHECK(c
, FLAG_ILLEGAL
)) {
412 the main forward mapping function, which converts a long filename to
415 if need83 is not set then we only do the mangling if the name is illegal
418 if cache83 is not set then we don't cache the result
420 return NULL if we don't need to do any conversion
422 static char *name_map(struct pvfs_mangle_context
*ctx
,
423 const char *name
, bool need83
, bool cache83
)
428 unsigned int extension_length
, i
;
429 unsigned int prefix_len
;
432 const char *basechars
= MANGLE_BASECHARS
;
434 /* reserved names are handled specially */
435 if (!is_reserved_name(ctx
, name
)) {
436 /* if the name is already a valid 8.3 name then we don't need to
438 if (is_8_3(ctx
, name
, false, false)) {
442 /* if the caller doesn't strictly need 8.3 then just check for illegal
444 if (!need83
&& is_legal_name(ctx
, name
)) {
449 /* find the '.' if any */
450 dot_p
= strrchr(name
, '.');
453 /* if the extension contains any illegal characters or
454 is too long or zero length then we treat it as part
456 for (i
=0; i
<4 && dot_p
[i
+1]; i
++) {
457 if (! FLAG_CHECK(dot_p
[i
+1], FLAG_ASCII
)) {
462 if (i
== 0 || i
== 4) dot_p
= NULL
;
465 /* the leading characters in the mangled name is taken from
466 the first characters of the name, if they are ascii otherwise
469 for (i
=0;i
<ctx
->mangle_prefix
&& name
[i
];i
++) {
470 lead_chars
[i
] = name
[i
];
471 if (! FLAG_CHECK(lead_chars
[i
], FLAG_ASCII
)) {
474 lead_chars
[i
] = toupper((unsigned char)lead_chars
[i
]);
476 for (;i
<ctx
->mangle_prefix
;i
++) {
480 /* the prefix is anything up to the first dot */
482 prefix_len
= PTR_DIFF(dot_p
, name
);
484 prefix_len
= strlen(name
);
487 /* the extension of the mangled name is taken from the first 3
488 ascii chars after the dot */
489 extension_length
= 0;
491 for (i
=1; extension_length
< 3 && dot_p
[i
]; i
++) {
492 unsigned char c
= dot_p
[i
];
493 if (FLAG_CHECK(c
, FLAG_ASCII
)) {
494 extension
[extension_length
++] = toupper(c
);
499 /* find the hash for this prefix */
500 v
= hash
= mangle_hash(ctx
, name
, prefix_len
);
502 new_name
= talloc_array(ctx
, char, 13);
503 if (new_name
== NULL
) {
507 /* now form the mangled name. */
508 for (i
=0;i
<ctx
->mangle_prefix
;i
++) {
509 new_name
[i
] = lead_chars
[i
];
511 new_name
[7] = basechars
[v
% 36];
513 for (i
=5; i
>=ctx
->mangle_prefix
; i
--) {
515 new_name
[i
] = basechars
[v
% 36];
518 /* add the extension */
519 if (extension_length
) {
521 memcpy(&new_name
[9], extension
, extension_length
);
522 new_name
[9+extension_length
] = 0;
528 /* put it in the cache */
529 cache_insert(ctx
, name
, prefix_len
, hash
);
532 M_DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
533 name
, hash
, new_name
, cache83
));
539 /* initialise the flags table
541 we allow only a very restricted set of characters as 'ascii' in this
542 mangling backend. This isn't a significant problem as modern clients
543 use the 'long' filenames anyway, and those don't have these
546 static void init_tables(struct pvfs_mangle_context
*ctx
)
548 const char *basechars
= MANGLE_BASECHARS
;
550 /* the list of reserved dos names - all of these are illegal */
552 ZERO_STRUCT(ctx
->char_flags
);
554 for (i
=1;i
<128;i
++) {
555 if ((i
>= '0' && i
<= '9') ||
556 (i
>= 'a' && i
<= 'z') ||
557 (i
>= 'A' && i
<= 'Z')) {
558 ctx
->char_flags
[i
] |= (FLAG_ASCII
| FLAG_BASECHAR
);
560 if (strchr("_-$~", i
)) {
561 ctx
->char_flags
[i
] |= FLAG_ASCII
;
564 if (strchr("*\\/?<>|\":", i
)) {
565 ctx
->char_flags
[i
] |= FLAG_ILLEGAL
;
568 if (strchr("*?\"<>", i
)) {
569 ctx
->char_flags
[i
] |= FLAG_WILDCARD
;
573 ZERO_STRUCT(ctx
->base_reverse
);
575 ctx
->base_reverse
[(uint8_t)basechars
[i
]] = i
;
578 /* fill in the reserved names flags. These are used as a very
579 fast filter for finding possible DOS reserved filenames */
580 for (i
=0; reserved_names
[i
]; i
++) {
581 unsigned char c1
, c2
, c3
, c4
;
583 c1
= (unsigned char)reserved_names
[i
][0];
584 c2
= (unsigned char)reserved_names
[i
][1];
585 c3
= (unsigned char)reserved_names
[i
][2];
586 c4
= (unsigned char)reserved_names
[i
][3];
588 ctx
->char_flags
[c1
] |= FLAG_POSSIBLE1
;
589 ctx
->char_flags
[c2
] |= FLAG_POSSIBLE2
;
590 ctx
->char_flags
[c3
] |= FLAG_POSSIBLE3
;
591 ctx
->char_flags
[c4
] |= FLAG_POSSIBLE4
;
592 ctx
->char_flags
[tolower(c1
)] |= FLAG_POSSIBLE1
;
593 ctx
->char_flags
[tolower(c2
)] |= FLAG_POSSIBLE2
;
594 ctx
->char_flags
[tolower(c3
)] |= FLAG_POSSIBLE3
;
595 ctx
->char_flags
[tolower(c4
)] |= FLAG_POSSIBLE4
;
597 ctx
->char_flags
[(unsigned char)'.'] |= FLAG_POSSIBLE4
;
600 ctx
->mangle_modulus
= 1;
601 for (i
=0;i
<(7-ctx
->mangle_prefix
);i
++) {
602 ctx
->mangle_modulus
*= 36;
607 initialise the mangling code
609 NTSTATUS
pvfs_mangle_init(struct pvfs_state
*pvfs
)
611 struct pvfs_mangle_context
*ctx
;
613 ctx
= talloc(pvfs
, struct pvfs_mangle_context
);
615 return NT_STATUS_NO_MEMORY
;
618 ctx
->iconv_convenience
= lp_iconv_convenience(pvfs
->ntvfs
->ctx
->lp_ctx
);
620 /* by default have a max of 512 entries in the cache. */
621 ctx
->cache_size
= lp_parm_int(pvfs
->ntvfs
->ctx
->lp_ctx
, NULL
, "mangle", "cachesize", 512);
623 ctx
->prefix_cache
= talloc_array(ctx
, char *, ctx
->cache_size
);
624 if (ctx
->prefix_cache
== NULL
) {
625 return NT_STATUS_NO_MEMORY
;
627 ctx
->prefix_cache_hashes
= talloc_array(ctx
, uint32_t, ctx
->cache_size
);
628 if (ctx
->prefix_cache_hashes
== NULL
) {
629 return NT_STATUS_NO_MEMORY
;
632 memset(ctx
->prefix_cache
, 0, sizeof(char *) * ctx
->cache_size
);
633 memset(ctx
->prefix_cache_hashes
, 0, sizeof(uint32_t) * ctx
->cache_size
);
635 ctx
->mangle_prefix
= lp_parm_int(pvfs
->ntvfs
->ctx
->lp_ctx
, NULL
, "mangle", "prefix", -1);
636 if (ctx
->mangle_prefix
< 0 || ctx
->mangle_prefix
> 6) {
637 ctx
->mangle_prefix
= DEFAULT_MANGLE_PREFIX
;
642 pvfs
->mangle_ctx
= ctx
;
649 return the short name for a component of a full name
651 char *pvfs_short_name_component(struct pvfs_state
*pvfs
, const char *name
)
653 return name_map(pvfs
->mangle_ctx
, name
, true, true);
658 return the short name for a given entry in a directory
660 const char *pvfs_short_name(struct pvfs_state
*pvfs
, TALLOC_CTX
*mem_ctx
,
661 struct pvfs_filename
*name
)
663 char *p
= strrchr(name
->full_name
, '/');
664 char *ret
= pvfs_short_name_component(pvfs
, p
+1);
668 talloc_steal(mem_ctx
, ret
);
673 lookup a mangled name, returning the original long name if present
676 char *pvfs_mangled_lookup(struct pvfs_state
*pvfs
, TALLOC_CTX
*mem_ctx
,
679 return check_cache(pvfs
->mangle_ctx
, mem_ctx
, name
);
684 look for a DOS reserved name
686 bool pvfs_is_reserved_name(struct pvfs_state
*pvfs
, const char *name
)
688 return is_reserved_name(pvfs
->mangle_ctx
, name
);
693 see if a component of a filename could be a mangled name from our
696 bool pvfs_is_mangled_component(struct pvfs_state
*pvfs
, const char *name
)
698 return is_mangled_component(pvfs
->mangle_ctx
, name
, strlen(name
));