2 * Copyright (c) 2001 Ian Dowse. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.3.2.6 2002/04/10 21:41:14 dwmalone Exp $
28 * This implements a hash-based lookup scheme for UFS directories.
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/fnv_hash.h>
42 #include <sys/vnode.h>
43 #include <sys/mount.h>
44 #include <sys/sysctl.h>
45 #include <sys/objcache.h>
52 #include "ufs_extern.h"
53 #include "ffs_extern.h"
55 #define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1))
56 #define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1))
57 #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0)
58 #define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n))
60 static MALLOC_DEFINE(M_DIRHASH
, "UFS dirhash", "UFS directory hash tables");
62 SYSCTL_NODE(_vfs
, OID_AUTO
, ufs
, CTLFLAG_RD
, 0, "UFS filesystem");
64 static int ufs_mindirhashsize
= DIRBLKSIZ
* 5;
65 SYSCTL_INT(_vfs_ufs
, OID_AUTO
, dirhash_minsize
, CTLFLAG_RW
,
67 0, "minimum directory size in bytes for which to use hashed lookup");
68 static int ufs_dirhashmaxmem
= 2 * 1024 * 1024;
69 SYSCTL_INT(_vfs_ufs
, OID_AUTO
, dirhash_maxmem
, CTLFLAG_RW
, &ufs_dirhashmaxmem
,
70 0, "maximum allowed dirhash memory usage");
71 static int ufs_dirhashmem
;
72 SYSCTL_INT(_vfs_ufs
, OID_AUTO
, dirhash_mem
, CTLFLAG_RD
, &ufs_dirhashmem
,
73 0, "current dirhash memory usage");
74 static int ufs_dirhashcheck
= 0;
75 SYSCTL_INT(_vfs_ufs
, OID_AUTO
, dirhash_docheck
, CTLFLAG_RW
, &ufs_dirhashcheck
,
76 0, "enable extra sanity tests");
79 static int ufsdirhash_hash(struct dirhash
*dh
, char *name
, int namelen
);
80 static void ufsdirhash_adjfree(struct dirhash
*dh
, doff_t offset
, int diff
);
81 static void ufsdirhash_delslot(struct dirhash
*dh
, int slot
);
82 static int ufsdirhash_findslot(struct dirhash
*dh
, char *name
, int namelen
,
84 static doff_t
ufsdirhash_getprev(struct direct
*dp
, doff_t offset
);
85 static void ufsdirhash_init(void);
86 static int ufsdirhash_recycle(int wanted
);
88 static struct objcache
*ufsdirhash_oc
;
90 /* Dirhash list; recently-used entries are near the tail. */
91 static TAILQ_HEAD(, dirhash
) ufsdirhash_list
;
94 * Attempt to build up a hash table for the directory contents in
95 * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
98 ufsdirhash_build(struct inode
*ip
)
101 struct buf
*bp
= NULL
;
105 int dirblocks
, i
, j
, memreqd
, nblocks
, narrays
, nslots
, slot
;
107 /* Check if we can/should use dirhash. */
108 if (ip
->i_dirhash
== NULL
) {
109 if (ip
->i_size
< ufs_mindirhashsize
|| OFSFMT(ip
->i_vnode
))
112 /* Hash exists, but sysctls could have changed. */
113 if (ip
->i_size
< ufs_mindirhashsize
||
114 ufs_dirhashmem
> ufs_dirhashmaxmem
) {
118 /* Check if hash exists and is intact (note: unlocked read). */
119 if (ip
->i_dirhash
->dh_hash
!= NULL
)
121 /* Free the old, recycled hash and build a new one. */
125 /* Don't hash removed directories. */
126 if (ip
->i_effnlink
== 0)
130 /* Allocate 50% more entries than this dir size could ever need. */
131 KASSERT(ip
->i_size
>= DIRBLKSIZ
, ("ufsdirhash_build size"));
132 nslots
= ip
->i_size
/ DIRECTSIZ(1);
133 nslots
= (nslots
* 3 + 1) / 2;
134 narrays
= howmany(nslots
, DH_NBLKOFF
);
135 nslots
= narrays
* DH_NBLKOFF
;
136 dirblocks
= howmany(ip
->i_size
, DIRBLKSIZ
);
137 nblocks
= (dirblocks
* 3 + 1) / 2;
139 memreqd
= sizeof(*dh
) + narrays
* sizeof(*dh
->dh_hash
) +
140 narrays
* DH_NBLKOFF
* sizeof(**dh
->dh_hash
) +
141 nblocks
* sizeof(*dh
->dh_blkfree
);
142 if (memreqd
+ ufs_dirhashmem
> ufs_dirhashmaxmem
) {
143 if (memreqd
> ufs_dirhashmaxmem
/ 2)
146 /* Try to free some space. */
147 if (ufsdirhash_recycle(memreqd
) != 0)
149 /* Enough was freed. */
151 ufs_dirhashmem
+= memreqd
;
154 * Use non-blocking mallocs so that we will revert to a linear
155 * lookup on failure rather than potentially blocking forever.
157 dh
= kmalloc(sizeof *dh
, M_DIRHASH
, M_NOWAIT
| M_ZERO
);
160 dh
->dh_hash
= kmalloc(narrays
* sizeof(dh
->dh_hash
[0]), M_DIRHASH
,
162 dh
->dh_blkfree
= kmalloc(nblocks
* sizeof(dh
->dh_blkfree
[0]),
163 M_DIRHASH
, M_NOWAIT
);
164 if (dh
->dh_hash
== NULL
|| dh
->dh_blkfree
== NULL
)
166 for (i
= 0; i
< narrays
; i
++) {
167 if ((dh
->dh_hash
[i
] = objcache_get(ufsdirhash_oc
, M_WAITOK
)) == NULL
)
169 for (j
= 0; j
< DH_NBLKOFF
; j
++)
170 dh
->dh_hash
[i
][j
] = DIRHASH_EMPTY
;
173 /* Initialise the hash table and block statistics. */
174 dh
->dh_narrays
= narrays
;
175 dh
->dh_hlen
= nslots
;
176 dh
->dh_nblk
= nblocks
;
177 dh
->dh_dirblks
= dirblocks
;
178 for (i
= 0; i
< dirblocks
; i
++)
179 dh
->dh_blkfree
[i
] = DIRBLKSIZ
/ DIRALIGN
;
180 for (i
= 0; i
< DH_NFSTATS
; i
++)
181 dh
->dh_firstfree
[i
] = -1;
182 dh
->dh_firstfree
[DH_NFSTATS
] = 0;
185 dh
->dh_score
= DH_SCOREINIT
;
188 bmask
= VFSTOUFS(vp
->v_mount
)->um_mountp
->mnt_stat
.f_iosize
- 1;
190 while (pos
< ip
->i_size
) {
191 /* If necessary, get the next directory block. */
192 if ((pos
& bmask
) == 0) {
195 if (ffs_blkatoff(vp
, (off_t
)pos
, NULL
, &bp
) != 0)
199 /* Add this entry to the hash. */
200 ep
= (struct direct
*)((char *)bp
->b_data
+ (pos
& bmask
));
201 if (ep
->d_reclen
== 0 || ep
->d_reclen
>
202 DIRBLKSIZ
- (pos
& (DIRBLKSIZ
- 1))) {
203 /* Corrupted directory. */
207 if (ep
->d_ino
!= 0) {
208 /* Add the entry (simplified ufsdirhash_add). */
209 slot
= ufsdirhash_hash(dh
, ep
->d_name
, ep
->d_namlen
);
210 while (DH_ENTRY(dh
, slot
) != DIRHASH_EMPTY
)
211 slot
= WRAPINCR(slot
, dh
->dh_hlen
);
213 DH_ENTRY(dh
, slot
) = pos
;
214 ufsdirhash_adjfree(dh
, pos
, -DIRSIZ(0, ep
));
221 TAILQ_INSERT_TAIL(&ufsdirhash_list
, dh
, dh_list
);
226 if (dh
->dh_hash
!= NULL
) {
227 for (i
= 0; i
< narrays
; i
++)
228 if (dh
->dh_hash
[i
] != NULL
)
229 objcache_put(ufsdirhash_oc
, dh
->dh_hash
[i
]);
230 kfree(dh
->dh_hash
, M_DIRHASH
);
232 if (dh
->dh_blkfree
!= NULL
)
233 kfree(dh
->dh_blkfree
, M_DIRHASH
);
234 kfree(dh
, M_DIRHASH
);
235 ip
->i_dirhash
= NULL
;
236 ufs_dirhashmem
-= memreqd
;
241 * Free any hash table associated with inode 'ip'.
244 ufsdirhash_free(struct inode
*ip
)
249 if ((dh
= ip
->i_dirhash
) == NULL
)
252 TAILQ_REMOVE(&ufsdirhash_list
, dh
, dh_list
);
254 /* The dirhash pointed to by 'dh' is exclusively ours now. */
257 if (dh
->dh_hash
!= NULL
) {
258 for (i
= 0; i
< dh
->dh_narrays
; i
++)
259 objcache_put(ufsdirhash_oc
, dh
->dh_hash
[i
]);
260 kfree(dh
->dh_hash
, M_DIRHASH
);
261 kfree(dh
->dh_blkfree
, M_DIRHASH
);
262 mem
+= dh
->dh_narrays
* sizeof(*dh
->dh_hash
) +
263 dh
->dh_narrays
* DH_NBLKOFF
* sizeof(**dh
->dh_hash
) +
264 dh
->dh_nblk
* sizeof(*dh
->dh_blkfree
);
266 kfree(dh
, M_DIRHASH
);
267 ip
->i_dirhash
= NULL
;
269 ufs_dirhashmem
-= mem
;
273 * Find the offset of the specified name within the given inode.
274 * Returns 0 on success, ENOENT if the entry does not exist, or
275 * EJUSTRETURN if the caller should revert to a linear search.
277 * If successful, the directory offset is stored in *offp, and a
278 * pointer to a struct buf containing the entry is stored in *bpp. If
279 * prevoffp is non-NULL, the offset of the previous entry within
280 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
281 * is the first in a block, the start of the block is used).
284 ufsdirhash_lookup(struct inode
*ip
, char *name
, int namelen
, doff_t
*offp
,
285 struct buf
**bpp
, doff_t
*prevoffp
)
287 struct dirhash
*dh
, *dh_next
;
291 doff_t blkoff
, bmask
, offset
, prevoff
;
294 if ((dh
= ip
->i_dirhash
) == NULL
)
295 return (EJUSTRETURN
);
297 * Move this dirhash towards the end of the list if it has a
298 * score higher than the next entry.
299 * Optimise the case where it's already the last by performing
300 * an unlocked read of the TAILQ_NEXT pointer.
302 if (TAILQ_NEXT(dh
, dh_list
) != NULL
) {
304 * If the new score will be greater than that of the next
305 * entry, then move this entry past it. With both mutexes
306 * held, dh_next won't go away, but its dh_score could
307 * change; that's not important since it is just a hint.
309 if (dh
->dh_hash
!= NULL
&&
310 (dh_next
= TAILQ_NEXT(dh
, dh_list
)) != NULL
&&
311 dh
->dh_score
>= dh_next
->dh_score
) {
312 KASSERT(dh
->dh_onlist
, ("dirhash: not on list"));
313 TAILQ_REMOVE(&ufsdirhash_list
, dh
, dh_list
);
314 TAILQ_INSERT_AFTER(&ufsdirhash_list
, dh_next
, dh
,
318 if (dh
->dh_hash
== NULL
) {
320 return (EJUSTRETURN
);
323 /* Update the score. */
324 if (dh
->dh_score
< DH_SCOREMAX
)
328 bmask
= VFSTOUFS(vp
->v_mount
)->um_mountp
->mnt_stat
.f_iosize
- 1;
332 slot
= ufsdirhash_hash(dh
, name
, namelen
);
336 * Sequential access optimisation. dh_seqoff contains the
337 * offset of the directory entry immediately following
338 * the last entry that was looked up. Check if this offset
339 * appears in the hash chain for the name we are looking for.
341 for (i
= slot
; (offset
= DH_ENTRY(dh
, i
)) != DIRHASH_EMPTY
;
342 i
= WRAPINCR(i
, dh
->dh_hlen
))
343 if (offset
== dh
->dh_seqoff
)
345 if (offset
== dh
->dh_seqoff
) {
347 * We found an entry with the expected offset. This
348 * is probably the entry we want, but if not, the
349 * code below will turn off seqoff and retry.
356 for (; (offset
= DH_ENTRY(dh
, slot
)) != DIRHASH_EMPTY
;
357 slot
= WRAPINCR(slot
, dh
->dh_hlen
)) {
358 if (offset
== DIRHASH_DEL
)
361 if (offset
< 0 || offset
>= ip
->i_size
)
362 panic("ufsdirhash_lookup: bad offset in hash array");
363 if ((offset
& ~bmask
) != blkoff
) {
366 blkoff
= offset
& ~bmask
;
367 if (ffs_blkatoff(vp
, (off_t
)blkoff
, NULL
, &bp
) != 0)
368 return (EJUSTRETURN
);
370 dp
= (struct direct
*)(bp
->b_data
+ (offset
& bmask
));
371 if (dp
->d_reclen
== 0 || dp
->d_reclen
>
372 DIRBLKSIZ
- (offset
& (DIRBLKSIZ
- 1))) {
373 /* Corrupted directory. */
375 return (EJUSTRETURN
);
377 if (dp
->d_namlen
== namelen
&&
378 bcmp(dp
->d_name
, name
, namelen
) == 0) {
379 /* Found. Get the prev offset if needed. */
380 if (prevoffp
!= NULL
) {
381 if (offset
& (DIRBLKSIZ
- 1)) {
382 prevoff
= ufsdirhash_getprev(dp
,
386 return (EJUSTRETURN
);
393 /* Check for sequential access, and update offset. */
394 if (dh
->dh_seqopt
== 0 && dh
->dh_seqoff
== offset
)
396 dh
->dh_seqoff
= offset
+ DIRSIZ(0, dp
);
403 if (dh
->dh_hash
== NULL
) {
407 return (EJUSTRETURN
);
410 * When the name doesn't match in the seqopt case, go back
411 * and search normally.
424 * Find a directory block with room for 'slotneeded' bytes. Returns
425 * the offset of the directory entry that begins the free space.
426 * This will either be the offset of an existing entry that has free
427 * space at the end, or the offset of an entry with d_ino == 0 at
428 * the start of a DIRBLKSIZ block.
430 * To use the space, the caller may need to compact existing entries in
431 * the directory. The total number of bytes in all of the entries involved
432 * in the compaction is stored in *slotsize. In other words, all of
433 * the entries that must be compacted are exactly contained in the
434 * region beginning at the returned offset and spanning *slotsize bytes.
436 * Returns -1 if no space was found, indicating that the directory
440 ufsdirhash_findfree(struct inode
*ip
, int slotneeded
, int *slotsize
)
445 doff_t pos
, slotstart
;
446 int dirblock
, error
, freebytes
, i
;
448 if ((dh
= ip
->i_dirhash
) == NULL
)
450 if (dh
->dh_hash
== NULL
) {
455 /* Find a directory block with the desired free space. */
457 for (i
= howmany(slotneeded
, DIRALIGN
); i
<= DH_NFSTATS
; i
++)
458 if ((dirblock
= dh
->dh_firstfree
[i
]) != -1)
460 if (dirblock
== -1) {
464 KASSERT(dirblock
< dh
->dh_nblk
&&
465 dh
->dh_blkfree
[dirblock
] >= howmany(slotneeded
, DIRALIGN
),
466 ("ufsdirhash_findfree: bad stats"));
467 pos
= dirblock
* DIRBLKSIZ
;
468 error
= ffs_blkatoff(ip
->i_vnode
, (off_t
)pos
, (char **)&dp
, &bp
);
472 /* Find the first entry with free space. */
473 for (i
= 0; i
< DIRBLKSIZ
; ) {
474 if (dp
->d_reclen
== 0) {
478 if (dp
->d_ino
== 0 || dp
->d_reclen
> DIRSIZ(0, dp
))
481 dp
= (struct direct
*)((char *)dp
+ dp
->d_reclen
);
489 /* Find the range of entries needed to get enough space */
491 while (i
< DIRBLKSIZ
&& freebytes
< slotneeded
) {
492 freebytes
+= dp
->d_reclen
;
494 freebytes
-= DIRSIZ(0, dp
);
495 if (dp
->d_reclen
== 0) {
500 dp
= (struct direct
*)((char *)dp
+ dp
->d_reclen
);
506 if (freebytes
< slotneeded
)
507 panic("ufsdirhash_findfree: free mismatch");
509 *slotsize
= pos
+ i
- slotstart
;
514 * Return the start of the unused space at the end of a directory, or
515 * -1 if there are no trailing unused blocks.
518 ufsdirhash_enduseful(struct inode
*ip
)
523 if ((dh
= ip
->i_dirhash
) == NULL
)
525 if (dh
->dh_hash
== NULL
) {
530 if (dh
->dh_blkfree
[dh
->dh_dirblks
- 1] != DIRBLKSIZ
/ DIRALIGN
) {
534 for (i
= dh
->dh_dirblks
- 1; i
>= 0; i
--)
535 if (dh
->dh_blkfree
[i
] != DIRBLKSIZ
/ DIRALIGN
)
537 return ((doff_t
)(i
+ 1) * DIRBLKSIZ
);
541 * Insert information into the hash about a new directory entry. dirp
542 * points to a struct direct containing the entry, and offset specifies
543 * the offset of this entry.
546 ufsdirhash_add(struct inode
*ip
, struct direct
*dirp
, doff_t offset
)
551 if ((dh
= ip
->i_dirhash
) == NULL
)
553 if (dh
->dh_hash
== NULL
) {
558 KASSERT(offset
< dh
->dh_dirblks
* DIRBLKSIZ
,
559 ("ufsdirhash_add: bad offset"));
561 * Normal hash usage is < 66%. If the usage gets too high then
562 * remove the hash entirely and let it be rebuilt later.
564 if (dh
->dh_hused
>= (dh
->dh_hlen
* 3) / 4) {
569 /* Find a free hash slot (empty or deleted), and add the entry. */
570 slot
= ufsdirhash_hash(dh
, dirp
->d_name
, dirp
->d_namlen
);
571 while (DH_ENTRY(dh
, slot
) >= 0)
572 slot
= WRAPINCR(slot
, dh
->dh_hlen
);
573 if (DH_ENTRY(dh
, slot
) == DIRHASH_EMPTY
)
575 DH_ENTRY(dh
, slot
) = offset
;
577 /* Update the per-block summary info. */
578 ufsdirhash_adjfree(dh
, offset
, -DIRSIZ(0, dirp
));
582 * Remove the specified directory entry from the hash. The entry to remove
583 * is defined by the name in `dirp', which must exist at the specified
584 * `offset' within the directory.
587 ufsdirhash_remove(struct inode
*ip
, struct direct
*dirp
, doff_t offset
)
592 if ((dh
= ip
->i_dirhash
) == NULL
)
594 if (dh
->dh_hash
== NULL
) {
599 KASSERT(offset
< dh
->dh_dirblks
* DIRBLKSIZ
,
600 ("ufsdirhash_remove: bad offset"));
602 slot
= ufsdirhash_findslot(dh
, dirp
->d_name
, dirp
->d_namlen
, offset
);
604 /* Remove the hash entry. */
605 ufsdirhash_delslot(dh
, slot
);
607 /* Update the per-block summary info. */
608 ufsdirhash_adjfree(dh
, offset
, DIRSIZ(0, dirp
));
612 * Change the offset associated with a directory entry in the hash. Used
613 * when compacting directory blocks.
616 ufsdirhash_move(struct inode
*ip
, struct direct
*dirp
, doff_t oldoff
,
622 if ((dh
= ip
->i_dirhash
) == NULL
)
624 if (dh
->dh_hash
== NULL
) {
629 KASSERT(oldoff
< dh
->dh_dirblks
* DIRBLKSIZ
&&
630 newoff
< dh
->dh_dirblks
* DIRBLKSIZ
,
631 ("ufsdirhash_move: bad offset"));
632 /* Find the entry, and update the offset. */
633 slot
= ufsdirhash_findslot(dh
, dirp
->d_name
, dirp
->d_namlen
, oldoff
);
634 DH_ENTRY(dh
, slot
) = newoff
;
638 * Inform dirhash that the directory has grown by one block that
639 * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
642 ufsdirhash_newblk(struct inode
*ip
, doff_t offset
)
647 if ((dh
= ip
->i_dirhash
) == NULL
)
649 if (dh
->dh_hash
== NULL
) {
654 KASSERT(offset
== dh
->dh_dirblks
* DIRBLKSIZ
,
655 ("ufsdirhash_newblk: bad offset"));
656 block
= offset
/ DIRBLKSIZ
;
657 if (block
>= dh
->dh_nblk
) {
658 /* Out of space; must rebuild. */
662 dh
->dh_dirblks
= block
+ 1;
664 /* Account for the new free block. */
665 dh
->dh_blkfree
[block
] = DIRBLKSIZ
/ DIRALIGN
;
666 if (dh
->dh_firstfree
[DH_NFSTATS
] == -1)
667 dh
->dh_firstfree
[DH_NFSTATS
] = block
;
671 * Inform dirhash that the directory is being truncated.
674 ufsdirhash_dirtrunc(struct inode
*ip
, doff_t offset
)
679 if ((dh
= ip
->i_dirhash
) == NULL
)
681 if (dh
->dh_hash
== NULL
) {
686 KASSERT(offset
<= dh
->dh_dirblks
* DIRBLKSIZ
,
687 ("ufsdirhash_dirtrunc: bad offset"));
688 block
= howmany(offset
, DIRBLKSIZ
);
690 * If the directory shrinks to less than 1/8 of dh_nblk blocks
691 * (about 20% of its original size due to the 50% extra added in
692 * ufsdirhash_build) then free it, and let the caller rebuild
695 if (block
< dh
->dh_nblk
/ 8 && dh
->dh_narrays
> 1) {
701 * Remove any `first free' information pertaining to the
702 * truncated blocks. All blocks we're removing should be
705 if (dh
->dh_firstfree
[DH_NFSTATS
] >= block
)
706 dh
->dh_firstfree
[DH_NFSTATS
] = -1;
707 for (i
= block
; i
< dh
->dh_dirblks
; i
++)
708 if (dh
->dh_blkfree
[i
] != DIRBLKSIZ
/ DIRALIGN
)
709 panic("ufsdirhash_dirtrunc: blocks in use");
710 for (i
= 0; i
< DH_NFSTATS
; i
++)
711 if (dh
->dh_firstfree
[i
] >= block
)
712 panic("ufsdirhash_dirtrunc: first free corrupt");
713 dh
->dh_dirblks
= block
;
717 * Debugging function to check that the dirhash information about
718 * a directory block matches its actual contents. Panics if a mismatch
721 * On entry, `buf' should point to the start of an in-core
722 * DIRBLKSIZ-sized directory block, and `offset' should contain the
723 * offset from the start of the directory of that block.
726 ufsdirhash_checkblock(struct inode
*ip
, char *buf
, doff_t offset
)
730 int block
, ffslot
, i
, nfree
;
732 if (!ufs_dirhashcheck
)
734 if ((dh
= ip
->i_dirhash
) == NULL
)
736 if (dh
->dh_hash
== NULL
) {
741 block
= offset
/ DIRBLKSIZ
;
742 if ((offset
& (DIRBLKSIZ
- 1)) != 0 || block
>= dh
->dh_dirblks
)
743 panic("ufsdirhash_checkblock: bad offset");
746 for (i
= 0; i
< DIRBLKSIZ
; i
+= dp
->d_reclen
) {
747 dp
= (struct direct
*)(buf
+ i
);
748 if (dp
->d_reclen
== 0 || i
+ dp
->d_reclen
> DIRBLKSIZ
)
749 panic("ufsdirhash_checkblock: bad dir");
751 if (dp
->d_ino
== 0) {
754 * XXX entries with d_ino == 0 should only occur
755 * at the start of a DIRBLKSIZ block. However the
756 * ufs code is tolerant of such entries at other
757 * offsets, and fsck does not fix them.
760 panic("ufsdirhash_checkblock: bad dir inode");
762 nfree
+= dp
->d_reclen
;
766 /* Check that the entry exists (will panic if it doesn't). */
767 ufsdirhash_findslot(dh
, dp
->d_name
, dp
->d_namlen
, offset
+ i
);
769 nfree
+= dp
->d_reclen
- DIRSIZ(0, dp
);
772 panic("ufsdirhash_checkblock: bad dir end");
774 if (dh
->dh_blkfree
[block
] * DIRALIGN
!= nfree
)
775 panic("ufsdirhash_checkblock: bad free count");
777 ffslot
= BLKFREE2IDX(nfree
/ DIRALIGN
);
778 for (i
= 0; i
<= DH_NFSTATS
; i
++)
779 if (dh
->dh_firstfree
[i
] == block
&& i
!= ffslot
)
780 panic("ufsdirhash_checkblock: bad first-free");
781 if (dh
->dh_firstfree
[ffslot
] == -1)
782 panic("ufsdirhash_checkblock: missing first-free entry");
786 * Hash the specified filename into a dirhash slot.
789 ufsdirhash_hash(struct dirhash
*dh
, char *name
, int namelen
)
794 * We hash the name and then some other bit of data that is
795 * invariant over the dirhash's lifetime. Otherwise names
796 * differing only in the last byte are placed close to one
797 * another in the table, which is bad for linear probing.
799 hash
= fnv_32_buf(name
, namelen
, FNV1_32_INIT
);
800 hash
= fnv_32_buf(&dh
, sizeof(dh
), hash
);
801 return (hash
% dh
->dh_hlen
);
805 * Adjust the number of free bytes in the block containing `offset'
806 * by the value specified by `diff'.
808 * The caller must ensure we have exclusive access to `dh'.
811 ufsdirhash_adjfree(struct dirhash
*dh
, doff_t offset
, int diff
)
813 int block
, i
, nfidx
, ofidx
;
815 /* Update the per-block summary info. */
816 block
= offset
/ DIRBLKSIZ
;
817 KASSERT(block
< dh
->dh_nblk
&& block
< dh
->dh_dirblks
,
818 ("dirhash bad offset"));
819 ofidx
= BLKFREE2IDX(dh
->dh_blkfree
[block
]);
820 dh
->dh_blkfree
[block
] = (int)dh
->dh_blkfree
[block
] + (diff
/ DIRALIGN
);
821 nfidx
= BLKFREE2IDX(dh
->dh_blkfree
[block
]);
823 /* Update the `first free' list if necessary. */
824 if (ofidx
!= nfidx
) {
825 /* If removing, scan forward for the next block. */
826 if (dh
->dh_firstfree
[ofidx
] == block
) {
827 for (i
= block
+ 1; i
< dh
->dh_dirblks
; i
++)
828 if (BLKFREE2IDX(dh
->dh_blkfree
[i
]) == ofidx
)
830 dh
->dh_firstfree
[ofidx
] = (i
< dh
->dh_dirblks
) ? i
: -1;
833 /* Make this the new `first free' if necessary */
834 if (dh
->dh_firstfree
[nfidx
] > block
||
835 dh
->dh_firstfree
[nfidx
] == -1)
836 dh
->dh_firstfree
[nfidx
] = block
;
841 * Find the specified name which should have the specified offset.
842 * Returns a slot number, and panics on failure.
844 * `dh' must be locked on entry and remains so on return.
847 ufsdirhash_findslot(struct dirhash
*dh
, char *name
, int namelen
, doff_t offset
)
851 /* Find the entry. */
852 KASSERT(dh
->dh_hused
< dh
->dh_hlen
, ("dirhash find full"));
853 slot
= ufsdirhash_hash(dh
, name
, namelen
);
854 while (DH_ENTRY(dh
, slot
) != offset
&&
855 DH_ENTRY(dh
, slot
) != DIRHASH_EMPTY
)
856 slot
= WRAPINCR(slot
, dh
->dh_hlen
);
857 if (DH_ENTRY(dh
, slot
) != offset
)
858 panic("ufsdirhash_findslot: '%.*s' not found", namelen
, name
);
864 * Remove the entry corresponding to the specified slot from the hash array.
866 * `dh' must be locked on entry and remains so on return.
869 ufsdirhash_delslot(struct dirhash
*dh
, int slot
)
873 /* Mark the entry as deleted. */
874 DH_ENTRY(dh
, slot
) = DIRHASH_DEL
;
876 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
877 for (i
= slot
; DH_ENTRY(dh
, i
) == DIRHASH_DEL
; )
878 i
= WRAPINCR(i
, dh
->dh_hlen
);
879 if (DH_ENTRY(dh
, i
) == DIRHASH_EMPTY
) {
880 i
= WRAPDECR(i
, dh
->dh_hlen
);
881 while (DH_ENTRY(dh
, i
) == DIRHASH_DEL
) {
882 DH_ENTRY(dh
, i
) = DIRHASH_EMPTY
;
884 i
= WRAPDECR(i
, dh
->dh_hlen
);
886 KASSERT(dh
->dh_hused
>= 0, ("ufsdirhash_delslot neg hlen"));
891 * Given a directory entry and its offset, find the offset of the
892 * previous entry in the same DIRBLKSIZ-sized block. Returns an
893 * offset, or -1 if there is no previous entry in the block or some
894 * other problem occurred.
897 ufsdirhash_getprev(struct direct
*dirp
, doff_t offset
)
901 doff_t blkoff
, prevoff
;
904 blkoff
= offset
& ~(DIRBLKSIZ
- 1); /* offset of start of block */
905 entrypos
= offset
& (DIRBLKSIZ
- 1); /* entry relative to block */
906 blkbuf
= (char *)dirp
- entrypos
;
909 /* If `offset' is the start of a block, there is no previous entry. */
913 /* Scan from the start of the block until we get to the entry. */
914 for (i
= 0; i
< entrypos
; i
+= dp
->d_reclen
) {
915 dp
= (struct direct
*)(blkbuf
+ i
);
916 if (dp
->d_reclen
== 0 || i
+ dp
->d_reclen
> entrypos
)
917 return (-1); /* Corrupted directory. */
918 prevoff
= blkoff
+ i
;
924 * Try to free up `wanted' bytes by stealing memory from existing
925 * dirhashes. Returns zero if successful.
928 ufsdirhash_recycle(int wanted
)
935 while (wanted
+ ufs_dirhashmem
> ufs_dirhashmaxmem
) {
936 /* Find a dirhash, and lock it. */
937 if ((dh
= TAILQ_FIRST(&ufsdirhash_list
)) == NULL
) {
940 KASSERT(dh
->dh_hash
!= NULL
, ("dirhash: NULL hash on list"));
942 /* Decrement the score; only recycle if it becomes zero. */
943 if (--dh
->dh_score
> 0) {
947 /* Remove it from the list and detach its memory. */
948 TAILQ_REMOVE(&ufsdirhash_list
, dh
, dh_list
);
952 blkfree
= dh
->dh_blkfree
;
953 dh
->dh_blkfree
= NULL
;
954 narrays
= dh
->dh_narrays
;
955 mem
= narrays
* sizeof(*dh
->dh_hash
) +
956 narrays
* DH_NBLKOFF
* sizeof(**dh
->dh_hash
) +
957 dh
->dh_nblk
* sizeof(*dh
->dh_blkfree
);
959 /* Free the detached memory. */
960 for (i
= 0; i
< narrays
; i
++)
961 objcache_put(ufsdirhash_oc
, hash
[i
]);
962 kfree(hash
, M_DIRHASH
);
963 kfree(blkfree
, M_DIRHASH
);
965 /* Account for the returned memory, and repeat if necessary. */
966 ufs_dirhashmem
-= mem
;
974 ufsdirhash_init(void)
976 ufsdirhash_oc
= objcache_create_simple(M_DIRHASH
,
977 DH_NBLKOFF
* sizeof(daddr_t
));
978 TAILQ_INIT(&ufsdirhash_list
);
980 SYSINIT(ufsdirhash
, SI_SUB_PSEUDO
, SI_ORDER_ANY
, ufsdirhash_init
, NULL
);
983 #endif /* UFS_DIRHASH */