2 * Copyright (c) 2001 Ian Dowse. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * $FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.3.2.6 2002/04/10 21:41:14 dwmalone Exp $
26 * $DragonFly: src/sys/vfs/ufs/ufs_dirhash.c,v 1.6 2006/05/26 17:07:48 dillon Exp $
29 * This implements a hash-based lookup scheme for UFS directories.
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/fnv_hash.h>
43 #include <sys/vnode.h>
44 #include <sys/mount.h>
45 #include <sys/sysctl.h>
46 #include <vm/vm_zone.h>
53 #include "ufs_extern.h"
54 #include "ffs_extern.h"
56 #define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1))
57 #define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1))
58 #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0)
59 #define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n))
61 static MALLOC_DEFINE(M_DIRHASH
, "UFS dirhash", "UFS directory hash tables");
63 SYSCTL_NODE(_vfs
, OID_AUTO
, ufs
, CTLFLAG_RD
, 0, "UFS filesystem");
65 static int ufs_mindirhashsize
= DIRBLKSIZ
* 5;
66 SYSCTL_INT(_vfs_ufs
, OID_AUTO
, dirhash_minsize
, CTLFLAG_RW
,
68 0, "minimum directory size in bytes for which to use hashed lookup");
69 static int ufs_dirhashmaxmem
= 2 * 1024 * 1024;
70 SYSCTL_INT(_vfs_ufs
, OID_AUTO
, dirhash_maxmem
, CTLFLAG_RW
, &ufs_dirhashmaxmem
,
71 0, "maximum allowed dirhash memory usage");
72 static int ufs_dirhashmem
;
73 SYSCTL_INT(_vfs_ufs
, OID_AUTO
, dirhash_mem
, CTLFLAG_RD
, &ufs_dirhashmem
,
74 0, "current dirhash memory usage");
75 static int ufs_dirhashcheck
= 0;
76 SYSCTL_INT(_vfs_ufs
, OID_AUTO
, dirhash_docheck
, CTLFLAG_RW
, &ufs_dirhashcheck
,
77 0, "enable extra sanity tests");
80 static int ufsdirhash_hash(struct dirhash
*dh
, char *name
, int namelen
);
81 static void ufsdirhash_adjfree(struct dirhash
*dh
, doff_t offset
, int diff
);
82 static void ufsdirhash_delslot(struct dirhash
*dh
, int slot
);
83 static int ufsdirhash_findslot(struct dirhash
*dh
, char *name
, int namelen
,
85 static doff_t
ufsdirhash_getprev(struct direct
*dp
, doff_t offset
);
86 static void ufsdirhash_init(void);
87 static int ufsdirhash_recycle(int wanted
);
89 static vm_zone_t ufsdirhash_zone
;
91 /* Dirhash list; recently-used entries are near the tail. */
92 static TAILQ_HEAD(, dirhash
) ufsdirhash_list
;
95 * Attempt to build up a hash table for the directory contents in
96 * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
99 ufsdirhash_build(struct inode
*ip
)
102 struct buf
*bp
= NULL
;
106 int dirblocks
, i
, j
, memreqd
, nblocks
, narrays
, nslots
, slot
;
108 /* Check if we can/should use dirhash. */
109 if (ip
->i_dirhash
== NULL
) {
110 if (ip
->i_size
< ufs_mindirhashsize
|| OFSFMT(ip
->i_vnode
))
113 /* Hash exists, but sysctls could have changed. */
114 if (ip
->i_size
< ufs_mindirhashsize
||
115 ufs_dirhashmem
> ufs_dirhashmaxmem
) {
119 /* Check if hash exists and is intact (note: unlocked read). */
120 if (ip
->i_dirhash
->dh_hash
!= NULL
)
122 /* Free the old, recycled hash and build a new one. */
126 /* Don't hash removed directories. */
127 if (ip
->i_effnlink
== 0)
131 /* Allocate 50% more entries than this dir size could ever need. */
132 KASSERT(ip
->i_size
>= DIRBLKSIZ
, ("ufsdirhash_build size"));
133 nslots
= ip
->i_size
/ DIRECTSIZ(1);
134 nslots
= (nslots
* 3 + 1) / 2;
135 narrays
= howmany(nslots
, DH_NBLKOFF
);
136 nslots
= narrays
* DH_NBLKOFF
;
137 dirblocks
= howmany(ip
->i_size
, DIRBLKSIZ
);
138 nblocks
= (dirblocks
* 3 + 1) / 2;
140 memreqd
= sizeof(*dh
) + narrays
* sizeof(*dh
->dh_hash
) +
141 narrays
* DH_NBLKOFF
* sizeof(**dh
->dh_hash
) +
142 nblocks
* sizeof(*dh
->dh_blkfree
);
143 if (memreqd
+ ufs_dirhashmem
> ufs_dirhashmaxmem
) {
144 if (memreqd
> ufs_dirhashmaxmem
/ 2)
147 /* Try to free some space. */
148 if (ufsdirhash_recycle(memreqd
) != 0)
150 /* Enough was freed. */
152 ufs_dirhashmem
+= memreqd
;
155 * Use non-blocking mallocs so that we will revert to a linear
156 * lookup on failure rather than potentially blocking forever.
158 MALLOC(dh
, struct dirhash
*, sizeof *dh
, M_DIRHASH
, M_NOWAIT
| M_ZERO
);
161 MALLOC(dh
->dh_hash
, doff_t
**, narrays
* sizeof(dh
->dh_hash
[0]),
162 M_DIRHASH
, M_NOWAIT
| M_ZERO
);
163 MALLOC(dh
->dh_blkfree
, uint8_t *, nblocks
* sizeof(dh
->dh_blkfree
[0]),
164 M_DIRHASH
, M_NOWAIT
);
165 if (dh
->dh_hash
== NULL
|| dh
->dh_blkfree
== NULL
)
167 for (i
= 0; i
< narrays
; i
++) {
168 if ((dh
->dh_hash
[i
] = zalloc(ufsdirhash_zone
)) == NULL
)
170 for (j
= 0; j
< DH_NBLKOFF
; j
++)
171 dh
->dh_hash
[i
][j
] = DIRHASH_EMPTY
;
174 /* Initialise the hash table and block statistics. */
175 dh
->dh_narrays
= narrays
;
176 dh
->dh_hlen
= nslots
;
177 dh
->dh_nblk
= nblocks
;
178 dh
->dh_dirblks
= dirblocks
;
179 for (i
= 0; i
< dirblocks
; i
++)
180 dh
->dh_blkfree
[i
] = DIRBLKSIZ
/ DIRALIGN
;
181 for (i
= 0; i
< DH_NFSTATS
; i
++)
182 dh
->dh_firstfree
[i
] = -1;
183 dh
->dh_firstfree
[DH_NFSTATS
] = 0;
186 dh
->dh_score
= DH_SCOREINIT
;
189 bmask
= VFSTOUFS(vp
->v_mount
)->um_mountp
->mnt_stat
.f_iosize
- 1;
191 while (pos
< ip
->i_size
) {
192 /* If necessary, get the next directory block. */
193 if ((pos
& bmask
) == 0) {
196 if (ffs_blkatoff(vp
, (off_t
)pos
, NULL
, &bp
) != 0)
200 /* Add this entry to the hash. */
201 ep
= (struct direct
*)((char *)bp
->b_data
+ (pos
& bmask
));
202 if (ep
->d_reclen
== 0 || ep
->d_reclen
>
203 DIRBLKSIZ
- (pos
& (DIRBLKSIZ
- 1))) {
204 /* Corrupted directory. */
208 if (ep
->d_ino
!= 0) {
209 /* Add the entry (simplified ufsdirhash_add). */
210 slot
= ufsdirhash_hash(dh
, ep
->d_name
, ep
->d_namlen
);
211 while (DH_ENTRY(dh
, slot
) != DIRHASH_EMPTY
)
212 slot
= WRAPINCR(slot
, dh
->dh_hlen
);
214 DH_ENTRY(dh
, slot
) = pos
;
215 ufsdirhash_adjfree(dh
, pos
, -DIRSIZ(0, ep
));
222 TAILQ_INSERT_TAIL(&ufsdirhash_list
, dh
, dh_list
);
227 if (dh
->dh_hash
!= NULL
) {
228 for (i
= 0; i
< narrays
; i
++)
229 if (dh
->dh_hash
[i
] != NULL
)
230 zfree(ufsdirhash_zone
, dh
->dh_hash
[i
]);
231 FREE(dh
->dh_hash
, M_DIRHASH
);
233 if (dh
->dh_blkfree
!= NULL
)
234 FREE(dh
->dh_blkfree
, M_DIRHASH
);
236 ip
->i_dirhash
= NULL
;
237 ufs_dirhashmem
-= memreqd
;
242 * Free any hash table associated with inode 'ip'.
245 ufsdirhash_free(struct inode
*ip
)
250 if ((dh
= ip
->i_dirhash
) == NULL
)
253 TAILQ_REMOVE(&ufsdirhash_list
, dh
, dh_list
);
255 /* The dirhash pointed to by 'dh' is exclusively ours now. */
258 if (dh
->dh_hash
!= NULL
) {
259 for (i
= 0; i
< dh
->dh_narrays
; i
++)
260 zfree(ufsdirhash_zone
, dh
->dh_hash
[i
]);
261 FREE(dh
->dh_hash
, M_DIRHASH
);
262 FREE(dh
->dh_blkfree
, M_DIRHASH
);
263 mem
+= dh
->dh_narrays
* sizeof(*dh
->dh_hash
) +
264 dh
->dh_narrays
* DH_NBLKOFF
* sizeof(**dh
->dh_hash
) +
265 dh
->dh_nblk
* sizeof(*dh
->dh_blkfree
);
268 ip
->i_dirhash
= NULL
;
270 ufs_dirhashmem
-= mem
;
274 * Find the offset of the specified name within the given inode.
275 * Returns 0 on success, ENOENT if the entry does not exist, or
276 * EJUSTRETURN if the caller should revert to a linear search.
278 * If successful, the directory offset is stored in *offp, and a
279 * pointer to a struct buf containing the entry is stored in *bpp. If
280 * prevoffp is non-NULL, the offset of the previous entry within
281 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
282 * is the first in a block, the start of the block is used).
285 ufsdirhash_lookup(struct inode
*ip
, char *name
, int namelen
, doff_t
*offp
,
286 struct buf
**bpp
, doff_t
*prevoffp
)
288 struct dirhash
*dh
, *dh_next
;
292 doff_t blkoff
, bmask
, offset
, prevoff
;
295 if ((dh
= ip
->i_dirhash
) == NULL
)
296 return (EJUSTRETURN
);
298 * Move this dirhash towards the end of the list if it has a
299 * score higher than the next entry.
300 * Optimise the case where it's already the last by performing
301 * an unlocked read of the TAILQ_NEXT pointer.
303 if (TAILQ_NEXT(dh
, dh_list
) != NULL
) {
305 * If the new score will be greater than that of the next
306 * entry, then move this entry past it. With both mutexes
307 * held, dh_next won't go away, but its dh_score could
308 * change; that's not important since it is just a hint.
310 if (dh
->dh_hash
!= NULL
&&
311 (dh_next
= TAILQ_NEXT(dh
, dh_list
)) != NULL
&&
312 dh
->dh_score
>= dh_next
->dh_score
) {
313 KASSERT(dh
->dh_onlist
, ("dirhash: not on list"));
314 TAILQ_REMOVE(&ufsdirhash_list
, dh
, dh_list
);
315 TAILQ_INSERT_AFTER(&ufsdirhash_list
, dh_next
, dh
,
319 if (dh
->dh_hash
== NULL
) {
321 return (EJUSTRETURN
);
324 /* Update the score. */
325 if (dh
->dh_score
< DH_SCOREMAX
)
329 bmask
= VFSTOUFS(vp
->v_mount
)->um_mountp
->mnt_stat
.f_iosize
- 1;
333 slot
= ufsdirhash_hash(dh
, name
, namelen
);
337 * Sequential access optimisation. dh_seqoff contains the
338 * offset of the directory entry immediately following
339 * the last entry that was looked up. Check if this offset
340 * appears in the hash chain for the name we are looking for.
342 for (i
= slot
; (offset
= DH_ENTRY(dh
, i
)) != DIRHASH_EMPTY
;
343 i
= WRAPINCR(i
, dh
->dh_hlen
))
344 if (offset
== dh
->dh_seqoff
)
346 if (offset
== dh
->dh_seqoff
) {
348 * We found an entry with the expected offset. This
349 * is probably the entry we want, but if not, the
350 * code below will turn off seqoff and retry.
357 for (; (offset
= DH_ENTRY(dh
, slot
)) != DIRHASH_EMPTY
;
358 slot
= WRAPINCR(slot
, dh
->dh_hlen
)) {
359 if (offset
== DIRHASH_DEL
)
362 if (offset
< 0 || offset
>= ip
->i_size
)
363 panic("ufsdirhash_lookup: bad offset in hash array");
364 if ((offset
& ~bmask
) != blkoff
) {
367 blkoff
= offset
& ~bmask
;
368 if (ffs_blkatoff(vp
, (off_t
)blkoff
, NULL
, &bp
) != 0)
369 return (EJUSTRETURN
);
371 dp
= (struct direct
*)(bp
->b_data
+ (offset
& bmask
));
372 if (dp
->d_reclen
== 0 || dp
->d_reclen
>
373 DIRBLKSIZ
- (offset
& (DIRBLKSIZ
- 1))) {
374 /* Corrupted directory. */
376 return (EJUSTRETURN
);
378 if (dp
->d_namlen
== namelen
&&
379 bcmp(dp
->d_name
, name
, namelen
) == 0) {
380 /* Found. Get the prev offset if needed. */
381 if (prevoffp
!= NULL
) {
382 if (offset
& (DIRBLKSIZ
- 1)) {
383 prevoff
= ufsdirhash_getprev(dp
,
387 return (EJUSTRETURN
);
394 /* Check for sequential access, and update offset. */
395 if (dh
->dh_seqopt
== 0 && dh
->dh_seqoff
== offset
)
397 dh
->dh_seqoff
= offset
+ DIRSIZ(0, dp
);
404 if (dh
->dh_hash
== NULL
) {
408 return (EJUSTRETURN
);
411 * When the name doesn't match in the seqopt case, go back
412 * and search normally.
425 * Find a directory block with room for 'slotneeded' bytes. Returns
426 * the offset of the directory entry that begins the free space.
427 * This will either be the offset of an existing entry that has free
428 * space at the end, or the offset of an entry with d_ino == 0 at
429 * the start of a DIRBLKSIZ block.
431 * To use the space, the caller may need to compact existing entries in
432 * the directory. The total number of bytes in all of the entries involved
433 * in the compaction is stored in *slotsize. In other words, all of
434 * the entries that must be compacted are exactly contained in the
435 * region beginning at the returned offset and spanning *slotsize bytes.
437 * Returns -1 if no space was found, indicating that the directory
441 ufsdirhash_findfree(struct inode
*ip
, int slotneeded
, int *slotsize
)
446 doff_t pos
, slotstart
;
447 int dirblock
, error
, freebytes
, i
;
449 if ((dh
= ip
->i_dirhash
) == NULL
)
451 if (dh
->dh_hash
== NULL
) {
456 /* Find a directory block with the desired free space. */
458 for (i
= howmany(slotneeded
, DIRALIGN
); i
<= DH_NFSTATS
; i
++)
459 if ((dirblock
= dh
->dh_firstfree
[i
]) != -1)
461 if (dirblock
== -1) {
465 KASSERT(dirblock
< dh
->dh_nblk
&&
466 dh
->dh_blkfree
[dirblock
] >= howmany(slotneeded
, DIRALIGN
),
467 ("ufsdirhash_findfree: bad stats"));
468 pos
= dirblock
* DIRBLKSIZ
;
469 error
= ffs_blkatoff(ip
->i_vnode
, (off_t
)pos
, (char **)&dp
, &bp
);
473 /* Find the first entry with free space. */
474 for (i
= 0; i
< DIRBLKSIZ
; ) {
475 if (dp
->d_reclen
== 0) {
479 if (dp
->d_ino
== 0 || dp
->d_reclen
> DIRSIZ(0, dp
))
482 dp
= (struct direct
*)((char *)dp
+ dp
->d_reclen
);
490 /* Find the range of entries needed to get enough space */
492 while (i
< DIRBLKSIZ
&& freebytes
< slotneeded
) {
493 freebytes
+= dp
->d_reclen
;
495 freebytes
-= DIRSIZ(0, dp
);
496 if (dp
->d_reclen
== 0) {
501 dp
= (struct direct
*)((char *)dp
+ dp
->d_reclen
);
507 if (freebytes
< slotneeded
)
508 panic("ufsdirhash_findfree: free mismatch");
510 *slotsize
= pos
+ i
- slotstart
;
515 * Return the start of the unused space at the end of a directory, or
516 * -1 if there are no trailing unused blocks.
519 ufsdirhash_enduseful(struct inode
*ip
)
524 if ((dh
= ip
->i_dirhash
) == NULL
)
526 if (dh
->dh_hash
== NULL
) {
531 if (dh
->dh_blkfree
[dh
->dh_dirblks
- 1] != DIRBLKSIZ
/ DIRALIGN
) {
535 for (i
= dh
->dh_dirblks
- 1; i
>= 0; i
--)
536 if (dh
->dh_blkfree
[i
] != DIRBLKSIZ
/ DIRALIGN
)
538 return ((doff_t
)(i
+ 1) * DIRBLKSIZ
);
542 * Insert information into the hash about a new directory entry. dirp
543 * points to a struct direct containing the entry, and offset specifies
544 * the offset of this entry.
547 ufsdirhash_add(struct inode
*ip
, struct direct
*dirp
, doff_t offset
)
552 if ((dh
= ip
->i_dirhash
) == NULL
)
554 if (dh
->dh_hash
== NULL
) {
559 KASSERT(offset
< dh
->dh_dirblks
* DIRBLKSIZ
,
560 ("ufsdirhash_add: bad offset"));
562 * Normal hash usage is < 66%. If the usage gets too high then
563 * remove the hash entirely and let it be rebuilt later.
565 if (dh
->dh_hused
>= (dh
->dh_hlen
* 3) / 4) {
570 /* Find a free hash slot (empty or deleted), and add the entry. */
571 slot
= ufsdirhash_hash(dh
, dirp
->d_name
, dirp
->d_namlen
);
572 while (DH_ENTRY(dh
, slot
) >= 0)
573 slot
= WRAPINCR(slot
, dh
->dh_hlen
);
574 if (DH_ENTRY(dh
, slot
) == DIRHASH_EMPTY
)
576 DH_ENTRY(dh
, slot
) = offset
;
578 /* Update the per-block summary info. */
579 ufsdirhash_adjfree(dh
, offset
, -DIRSIZ(0, dirp
));
583 * Remove the specified directory entry from the hash. The entry to remove
584 * is defined by the name in `dirp', which must exist at the specified
585 * `offset' within the directory.
588 ufsdirhash_remove(struct inode
*ip
, struct direct
*dirp
, doff_t offset
)
593 if ((dh
= ip
->i_dirhash
) == NULL
)
595 if (dh
->dh_hash
== NULL
) {
600 KASSERT(offset
< dh
->dh_dirblks
* DIRBLKSIZ
,
601 ("ufsdirhash_remove: bad offset"));
603 slot
= ufsdirhash_findslot(dh
, dirp
->d_name
, dirp
->d_namlen
, offset
);
605 /* Remove the hash entry. */
606 ufsdirhash_delslot(dh
, slot
);
608 /* Update the per-block summary info. */
609 ufsdirhash_adjfree(dh
, offset
, DIRSIZ(0, dirp
));
613 * Change the offset associated with a directory entry in the hash. Used
614 * when compacting directory blocks.
617 ufsdirhash_move(struct inode
*ip
, struct direct
*dirp
, doff_t oldoff
,
623 if ((dh
= ip
->i_dirhash
) == NULL
)
625 if (dh
->dh_hash
== NULL
) {
630 KASSERT(oldoff
< dh
->dh_dirblks
* DIRBLKSIZ
&&
631 newoff
< dh
->dh_dirblks
* DIRBLKSIZ
,
632 ("ufsdirhash_move: bad offset"));
633 /* Find the entry, and update the offset. */
634 slot
= ufsdirhash_findslot(dh
, dirp
->d_name
, dirp
->d_namlen
, oldoff
);
635 DH_ENTRY(dh
, slot
) = newoff
;
639 * Inform dirhash that the directory has grown by one block that
640 * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
643 ufsdirhash_newblk(struct inode
*ip
, doff_t offset
)
648 if ((dh
= ip
->i_dirhash
) == NULL
)
650 if (dh
->dh_hash
== NULL
) {
655 KASSERT(offset
== dh
->dh_dirblks
* DIRBLKSIZ
,
656 ("ufsdirhash_newblk: bad offset"));
657 block
= offset
/ DIRBLKSIZ
;
658 if (block
>= dh
->dh_nblk
) {
659 /* Out of space; must rebuild. */
663 dh
->dh_dirblks
= block
+ 1;
665 /* Account for the new free block. */
666 dh
->dh_blkfree
[block
] = DIRBLKSIZ
/ DIRALIGN
;
667 if (dh
->dh_firstfree
[DH_NFSTATS
] == -1)
668 dh
->dh_firstfree
[DH_NFSTATS
] = block
;
672 * Inform dirhash that the directory is being truncated.
675 ufsdirhash_dirtrunc(struct inode
*ip
, doff_t offset
)
680 if ((dh
= ip
->i_dirhash
) == NULL
)
682 if (dh
->dh_hash
== NULL
) {
687 KASSERT(offset
<= dh
->dh_dirblks
* DIRBLKSIZ
,
688 ("ufsdirhash_dirtrunc: bad offset"));
689 block
= howmany(offset
, DIRBLKSIZ
);
691 * If the directory shrinks to less than 1/8 of dh_nblk blocks
692 * (about 20% of its original size due to the 50% extra added in
693 * ufsdirhash_build) then free it, and let the caller rebuild
696 if (block
< dh
->dh_nblk
/ 8 && dh
->dh_narrays
> 1) {
702 * Remove any `first free' information pertaining to the
703 * truncated blocks. All blocks we're removing should be
706 if (dh
->dh_firstfree
[DH_NFSTATS
] >= block
)
707 dh
->dh_firstfree
[DH_NFSTATS
] = -1;
708 for (i
= block
; i
< dh
->dh_dirblks
; i
++)
709 if (dh
->dh_blkfree
[i
] != DIRBLKSIZ
/ DIRALIGN
)
710 panic("ufsdirhash_dirtrunc: blocks in use");
711 for (i
= 0; i
< DH_NFSTATS
; i
++)
712 if (dh
->dh_firstfree
[i
] >= block
)
713 panic("ufsdirhash_dirtrunc: first free corrupt");
714 dh
->dh_dirblks
= block
;
718 * Debugging function to check that the dirhash information about
719 * a directory block matches its actual contents. Panics if a mismatch
722 * On entry, `buf' should point to the start of an in-core
723 * DIRBLKSIZ-sized directory block, and `offset' should contain the
724 * offset from the start of the directory of that block.
727 ufsdirhash_checkblock(struct inode
*ip
, char *buf
, doff_t offset
)
731 int block
, ffslot
, i
, nfree
;
733 if (!ufs_dirhashcheck
)
735 if ((dh
= ip
->i_dirhash
) == NULL
)
737 if (dh
->dh_hash
== NULL
) {
742 block
= offset
/ DIRBLKSIZ
;
743 if ((offset
& (DIRBLKSIZ
- 1)) != 0 || block
>= dh
->dh_dirblks
)
744 panic("ufsdirhash_checkblock: bad offset");
747 for (i
= 0; i
< DIRBLKSIZ
; i
+= dp
->d_reclen
) {
748 dp
= (struct direct
*)(buf
+ i
);
749 if (dp
->d_reclen
== 0 || i
+ dp
->d_reclen
> DIRBLKSIZ
)
750 panic("ufsdirhash_checkblock: bad dir");
752 if (dp
->d_ino
== 0) {
755 * XXX entries with d_ino == 0 should only occur
756 * at the start of a DIRBLKSIZ block. However the
757 * ufs code is tolerant of such entries at other
758 * offsets, and fsck does not fix them.
761 panic("ufsdirhash_checkblock: bad dir inode");
763 nfree
+= dp
->d_reclen
;
767 /* Check that the entry exists (will panic if it doesn't). */
768 ufsdirhash_findslot(dh
, dp
->d_name
, dp
->d_namlen
, offset
+ i
);
770 nfree
+= dp
->d_reclen
- DIRSIZ(0, dp
);
773 panic("ufsdirhash_checkblock: bad dir end");
775 if (dh
->dh_blkfree
[block
] * DIRALIGN
!= nfree
)
776 panic("ufsdirhash_checkblock: bad free count");
778 ffslot
= BLKFREE2IDX(nfree
/ DIRALIGN
);
779 for (i
= 0; i
<= DH_NFSTATS
; i
++)
780 if (dh
->dh_firstfree
[i
] == block
&& i
!= ffslot
)
781 panic("ufsdirhash_checkblock: bad first-free");
782 if (dh
->dh_firstfree
[ffslot
] == -1)
783 panic("ufsdirhash_checkblock: missing first-free entry");
787 * Hash the specified filename into a dirhash slot.
790 ufsdirhash_hash(struct dirhash
*dh
, char *name
, int namelen
)
795 * We hash the name and then some ofther bit of data which is
796 * invarient over the dirhash's lifetime. Otherwise names
797 * differing only in the last byte are placed close to one
798 * another in the table, which is bad for linear probing.
800 hash
= fnv_32_buf(name
, namelen
, FNV1_32_INIT
);
801 hash
= fnv_32_buf(dh
, sizeof(dh
), hash
);
802 return (hash
% dh
->dh_hlen
);
806 * Adjust the number of free bytes in the block containing `offset'
807 * by the value specified by `diff'.
809 * The caller must ensure we have exclusive access to `dh'.
812 ufsdirhash_adjfree(struct dirhash
*dh
, doff_t offset
, int diff
)
814 int block
, i
, nfidx
, ofidx
;
816 /* Update the per-block summary info. */
817 block
= offset
/ DIRBLKSIZ
;
818 KASSERT(block
< dh
->dh_nblk
&& block
< dh
->dh_dirblks
,
819 ("dirhash bad offset"));
820 ofidx
= BLKFREE2IDX(dh
->dh_blkfree
[block
]);
821 dh
->dh_blkfree
[block
] = (int)dh
->dh_blkfree
[block
] + (diff
/ DIRALIGN
);
822 nfidx
= BLKFREE2IDX(dh
->dh_blkfree
[block
]);
824 /* Update the `first free' list if necessary. */
825 if (ofidx
!= nfidx
) {
826 /* If removing, scan forward for the next block. */
827 if (dh
->dh_firstfree
[ofidx
] == block
) {
828 for (i
= block
+ 1; i
< dh
->dh_dirblks
; i
++)
829 if (BLKFREE2IDX(dh
->dh_blkfree
[i
]) == ofidx
)
831 dh
->dh_firstfree
[ofidx
] = (i
< dh
->dh_dirblks
) ? i
: -1;
834 /* Make this the new `first free' if necessary */
835 if (dh
->dh_firstfree
[nfidx
] > block
||
836 dh
->dh_firstfree
[nfidx
] == -1)
837 dh
->dh_firstfree
[nfidx
] = block
;
842 * Find the specified name which should have the specified offset.
843 * Returns a slot number, and panics on failure.
845 * `dh' must be locked on entry and remains so on return.
848 ufsdirhash_findslot(struct dirhash
*dh
, char *name
, int namelen
, doff_t offset
)
852 /* Find the entry. */
853 KASSERT(dh
->dh_hused
< dh
->dh_hlen
, ("dirhash find full"));
854 slot
= ufsdirhash_hash(dh
, name
, namelen
);
855 while (DH_ENTRY(dh
, slot
) != offset
&&
856 DH_ENTRY(dh
, slot
) != DIRHASH_EMPTY
)
857 slot
= WRAPINCR(slot
, dh
->dh_hlen
);
858 if (DH_ENTRY(dh
, slot
) != offset
)
859 panic("ufsdirhash_findslot: '%.*s' not found", namelen
, name
);
865 * Remove the entry corresponding to the specified slot from the hash array.
867 * `dh' must be locked on entry and remains so on return.
870 ufsdirhash_delslot(struct dirhash
*dh
, int slot
)
874 /* Mark the entry as deleted. */
875 DH_ENTRY(dh
, slot
) = DIRHASH_DEL
;
877 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
878 for (i
= slot
; DH_ENTRY(dh
, i
) == DIRHASH_DEL
; )
879 i
= WRAPINCR(i
, dh
->dh_hlen
);
880 if (DH_ENTRY(dh
, i
) == DIRHASH_EMPTY
) {
881 i
= WRAPDECR(i
, dh
->dh_hlen
);
882 while (DH_ENTRY(dh
, i
) == DIRHASH_DEL
) {
883 DH_ENTRY(dh
, i
) = DIRHASH_EMPTY
;
885 i
= WRAPDECR(i
, dh
->dh_hlen
);
887 KASSERT(dh
->dh_hused
>= 0, ("ufsdirhash_delslot neg hlen"));
892 * Given a directory entry and its offset, find the offset of the
893 * previous entry in the same DIRBLKSIZ-sized block. Returns an
894 * offset, or -1 if there is no previous entry in the block or some
895 * other problem occurred.
898 ufsdirhash_getprev(struct direct
*dirp
, doff_t offset
)
902 doff_t blkoff
, prevoff
;
905 blkoff
= offset
& ~(DIRBLKSIZ
- 1); /* offset of start of block */
906 entrypos
= offset
& (DIRBLKSIZ
- 1); /* entry relative to block */
907 blkbuf
= (char *)dirp
- entrypos
;
910 /* If `offset' is the start of a block, there is no previous entry. */
914 /* Scan from the start of the block until we get to the entry. */
915 for (i
= 0; i
< entrypos
; i
+= dp
->d_reclen
) {
916 dp
= (struct direct
*)(blkbuf
+ i
);
917 if (dp
->d_reclen
== 0 || i
+ dp
->d_reclen
> entrypos
)
918 return (-1); /* Corrupted directory. */
919 prevoff
= blkoff
+ i
;
925 * Try to free up `wanted' bytes by stealing memory from existing
926 * dirhashes. Returns zero if successful.
929 ufsdirhash_recycle(int wanted
)
936 while (wanted
+ ufs_dirhashmem
> ufs_dirhashmaxmem
) {
937 /* Find a dirhash, and lock it. */
938 if ((dh
= TAILQ_FIRST(&ufsdirhash_list
)) == NULL
) {
941 KASSERT(dh
->dh_hash
!= NULL
, ("dirhash: NULL hash on list"));
943 /* Decrement the score; only recycle if it becomes zero. */
944 if (--dh
->dh_score
> 0) {
948 /* Remove it from the list and detach its memory. */
949 TAILQ_REMOVE(&ufsdirhash_list
, dh
, dh_list
);
953 blkfree
= dh
->dh_blkfree
;
954 dh
->dh_blkfree
= NULL
;
955 narrays
= dh
->dh_narrays
;
956 mem
= narrays
* sizeof(*dh
->dh_hash
) +
957 narrays
* DH_NBLKOFF
* sizeof(**dh
->dh_hash
) +
958 dh
->dh_nblk
* sizeof(*dh
->dh_blkfree
);
960 /* Free the detached memory. */
961 for (i
= 0; i
< narrays
; i
++)
962 zfree(ufsdirhash_zone
, hash
[i
]);
963 FREE(hash
, M_DIRHASH
);
964 FREE(blkfree
, M_DIRHASH
);
966 /* Account for the returned memory, and repeat if necessary. */
967 ufs_dirhashmem
-= mem
;
975 ufsdirhash_init(void)
977 ufsdirhash_zone
= zinit("DIRHASH", DH_NBLKOFF
* sizeof(daddr_t
), 0,
979 TAILQ_INIT(&ufsdirhash_list
);
981 SYSINIT(ufsdirhash
, SI_SUB_PSEUDO
, SI_ORDER_ANY
, ufsdirhash_init
, NULL
)
984 #endif /* UFS_DIRHASH */