4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
26 /* All Rights Reserved */
29 * University Copyright- Copyright (c) 1982, 1986, 1988
30 * The Regents of the University of California
33 * University Acknowledgment- Portions of this document are derived from
34 * software developed by the University of California, Berkeley, and its
38 #include <sys/types.h>
39 #include <sys/systm.h>
40 #include <sys/param.h>
41 #include <sys/t_lock.h>
42 #include <sys/systm.h>
44 #include <sys/vnode.h>
47 #include <sys/cmn_err.h>
48 #include <sys/vtrace.h>
49 #include <sys/bitmap.h>
51 #include <sys/sysmacros.h>
52 #include <sys/kstat.h>
53 #include <sys/atomic.h>
54 #include <sys/taskq.h>
57 * Directory name lookup cache.
58 * Based on code originally done by Robert Elz at Melbourne.
60 * Names found by directory scans are retained in a cache
61 * for future reference. Each hash chain is ordered by LRU
62 * Cache is indexed by hash value obtained from (vp, name)
63 * where the vp refers to the directory containing the name.
67 * We want to be able to identify files that are referenced only by the DNLC.
68 * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
69 * since multiple DNLC references should only be counted once in v_count. This
70 * file contains only two(2) calls to VN_HOLD, renamed VN_HOLD_CALLER in the
71 * hope that no one will mistakenly add a VN_HOLD to this file. (Unfortunately
72 * it is not possible to #undef VN_HOLD and retain VN_HOLD_CALLER. Ideally a
73 * Makefile rule would grep uncommented C tokens to check that VN_HOLD is
74 * referenced only once in this file, to define VN_HOLD_CALLER.)
76 #define VN_HOLD_CALLER VN_HOLD
77 #define VN_HOLD_DNLC(vp) { \
78 mutex_enter(&(vp)->v_lock); \
79 if ((vp)->v_count_dnlc == 0) \
81 (vp)->v_count_dnlc++; \
82 mutex_exit(&(vp)->v_lock); \
84 #define VN_RELE_DNLC(vp) { \
89 * Tunable nc_hashavelen is the average length desired for this chain, from
90 * which the size of the nc_hash table is derived at create time.
92 #define NC_HASHAVELEN_DEFAULT 4
93 int nc_hashavelen
= NC_HASHAVELEN_DEFAULT
;
96 * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
97 * depth exceeds this value, we move the looked-up entry to the front of
98 * its hash chain. The idea is to make sure that the most frequently
99 * accessed entries are found most quickly (by keeping them near the
100 * front of their hash chains).
102 #define NC_MOVETOFRONT 2
106 * DNLC_MAX_RELE is used to size an array on the stack when releasing
107 * vnodes. This array is used rather than calling VN_RELE() inline because
108 * all dnlc locks must be dropped by that time in order to avoid a
109 * possible deadlock. This deadlock occurs when the dnlc holds the last
110 * reference to the vnode and so the VOP_INACTIVE vector is called which
111 * can in turn call back into the dnlc. A global array was used but had
113 * 1) Actually doesn't have an upper bound on the array size as
114 * entries can be added after starting the purge.
115 * 2) The locking scheme causes a hang.
116 * 3) Caused serialisation on the global lock.
117 * 4) The array was often unnecessarily huge.
119 * Note the current value 8 allows up to 4 cache entries (to be purged
120 * from each hash chain), before having to cycle around and retry.
121 * This ought to be ample given that nc_hashavelen is typically very small.
123 #define DNLC_MAX_RELE 8 /* must be even */
126 * Hash table of name cache entries for fast lookup, dynamically
127 * allocated at startup.
132 * Rotors. Used to select entries on a round-robin basis.
134 static nc_hash_t
*dnlc_purge_fs1_rotor
;
135 static nc_hash_t
*dnlc_free_rotor
;
138 * # of dnlc entries (uninitialized)
140 * the initial value was chosen as being
141 * a random string of bits, probably not
142 * normally chosen by a systems administrator
145 volatile uint32_t dnlc_nentries
= 0; /* current num of name cache entries */
146 static int nc_hashsz
; /* size of hash table */
147 static int nc_hashmask
; /* size of hash table minus 1 */
150 * The dnlc_reduce_cache() taskq queue is activated when there are
151 * ncsize name cache entries and if no parameter is provided, it reduces
152 * the size down to dnlc_nentries_low_water, which is by default one
153 hundredth less (or 99%) of ncsize.
155 * If a parameter is provided to dnlc_reduce_cache(), then we reduce
156 * the size down based on ncsize_onepercent - where ncsize_onepercent
157 * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
158 * the size below 3% of ncsize (ncsize_min_percent).
160 #define DNLC_LOW_WATER_DIVISOR_DEFAULT 100
161 uint_t dnlc_low_water_divisor
= DNLC_LOW_WATER_DIVISOR_DEFAULT
;
162 uint_t dnlc_nentries_low_water
;
163 int dnlc_reduce_idle
= 1; /* no locking needed */
164 uint_t ncsize_onepercent
;
165 uint_t ncsize_min_percent
;
168 * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
169 * then this means the dnlc_reduce_cache() taskq is failing to
170 * keep up. In this case we refuse to add new entries to the dnlc
171 * until the taskq catches up.
173 uint_t dnlc_max_nentries
; /* twice ncsize */
174 uint64_t dnlc_max_nentries_cnt
= 0; /* statistic on times we failed */
177 * Tunable to define when we should just remove items from
178 * the end of the chain.
180 #define DNLC_LONG_CHAIN 8
181 uint_t dnlc_long_chain
= DNLC_LONG_CHAIN
;
184 * ncstats has been deprecated, due to the integer size of the counters
185 * which can easily overflow in the dnlc.
186 * It is maintained (at some expense) for compatibility.
187 * The preferred interface is the kstat accessible nc_stats below.
189 struct ncstats ncstats
;
191 struct nc_stats ncs
= {
192 { "hits", KSTAT_DATA_UINT64
},
193 { "misses", KSTAT_DATA_UINT64
},
194 { "negative_cache_hits", KSTAT_DATA_UINT64
},
195 { "enters", KSTAT_DATA_UINT64
},
196 { "double_enters", KSTAT_DATA_UINT64
},
197 { "purge_total_entries", KSTAT_DATA_UINT64
},
198 { "purge_all", KSTAT_DATA_UINT64
},
199 { "purge_vp", KSTAT_DATA_UINT64
},
200 { "purge_vfs", KSTAT_DATA_UINT64
},
201 { "purge_fs1", KSTAT_DATA_UINT64
},
202 { "pick_free", KSTAT_DATA_UINT64
},
203 { "pick_heuristic", KSTAT_DATA_UINT64
},
204 { "pick_last", KSTAT_DATA_UINT64
},
206 /* directory caching stats */
208 { "dir_hits", KSTAT_DATA_UINT64
},
209 { "dir_misses", KSTAT_DATA_UINT64
},
210 { "dir_cached_current", KSTAT_DATA_UINT64
},
211 { "dir_entries_cached_current", KSTAT_DATA_UINT64
},
212 { "dir_cached_total", KSTAT_DATA_UINT64
},
213 { "dir_start_no_memory", KSTAT_DATA_UINT64
},
214 { "dir_add_no_memory", KSTAT_DATA_UINT64
},
215 { "dir_add_abort", KSTAT_DATA_UINT64
},
216 { "dir_add_max", KSTAT_DATA_UINT64
},
217 { "dir_remove_entry_fail", KSTAT_DATA_UINT64
},
218 { "dir_remove_space_fail", KSTAT_DATA_UINT64
},
219 { "dir_update_fail", KSTAT_DATA_UINT64
},
220 { "dir_fini_purge", KSTAT_DATA_UINT64
},
221 { "dir_reclaim_last", KSTAT_DATA_UINT64
},
222 { "dir_reclaim_any", KSTAT_DATA_UINT64
},
225 static int doingcache
= 1;
227 vnode_t negative_cache_vnode
;
230 * Insert entry at the front of the queue
232 #define nc_inshash(ncp, hp) \
234 (ncp)->hash_next = (hp)->hash_next; \
235 (ncp)->hash_prev = (ncache_t *)(hp); \
236 (hp)->hash_next->hash_prev = (ncp); \
237 (hp)->hash_next = (ncp); \
241 * Remove entry from hash queue
243 #define nc_rmhash(ncp) \
245 (ncp)->hash_prev->hash_next = (ncp)->hash_next; \
246 (ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
247 (ncp)->hash_prev = NULL; \
248 (ncp)->hash_next = NULL; \
254 #define dnlc_free(ncp) \
256 kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
257 atomic_dec_32(&dnlc_nentries); \
262 * Cached directory info.
263 * ======================
267 * Cached directory free space hash function.
268 * Needs the free space handle and the dcp to get the hash table size
269 * Returns the hash index.
271 #define DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
274 * Cached directory name entry hash function.
275 * Uses the name and returns in the input arguments the hash and the name
278 #define DNLC_DIR_HASH(name, hash, namelen) \
283 for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++) \
284 hash = (hash << 4) + hash + Xc; \
285 ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1)); \
286 namelen = Xcp - (name); \
289 /* special dircache_t pointer to indicate error should be returned */
291 * The anchor directory cache pointer can contain 3 types of values,
292 * 1) NULL: No directory cache
293 * 2) DC_RET_LOW_MEM (-1): There was a directory cache that found to be
294 * too big or a memory shortage occurred. This value remains in the
295 * pointer until a dnlc_dir_start() which returns a DNOMEM error.
296 * This is kludgy but efficient and only visible in this source file.
297 * 3) A valid cache pointer.
299 #define DC_RET_LOW_MEM (dircache_t *)1
300 #define VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
303 uint_t dnlc_dir_enable
= 1; /* disable caching directories by setting to 0 */
304 uint_t dnlc_dir_min_size
= 40; /* min no of directory entries before caching */
305 uint_t dnlc_dir_max_size
= UINT_MAX
; /* ditto maximum */
306 uint_t dnlc_dir_hash_size_shift
= 3; /* 8 entries per hash bucket */
307 uint_t dnlc_dir_min_reclaim
= 350000; /* approx 1MB of dcentrys */
309 * dnlc_dir_hash_resize_shift determines when the hash tables
310 * get re-adjusted due to growth or shrinkage
311 * - currently 2 indicating that there can be at most 4
312 * times or at least one quarter the number of entries
313 * before hash table readjustment. Note that with
314 * dnlc_dir_hash_size_shift above set at 3 this would
315 * mean readjustment would occur if the average number
316 * of entries went above 32 or below 2
318 uint_t dnlc_dir_hash_resize_shift
= 2; /* readjust rate */
320 static kmem_cache_t
*dnlc_dir_space_cache
; /* free space entry cache */
321 static dchead_t dc_head
; /* anchor of cached directories */
324 static ncache_t
*dnlc_get(uchar_t namlen
);
325 static ncache_t
*dnlc_search(vnode_t
*dp
, const char *name
, uchar_t namlen
,
327 static void dnlc_dir_reclaim(void *unused
);
328 static void dnlc_dir_abort(dircache_t
*dcp
);
329 static void dnlc_dir_adjust_fhash(dircache_t
*dcp
);
330 static void dnlc_dir_adjust_nhash(dircache_t
*dcp
);
331 static void do_dnlc_reduce_cache(void *);
335 * Initialize the directory cache.
345 * Set up the size of the dnlc (ncsize) and its low water mark.
348 /* calculate a reasonable size for the low water */
349 dnlc_nentries_low_water
= 4 * (v
.v_proc
+ maxusers
) + 320;
350 ncsize
= dnlc_nentries_low_water
+
351 (dnlc_nentries_low_water
/ dnlc_low_water_divisor
);
353 /* don't change the user specified ncsize */
354 dnlc_nentries_low_water
=
355 ncsize
- (ncsize
/ dnlc_low_water_divisor
);
359 dnlc_dir_enable
= 0; /* also disable directory caching */
361 cmn_err(CE_NOTE
, "name cache (dnlc) disabled");
364 dnlc_max_nentries
= ncsize
* 2;
365 ncsize_onepercent
= ncsize
/ 100;
366 ncsize_min_percent
= ncsize_onepercent
* 3;
369 * Initialise the hash table.
370 * Compute hash size rounding to the next power of two.
372 nc_hashsz
= ncsize
/ nc_hashavelen
;
373 nc_hashsz
= 1 << highbit(nc_hashsz
);
374 nc_hashmask
= nc_hashsz
- 1;
375 nc_hash
= kmem_zalloc(nc_hashsz
* sizeof (*nc_hash
), KM_SLEEP
);
376 for (i
= 0; i
< nc_hashsz
; i
++) {
377 hp
= (nc_hash_t
*)&nc_hash
[i
];
378 mutex_init(&hp
->hash_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
379 hp
->hash_next
= (ncache_t
*)hp
;
380 hp
->hash_prev
= (ncache_t
*)hp
;
386 dnlc_free_rotor
= dnlc_purge_fs1_rotor
= &nc_hash
[0];
389 * Set up the directory caching to use kmem_cache_alloc
390 * for its free space entries so that we can get a callback
391 * when the system is short on memory, to allow us to free
392 * up some memory. we don't use the constructor/deconstructor
395 dnlc_dir_space_cache
= kmem_cache_create("dnlc_space_cache",
396 sizeof (dcfree_t
), 0, NULL
, NULL
, dnlc_dir_reclaim
, NULL
,
400 * Initialise the head of the cached directory structures
402 mutex_init(&dc_head
.dch_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
403 dc_head
.dch_next
= (dircache_t
*)&dc_head
;
404 dc_head
.dch_prev
= (dircache_t
*)&dc_head
;
407 * Initialise the reference count of the negative cache vnode to 1
408 * so that it never goes away (VOP_INACTIVE isn't called on it).
410 negative_cache_vnode
.v_count
= 1;
411 negative_cache_vnode
.v_count_dnlc
= 0;
414 * Initialise kstats - both the old compatability raw kind and
415 * the more extensive named stats.
417 ksp
= kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW
,
418 sizeof (struct ncstats
), KSTAT_FLAG_VIRTUAL
);
420 ksp
->ks_data
= (void *) &ncstats
;
423 ksp
= kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED
,
424 sizeof (ncs
) / sizeof (kstat_named_t
), KSTAT_FLAG_VIRTUAL
);
426 ksp
->ks_data
= (void *) &ncs
;
432 * Add a name to the directory cache.
435 dnlc_enter(vnode_t
*dp
, const char *name
, vnode_t
*vp
)
442 TRACE_0(TR_FAC_NFS
, TR_DNLC_ENTER_START
, "dnlc_enter_start:");
445 TRACE_2(TR_FAC_NFS
, TR_DNLC_ENTER_END
,
446 "dnlc_enter_end:(%S) %d", "not caching", 0);
451 * Get a new dnlc entry. Assume the entry won't be in the cache
452 * and initialize it now
454 DNLCHASH(name
, dp
, hash
, namlen
);
455 if ((ncp
= dnlc_get(namlen
)) == NULL
)
461 bcopy(name
, ncp
->name
, namlen
+ 1); /* name and null */
463 hp
= &nc_hash
[hash
& nc_hashmask
];
465 mutex_enter(&hp
->hash_lock
);
466 if (dnlc_search(dp
, name
, namlen
, hash
) != NULL
) {
467 mutex_exit(&hp
->hash_lock
);
468 ncstats
.dbl_enters
++;
469 ncs
.ncs_dbl_enters
.value
.ui64
++;
472 dnlc_free(ncp
); /* crfree done here */
473 TRACE_2(TR_FAC_NFS
, TR_DNLC_ENTER_END
,
474 "dnlc_enter_end:(%S) %d", "dbl enter", ncstats
.dbl_enters
);
478 * Insert back into the hash chain.
481 mutex_exit(&hp
->hash_lock
);
483 ncs
.ncs_enters
.value
.ui64
++;
484 TRACE_2(TR_FAC_NFS
, TR_DNLC_ENTER_END
,
485 "dnlc_enter_end:(%S) %d", "done", ncstats
.enters
);
489 * Add a name to the directory cache.
491 * This function is basically identical with
492 * dnlc_enter(). The difference is that when the
493 * desired dnlc entry is found, the vnode in the
494 * ncache is compared with the vnode passed in.
496 * If they are not equal then the ncache is
497 * updated with the passed in vnode. Otherwise
498 * it just frees up the newly allocated dnlc entry.
501 dnlc_update(vnode_t
*dp
, const char *name
, vnode_t
*vp
)
510 TRACE_0(TR_FAC_NFS
, TR_DNLC_ENTER_START
, "dnlc_update_start:");
513 TRACE_2(TR_FAC_NFS
, TR_DNLC_ENTER_END
,
514 "dnlc_update_end:(%S) %d", "not caching", 0);
519 * Get a new dnlc entry and initialize it now.
520 * If we fail to get a new entry, call dnlc_remove() to purge
521 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
523 * Failure to clear an existing entry could result in false dnlc
524 * lookup (negative/stale entry).
526 DNLCHASH(name
, dp
, hash
, namlen
);
527 if ((ncp
= dnlc_get(namlen
)) == NULL
) {
528 dnlc_remove(dp
, name
);
535 bcopy(name
, ncp
->name
, namlen
+ 1); /* name and null */
537 hp
= &nc_hash
[hash
& nc_hashmask
];
539 mutex_enter(&hp
->hash_lock
);
540 if ((tcp
= dnlc_search(dp
, name
, namlen
, hash
)) != NULL
) {
544 mutex_exit(&hp
->hash_lock
);
547 ncs
.ncs_enters
.value
.ui64
++;
548 TRACE_2(TR_FAC_NFS
, TR_DNLC_ENTER_END
,
549 "dnlc_update_end:(%S) %d", "done", ncstats
.enters
);
551 mutex_exit(&hp
->hash_lock
);
553 ncstats
.dbl_enters
++;
554 ncs
.ncs_dbl_enters
.value
.ui64
++;
555 TRACE_2(TR_FAC_NFS
, TR_DNLC_ENTER_END
,
556 "dnlc_update_end:(%S) %d",
557 "dbl enter", ncstats
.dbl_enters
);
560 dnlc_free(ncp
); /* crfree done here */
564 * insert the new entry, since it is not in dnlc yet
567 mutex_exit(&hp
->hash_lock
);
569 ncs
.ncs_enters
.value
.ui64
++;
570 TRACE_2(TR_FAC_NFS
, TR_DNLC_ENTER_END
,
571 "dnlc_update_end:(%S) %d", "done", ncstats
.enters
);
575 * Look up a name in the directory name cache.
577 * Return a doubly-held vnode if found: one hold so that it may
578 * remain in the cache for other users, the other hold so that
579 * the cache is not re-cycled and the identity of the vnode is
580 * lost before the caller can use the vnode.
583 dnlc_lookup(vnode_t
*dp
, const char *name
)
591 TRACE_2(TR_FAC_NFS
, TR_DNLC_LOOKUP_START
,
592 "dnlc_lookup_start:dp %x name %s", dp
, name
);
595 TRACE_4(TR_FAC_NFS
, TR_DNLC_LOOKUP_END
,
596 "dnlc_lookup_end:%S %d vp %x name %s",
597 "not_caching", 0, NULL
, name
);
601 DNLCHASH(name
, dp
, hash
, namlen
);
603 hp
= &nc_hash
[hash
& nc_hashmask
];
604 mutex_enter(&hp
->hash_lock
);
606 for (ncp
= hp
->hash_next
; ncp
!= (ncache_t
*)hp
;
607 ncp
= ncp
->hash_next
) {
608 if (ncp
->hash
== hash
&& /* fast signature check */
610 ncp
->namlen
== namlen
&&
611 bcmp(ncp
->name
, name
, namlen
) == 0) {
613 * Move this entry to the head of its hash chain
614 * if it's not already close.
616 if (depth
> NC_MOVETOFRONT
) {
617 ncache_t
*next
= ncp
->hash_next
;
618 ncache_t
*prev
= ncp
->hash_prev
;
620 prev
->hash_next
= next
;
621 next
->hash_prev
= prev
;
622 ncp
->hash_next
= next
= hp
->hash_next
;
623 ncp
->hash_prev
= (ncache_t
*)hp
;
624 next
->hash_prev
= ncp
;
627 ncstats
.move_to_front
++;
631 * Put a hold on the vnode now so its identity
632 * can't change before the caller has a chance to
636 VN_HOLD_CALLER(vp
); /* VN_HOLD 1 of 2 in this file */
637 mutex_exit(&hp
->hash_lock
);
639 ncs
.ncs_hits
.value
.ui64
++;
640 if (vp
== DNLC_NO_VNODE
) {
641 ncs
.ncs_neg_hits
.value
.ui64
++;
643 TRACE_4(TR_FAC_NFS
, TR_DNLC_LOOKUP_END
,
644 "dnlc_lookup_end:%S %d vp %x name %s", "hit",
645 ncstats
.hits
, vp
, name
);
651 mutex_exit(&hp
->hash_lock
);
653 ncs
.ncs_misses
.value
.ui64
++;
654 TRACE_4(TR_FAC_NFS
, TR_DNLC_LOOKUP_END
,
655 "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats
.misses
,
661 * Remove an entry in the directory name cache.
664 dnlc_remove(vnode_t
*dp
, const char *name
)
673 DNLCHASH(name
, dp
, hash
, namlen
);
674 hp
= &nc_hash
[hash
& nc_hashmask
];
676 mutex_enter(&hp
->hash_lock
);
677 if (ncp
= dnlc_search(dp
, name
, namlen
, hash
)) {
682 mutex_exit(&hp
->hash_lock
);
683 VN_RELE_DNLC(ncp
->vp
);
684 VN_RELE_DNLC(ncp
->dp
);
688 mutex_exit(&hp
->hash_lock
);
692 * Purge the entire cache.
701 vnode_t
*nc_rele
[DNLC_MAX_RELE
];
707 ncs
.ncs_purge_all
.value
.ui64
++;
709 for (nch
= nc_hash
; nch
< &nc_hash
[nc_hashsz
]; nch
++) {
711 mutex_enter(&nch
->hash_lock
);
712 ncp
= nch
->hash_next
;
713 while (ncp
!= (ncache_t
*)nch
) {
717 nc_rele
[index
++] = ncp
->vp
;
718 nc_rele
[index
++] = ncp
->dp
;
723 ncs
.ncs_purge_total
.value
.ui64
++;
724 if (index
== DNLC_MAX_RELE
)
727 mutex_exit(&nch
->hash_lock
);
729 /* Release holds on all the vnodes now that we have no locks */
730 for (i
= 0; i
< index
; i
++) {
731 VN_RELE_DNLC(nc_rele
[i
]);
733 if (ncp
!= (ncache_t
*)nch
) {
734 nch
--; /* Do current hash chain again */
740 * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
741 * reference count goes to zero (the caller still holds a reference).
744 dnlc_purge_vp(vnode_t
*vp
)
749 vnode_t
*nc_rele
[DNLC_MAX_RELE
];
751 ASSERT(vp
->v_count
> 0);
752 if (vp
->v_count_dnlc
== 0) {
760 ncs
.ncs_purge_vp
.value
.ui64
++;
762 for (nch
= nc_hash
; nch
< &nc_hash
[nc_hashsz
]; nch
++) {
764 mutex_enter(&nch
->hash_lock
);
765 ncp
= nch
->hash_next
;
766 while (ncp
!= (ncache_t
*)nch
) {
770 if (ncp
->dp
== vp
|| ncp
->vp
== vp
) {
771 nc_rele
[index
++] = ncp
->vp
;
772 nc_rele
[index
++] = ncp
->dp
;
775 ncs
.ncs_purge_total
.value
.ui64
++;
776 if (index
== DNLC_MAX_RELE
) {
783 mutex_exit(&nch
->hash_lock
);
785 /* Release holds on all the vnodes now that we have no locks */
787 VN_RELE_DNLC(nc_rele
[--index
]);
790 if (vp
->v_count_dnlc
== 0) {
794 if (ncp
!= (ncache_t
*)nch
) {
795 nch
--; /* Do current hash chain again */
801 * Purge cache entries referencing a vfsp. Caller supplies a count
802 * of entries to purge; up to that many will be freed. A count of
803 * zero indicates that all such entries should be purged. Returns
804 * the number of entries that were purged.
807 dnlc_purge_vfsp(vfs_t
*vfsp
, int count
)
814 vnode_t
*nc_rele
[DNLC_MAX_RELE
];
820 ncs
.ncs_purge_vfs
.value
.ui64
++;
822 for (nch
= nc_hash
; nch
< &nc_hash
[nc_hashsz
]; nch
++) {
824 mutex_enter(&nch
->hash_lock
);
825 ncp
= nch
->hash_next
;
826 while (ncp
!= (ncache_t
*)nch
) {
830 ASSERT(ncp
->dp
!= NULL
);
831 ASSERT(ncp
->vp
!= NULL
);
832 if ((ncp
->dp
->v_vfsp
== vfsp
) ||
833 (ncp
->vp
->v_vfsp
== vfsp
)) {
835 nc_rele
[index
++] = ncp
->vp
;
836 nc_rele
[index
++] = ncp
->dp
;
839 ncs
.ncs_purge_total
.value
.ui64
++;
840 if (index
== DNLC_MAX_RELE
) {
844 if (count
!= 0 && n
>= count
) {
850 mutex_exit(&nch
->hash_lock
);
851 /* Release holds on all the vnodes now that we have no locks */
852 for (i
= 0; i
< index
; i
++) {
853 VN_RELE_DNLC(nc_rele
[i
]);
855 if (count
!= 0 && n
>= count
) {
858 if (ncp
!= (ncache_t
*)nch
) {
859 nch
--; /* Do current hash chain again */
866 * Purge 1 entry from the dnlc that is part of the filesystem(s)
867 * represented by 'vop'. The purpose of this routine is to allow
868 * users of the dnlc to free a vnode that is being held by the dnlc.
870 * If we find a vnode that we release which will result in
871 * freeing the underlying vnode (count was 1), return 1, 0
872 * if no appropriate vnodes found.
874 * Note, vop is not the 'right' identifier for a filesystem.
877 dnlc_fs_purge1(vnodeops_t
*vop
)
887 ncs
.ncs_purge_fs1
.value
.ui64
++;
890 * Scan the dnlc entries looking for a likely candidate.
892 hp
= end
= dnlc_purge_fs1_rotor
;
895 if (++hp
== &nc_hash
[nc_hashsz
])
897 dnlc_purge_fs1_rotor
= hp
;
898 if (hp
->hash_next
== (ncache_t
*)hp
)
900 mutex_enter(&hp
->hash_lock
);
901 for (ncp
= hp
->hash_prev
;
902 ncp
!= (ncache_t
*)hp
;
903 ncp
= ncp
->hash_prev
) {
905 if (!vn_has_cached_data(vp
) && (vp
->v_count
== 1) &&
906 vn_matchops(vp
, vop
))
909 if (ncp
!= (ncache_t
*)hp
) {
911 mutex_exit(&hp
->hash_lock
);
912 VN_RELE_DNLC(ncp
->dp
);
915 ncs
.ncs_purge_total
.value
.ui64
++;
918 mutex_exit(&hp
->hash_lock
);
924 * Perform a reverse lookup in the DNLC. This will find the first occurrence of
925 * the vnode. If successful, it will return the vnode of the parent, and the
926 * name of the entry in the given buffer. If it cannot be found, or the buffer
927 * is too small, then it will return NULL. Note that this is a highly
928 * inefficient function, since the DNLC is constructed solely for forward
932 dnlc_reverse_lookup(vnode_t
*vp
, char *buf
, size_t buflen
)
941 for (nch
= nc_hash
; nch
< &nc_hash
[nc_hashsz
]; nch
++) {
942 mutex_enter(&nch
->hash_lock
);
943 ncp
= nch
->hash_next
;
944 while (ncp
!= (ncache_t
*)nch
) {
946 * We ignore '..' entries since it can create
947 * confusion and infinite loops.
949 if (ncp
->vp
== vp
&& !(ncp
->namlen
== 2 &&
950 0 == bcmp(ncp
->name
, "..", 2)) &&
951 ncp
->namlen
< buflen
) {
952 bcopy(ncp
->name
, buf
, ncp
->namlen
);
953 buf
[ncp
->namlen
] = '\0';
955 /* VN_HOLD 2 of 2 in this file */
957 mutex_exit(&nch
->hash_lock
);
960 ncp
= ncp
->hash_next
;
962 mutex_exit(&nch
->hash_lock
);
968 * Utility routine to search for a cache entry. Return the
969 * ncache entry if found, NULL otherwise.
972 dnlc_search(vnode_t
*dp
, const char *name
, uchar_t namlen
, int hash
)
977 hp
= &nc_hash
[hash
& nc_hashmask
];
979 for (ncp
= hp
->hash_next
; ncp
!= (ncache_t
*)hp
; ncp
= ncp
->hash_next
) {
980 if (ncp
->hash
== hash
&&
982 ncp
->namlen
== namlen
&&
983 bcmp(ncp
->name
, name
, namlen
) == 0)
989 #if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
990 #error ncache_t name length representation is too small
994 dnlc_reduce_cache(void *reduce_percent
)
996 if (dnlc_reduce_idle
&& (dnlc_nentries
>= ncsize
|| reduce_percent
)) {
997 dnlc_reduce_idle
= 0;
998 if ((taskq_dispatch(system_taskq
, do_dnlc_reduce_cache
,
999 reduce_percent
, TQ_NOSLEEP
)) == NULL
)
1000 dnlc_reduce_idle
= 1;
1005 * Get a new name cache entry.
1006 * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
1007 * is short then just return NULL. If we're over ncsize then kick off a
1008 * thread to free some in use entries down to dnlc_nentries_low_water.
1009 * Caller must initialise all fields except namlen.
1010 * Component names are defined to be less than MAXNAMELEN
1011 * which includes a null.
1014 dnlc_get(uchar_t namlen
)
1018 if (dnlc_nentries
> dnlc_max_nentries
) {
1019 dnlc_max_nentries_cnt
++; /* keep a statistic */
1022 ncp
= kmem_alloc(sizeof (ncache_t
) + namlen
, KM_NOSLEEP
);
1026 ncp
->namlen
= namlen
;
1027 atomic_inc_32(&dnlc_nentries
);
1028 dnlc_reduce_cache(NULL
);
1033 * Taskq routine to free up name cache entries to reduce the
1034 * cache size to the low water mark if "reduce_percent" is not provided.
1035 * If "reduce_percent" is provided, reduce cache size by
1036 * (ncsize_onepercent * reduce_percent).
1040 do_dnlc_reduce_cache(void *reduce_percent
)
1042 nc_hash_t
*hp
= dnlc_free_rotor
, *start_hp
= hp
;
1046 uint_t low_water
= dnlc_nentries_low_water
;
1048 if (reduce_percent
) {
1052 * Never try to reduce the current number
1053 * of cache entries below 3% of ncsize.
1055 if (dnlc_nentries
<= ncsize_min_percent
) {
1056 dnlc_reduce_idle
= 1;
1059 reduce_cnt
= ncsize_onepercent
*
1060 (uint_t
)(uintptr_t)reduce_percent
;
1062 if (reduce_cnt
> dnlc_nentries
||
1063 dnlc_nentries
- reduce_cnt
< ncsize_min_percent
)
1064 low_water
= ncsize_min_percent
;
1066 low_water
= dnlc_nentries
- reduce_cnt
;
1071 * Find the first non empty hash queue without locking.
1072 * Only look at each hash queue once to avoid an infinite loop.
1075 if (++hp
== &nc_hash
[nc_hashsz
])
1077 } while (hp
->hash_next
== (ncache_t
*)hp
&& hp
!= start_hp
);
1079 /* return if all hash queues are empty. */
1080 if (hp
->hash_next
== (ncache_t
*)hp
) {
1081 dnlc_reduce_idle
= 1;
1085 mutex_enter(&hp
->hash_lock
);
1086 for (cnt
= 0, ncp
= hp
->hash_prev
; ncp
!= (ncache_t
*)hp
;
1087 ncp
= ncp
->hash_prev
, cnt
++) {
1090 * A name cache entry with a reference count
1091 * of one is only referenced by the dnlc.
1092 * Also negative cache entries are purged first.
1094 if (!vn_has_cached_data(vp
) &&
1095 ((vp
->v_count
== 1) || (vp
== DNLC_NO_VNODE
))) {
1096 ncs
.ncs_pick_heur
.value
.ui64
++;
1100 * Remove from the end of the chain if the
1103 if (cnt
> dnlc_long_chain
) {
1104 ncp
= hp
->hash_prev
;
1105 ncs
.ncs_pick_last
.value
.ui64
++;
1110 /* check for race and continue */
1111 if (hp
->hash_next
== (ncache_t
*)hp
) {
1112 mutex_exit(&hp
->hash_lock
);
1116 ncp
= hp
->hash_prev
; /* pick the last one in the hash queue */
1117 ncs
.ncs_pick_last
.value
.ui64
++;
1121 * Remove from hash chain.
1124 mutex_exit(&hp
->hash_lock
);
1126 VN_RELE_DNLC(ncp
->dp
);
1128 } while (dnlc_nentries
> low_water
);
1130 dnlc_free_rotor
= hp
;
1131 dnlc_reduce_idle
= 1;
1135 * Directory caching routines
1136 * ==========================
1138 * See dnlc.h for details of the interfaces below.
1142 * Lookup up an entry in a complete or partial directory cache.
1145 dnlc_dir_lookup(dcanchor_t
*dcap
, const char *name
, uint64_t *handle
)
1154 * can test without lock as we are only a cache
1156 if (!VALID_DIR_CACHE(dcap
->dca_dircache
)) {
1157 ncs
.ncs_dir_misses
.value
.ui64
++;
1161 if (!dnlc_dir_enable
) {
1165 mutex_enter(&dcap
->dca_lock
);
1166 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1167 if (VALID_DIR_CACHE(dcp
)) {
1168 dcp
->dc_actime
= ddi_get_lbolt64();
1169 DNLC_DIR_HASH(name
, hash
, namlen
);
1170 dep
= dcp
->dc_namehash
[hash
& dcp
->dc_nhash_mask
];
1171 while (dep
!= NULL
) {
1172 if ((dep
->de_hash
== hash
) &&
1173 (namlen
== dep
->de_namelen
) &&
1174 bcmp(dep
->de_name
, name
, namlen
) == 0) {
1175 *handle
= dep
->de_handle
;
1176 mutex_exit(&dcap
->dca_lock
);
1177 ncs
.ncs_dir_hits
.value
.ui64
++;
1182 if (dcp
->dc_complete
) {
1187 mutex_exit(&dcap
->dca_lock
);
1190 mutex_exit(&dcap
->dca_lock
);
1191 ncs
.ncs_dir_misses
.value
.ui64
++;
1197 * Start a new directory cache. An estimate of the number of
1198 * entries is provided as a quick check to ensure the directory
1202 dnlc_dir_start(dcanchor_t
*dcap
, uint_t num_entries
)
1206 if (!dnlc_dir_enable
||
1207 (num_entries
< dnlc_dir_min_size
)) {
1211 if (num_entries
> dnlc_dir_max_size
) {
1215 mutex_enter(&dc_head
.dch_lock
);
1216 mutex_enter(&dcap
->dca_lock
);
1218 if (dcap
->dca_dircache
== DC_RET_LOW_MEM
) {
1219 dcap
->dca_dircache
= NULL
;
1220 mutex_exit(&dcap
->dca_lock
);
1221 mutex_exit(&dc_head
.dch_lock
);
1226 * Check if there's currently a cache.
1227 * This probably only occurs on a race.
1229 if (dcap
->dca_dircache
!= NULL
) {
1230 mutex_exit(&dcap
->dca_lock
);
1231 mutex_exit(&dc_head
.dch_lock
);
1236 * Allocate the dircache struct, entry and free space hash tables.
1237 * These tables are initially just one entry but dynamically resize
1238 * when entries and free space are added or removed.
1240 if ((dcp
= kmem_zalloc(sizeof (dircache_t
), KM_NOSLEEP
)) == NULL
) {
1243 if ((dcp
->dc_namehash
= kmem_zalloc(sizeof (dcentry_t
*),
1244 KM_NOSLEEP
)) == NULL
) {
1247 if ((dcp
->dc_freehash
= kmem_zalloc(sizeof (dcfree_t
*),
1248 KM_NOSLEEP
)) == NULL
) {
1252 dcp
->dc_anchor
= dcap
; /* set back pointer to anchor */
1253 dcap
->dca_dircache
= dcp
;
1255 /* add into head of global chain */
1256 dcp
->dc_next
= dc_head
.dch_next
;
1257 dcp
->dc_prev
= (dircache_t
*)&dc_head
;
1258 dcp
->dc_next
->dc_prev
= dcp
;
1259 dc_head
.dch_next
= dcp
;
1261 mutex_exit(&dcap
->dca_lock
);
1262 mutex_exit(&dc_head
.dch_lock
);
1263 ncs
.ncs_cur_dirs
.value
.ui64
++;
1264 ncs
.ncs_dirs_cached
.value
.ui64
++;
1268 if (dcp
->dc_namehash
) {
1269 kmem_free(dcp
->dc_namehash
, sizeof (dcentry_t
*));
1271 kmem_free(dcp
, sizeof (dircache_t
));
1274 * Must also kmem_free dcp->dc_freehash if more error cases are added
1276 mutex_exit(&dcap
->dca_lock
);
1277 mutex_exit(&dc_head
.dch_lock
);
1278 ncs
.ncs_dir_start_nm
.value
.ui64
++;
1283 * Add a directory entry to a partial or complete directory cache.
1286 dnlc_dir_add_entry(dcanchor_t
*dcap
, const char *name
, uint64_t handle
)
1289 dcentry_t
**hp
, *dep
;
1295 * Allocate the dcentry struct, including the variable
1296 * size name. Note, the null terminator is not copied.
1298 * We do this outside the lock to avoid possible deadlock if
1299 * dnlc_dir_reclaim() is called as a result of memory shortage.
1301 DNLC_DIR_HASH(name
, hash
, namlen
);
1302 dep
= kmem_alloc(sizeof (dcentry_t
) - 1 + namlen
, KM_NOSLEEP
);
1306 * The kmem allocator generates random failures for
1307 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1308 * So try again before we blow away a perfectly good cache.
1309 * This is done not to cover an error but purely for
1310 * performance running a debug kernel.
1311 * This random error only occurs in debug mode.
1313 dep
= kmem_alloc(sizeof (dcentry_t
) - 1 + namlen
, KM_NOSLEEP
);
1317 ncs
.ncs_dir_add_nm
.value
.ui64
++;
1319 * Free a directory cache. This may be the one we are
1322 dnlc_dir_reclaim(NULL
);
1323 dep
= kmem_alloc(sizeof (dcentry_t
) - 1 + namlen
, KM_NOSLEEP
);
1326 * still no memory, better delete this cache
1328 mutex_enter(&dcap
->dca_lock
);
1329 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1330 if (VALID_DIR_CACHE(dcp
)) {
1331 dnlc_dir_abort(dcp
);
1332 dcap
->dca_dircache
= DC_RET_LOW_MEM
;
1334 mutex_exit(&dcap
->dca_lock
);
1335 ncs
.ncs_dir_addabort
.value
.ui64
++;
1339 * fall through as if the 1st kmem_alloc had worked
1345 mutex_enter(&dcap
->dca_lock
);
1346 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1347 if (VALID_DIR_CACHE(dcp
)) {
1349 * If the total number of entries goes above the max
1350 * then free this cache
1352 if ((dcp
->dc_num_entries
+ dcp
->dc_num_free
) >
1353 dnlc_dir_max_size
) {
1354 mutex_exit(&dcap
->dca_lock
);
1355 dnlc_dir_purge(dcap
);
1356 kmem_free(dep
, sizeof (dcentry_t
) - 1 + namlen
);
1357 ncs
.ncs_dir_add_max
.value
.ui64
++;
1360 dcp
->dc_num_entries
++;
1361 capacity
= (dcp
->dc_nhash_mask
+ 1) << dnlc_dir_hash_size_shift
;
1362 if (dcp
->dc_num_entries
>=
1363 (capacity
<< dnlc_dir_hash_resize_shift
)) {
1364 dnlc_dir_adjust_nhash(dcp
);
1366 hp
= &dcp
->dc_namehash
[hash
& dcp
->dc_nhash_mask
];
1369 * Initialise and chain in new entry
1371 dep
->de_handle
= handle
;
1372 dep
->de_hash
= hash
;
1374 * Note de_namelen is a uchar_t to conserve space
1375 * and alignment padding. The max length of any
1376 * pathname component is defined as MAXNAMELEN
1377 * which is 256 (including the terminating null).
1378 * So provided this doesn't change, we don't include the null,
1379 * we always use bcmp to compare strings, and we don't
1380 * start storing full names, then we are ok.
1381 * The space savings is worth it.
1383 dep
->de_namelen
= namlen
;
1384 bcopy(name
, dep
->de_name
, namlen
);
1387 dcp
->dc_actime
= ddi_get_lbolt64();
1388 mutex_exit(&dcap
->dca_lock
);
1389 ncs
.ncs_dir_num_ents
.value
.ui64
++;
1392 mutex_exit(&dcap
->dca_lock
);
1393 kmem_free(dep
, sizeof (dcentry_t
) - 1 + namlen
);
1399 * Add free space to a partial or complete directory cache.
1402 dnlc_dir_add_space(dcanchor_t
*dcap
, uint_t len
, uint64_t handle
)
1405 dcfree_t
*dfp
, **hp
;
1409 * We kmem_alloc outside the lock to avoid possible deadlock if
1410 * dnlc_dir_reclaim() is called as a result of memory shortage.
1412 dfp
= kmem_cache_alloc(dnlc_dir_space_cache
, KM_NOSLEEP
);
1416 * The kmem allocator generates random failures for
1417 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
1418 * So try again before we blow away a perfectly good cache.
1419 * This random error only occurs in debug mode
1421 dfp
= kmem_cache_alloc(dnlc_dir_space_cache
, KM_NOSLEEP
);
1425 ncs
.ncs_dir_add_nm
.value
.ui64
++;
1427 * Free a directory cache. This may be the one we are
1430 dnlc_dir_reclaim(NULL
);
1431 dfp
= kmem_cache_alloc(dnlc_dir_space_cache
, KM_NOSLEEP
);
1434 * still no memory, better delete this cache
1436 mutex_enter(&dcap
->dca_lock
);
1437 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1438 if (VALID_DIR_CACHE(dcp
)) {
1439 dnlc_dir_abort(dcp
);
1440 dcap
->dca_dircache
= DC_RET_LOW_MEM
;
1442 mutex_exit(&dcap
->dca_lock
);
1443 ncs
.ncs_dir_addabort
.value
.ui64
++;
1447 * fall through as if the 1st kmem_alloc had worked
1454 mutex_enter(&dcap
->dca_lock
);
1455 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1456 if (VALID_DIR_CACHE(dcp
)) {
1457 if ((dcp
->dc_num_entries
+ dcp
->dc_num_free
) >
1458 dnlc_dir_max_size
) {
1459 mutex_exit(&dcap
->dca_lock
);
1460 dnlc_dir_purge(dcap
);
1461 kmem_cache_free(dnlc_dir_space_cache
, dfp
);
1462 ncs
.ncs_dir_add_max
.value
.ui64
++;
1466 capacity
= (dcp
->dc_fhash_mask
+ 1) << dnlc_dir_hash_size_shift
;
1467 if (dcp
->dc_num_free
>=
1468 (capacity
<< dnlc_dir_hash_resize_shift
)) {
1469 dnlc_dir_adjust_fhash(dcp
);
1472 * Initialise and chain a new entry
1474 dfp
->df_handle
= handle
;
1476 dcp
->dc_actime
= ddi_get_lbolt64();
1477 hp
= &(dcp
->dc_freehash
[DDFHASH(handle
, dcp
)]);
1480 mutex_exit(&dcap
->dca_lock
);
1481 ncs
.ncs_dir_num_ents
.value
.ui64
++;
1484 mutex_exit(&dcap
->dca_lock
);
1485 kmem_cache_free(dnlc_dir_space_cache
, dfp
);
1491 * Mark a directory cache as complete.
1494 dnlc_dir_complete(dcanchor_t
*dcap
)
1498 mutex_enter(&dcap
->dca_lock
);
1499 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1500 if (VALID_DIR_CACHE(dcp
)) {
1501 dcp
->dc_complete
= B_TRUE
;
1503 mutex_exit(&dcap
->dca_lock
);
1507 * Internal routine to delete a partial or full directory cache.
1508 * No additional locking needed.
1511 dnlc_dir_abort(dircache_t
*dcp
)
1513 dcentry_t
*dep
, *nhp
;
1514 dcfree_t
*fep
, *fhp
;
1515 uint_t nhtsize
= dcp
->dc_nhash_mask
+ 1; /* name hash table size */
1516 uint_t fhtsize
= dcp
->dc_fhash_mask
+ 1; /* free hash table size */
1520 * Free up the cached name entries and hash table
1522 for (i
= 0; i
< nhtsize
; i
++) { /* for each hash bucket */
1523 nhp
= dcp
->dc_namehash
[i
];
1524 while (nhp
!= NULL
) { /* for each chained entry */
1526 kmem_free(nhp
, sizeof (dcentry_t
) - 1 +
1531 kmem_free(dcp
->dc_namehash
, sizeof (dcentry_t
*) * nhtsize
);
1534 * Free up the free space entries and hash table
1536 for (i
= 0; i
< fhtsize
; i
++) { /* for each hash bucket */
1537 fhp
= dcp
->dc_freehash
[i
];
1538 while (fhp
!= NULL
) { /* for each chained entry */
1540 kmem_cache_free(dnlc_dir_space_cache
, fhp
);
1544 kmem_free(dcp
->dc_freehash
, sizeof (dcfree_t
*) * fhtsize
);
1547 * Finally free the directory cache structure itself
1549 ncs
.ncs_dir_num_ents
.value
.ui64
-= (dcp
->dc_num_entries
+
1551 kmem_free(dcp
, sizeof (dircache_t
));
1552 ncs
.ncs_cur_dirs
.value
.ui64
--;
1556 * Remove a partial or complete directory cache
1559 dnlc_dir_purge(dcanchor_t
*dcap
)
1563 mutex_enter(&dc_head
.dch_lock
);
1564 mutex_enter(&dcap
->dca_lock
);
1565 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1566 if (!VALID_DIR_CACHE(dcp
)) {
1567 mutex_exit(&dcap
->dca_lock
);
1568 mutex_exit(&dc_head
.dch_lock
);
1571 dcap
->dca_dircache
= NULL
;
1573 * Unchain from global list
1575 dcp
->dc_prev
->dc_next
= dcp
->dc_next
;
1576 dcp
->dc_next
->dc_prev
= dcp
->dc_prev
;
1577 mutex_exit(&dcap
->dca_lock
);
1578 mutex_exit(&dc_head
.dch_lock
);
1579 dnlc_dir_abort(dcp
);
1583 * Remove an entry from a complete or partial directory cache.
1584 * Return the handle if it's non null.
1587 dnlc_dir_rem_entry(dcanchor_t
*dcap
, const char *name
, uint64_t *handlep
)
1590 dcentry_t
**prevpp
, *te
;
1596 if (!dnlc_dir_enable
) {
1600 mutex_enter(&dcap
->dca_lock
);
1601 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1602 if (VALID_DIR_CACHE(dcp
)) {
1603 dcp
->dc_actime
= ddi_get_lbolt64();
1604 if (dcp
->dc_nhash_mask
> 0) { /* ie not minimum */
1605 capacity
= (dcp
->dc_nhash_mask
+ 1) <<
1606 dnlc_dir_hash_size_shift
;
1607 if (dcp
->dc_num_entries
<=
1608 (capacity
>> dnlc_dir_hash_resize_shift
)) {
1609 dnlc_dir_adjust_nhash(dcp
);
1612 DNLC_DIR_HASH(name
, hash
, namlen
);
1613 prevpp
= &dcp
->dc_namehash
[hash
& dcp
->dc_nhash_mask
];
1614 while (*prevpp
!= NULL
) {
1615 if (((*prevpp
)->de_hash
== hash
) &&
1616 (namlen
== (*prevpp
)->de_namelen
) &&
1617 bcmp((*prevpp
)->de_name
, name
, namlen
) == 0) {
1618 if (handlep
!= NULL
) {
1619 *handlep
= (*prevpp
)->de_handle
;
1622 *prevpp
= (*prevpp
)->de_next
;
1623 kmem_free(te
, sizeof (dcentry_t
) - 1 +
1627 * If the total number of entries
1628 * falls below half the minimum number
1629 * of entries then free this cache.
1631 if (--dcp
->dc_num_entries
<
1632 (dnlc_dir_min_size
>> 1)) {
1633 mutex_exit(&dcap
->dca_lock
);
1634 dnlc_dir_purge(dcap
);
1636 mutex_exit(&dcap
->dca_lock
);
1638 ncs
.ncs_dir_num_ents
.value
.ui64
--;
1641 prevpp
= &((*prevpp
)->de_next
);
1643 if (dcp
->dc_complete
) {
1644 ncs
.ncs_dir_reme_fai
.value
.ui64
++;
1649 mutex_exit(&dcap
->dca_lock
);
1652 mutex_exit(&dcap
->dca_lock
);
1659 * Remove free space of at least the given length from a complete
1660 * or partial directory cache.
1663 dnlc_dir_rem_space_by_len(dcanchor_t
*dcap
, uint_t len
, uint64_t *handlep
)
1666 dcfree_t
**prevpp
, *tfp
;
1667 uint_t fhtsize
; /* free hash table size */
1672 if (!dnlc_dir_enable
) {
1676 mutex_enter(&dcap
->dca_lock
);
1677 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1678 if (VALID_DIR_CACHE(dcp
)) {
1679 dcp
->dc_actime
= ddi_get_lbolt64();
1680 if (dcp
->dc_fhash_mask
> 0) { /* ie not minimum */
1681 capacity
= (dcp
->dc_fhash_mask
+ 1) <<
1682 dnlc_dir_hash_size_shift
;
1683 if (dcp
->dc_num_free
<=
1684 (capacity
>> dnlc_dir_hash_resize_shift
)) {
1685 dnlc_dir_adjust_fhash(dcp
);
1689 * Search for an entry of the appropriate size
1690 * on a first fit basis.
1692 fhtsize
= dcp
->dc_fhash_mask
+ 1;
1693 for (i
= 0; i
< fhtsize
; i
++) { /* for each hash bucket */
1694 prevpp
= &(dcp
->dc_freehash
[i
]);
1695 while (*prevpp
!= NULL
) {
1696 if ((*prevpp
)->df_len
>= len
) {
1697 *handlep
= (*prevpp
)->df_handle
;
1699 *prevpp
= (*prevpp
)->df_next
;
1701 mutex_exit(&dcap
->dca_lock
);
1702 kmem_cache_free(dnlc_dir_space_cache
,
1704 ncs
.ncs_dir_num_ents
.value
.ui64
--;
1707 prevpp
= &((*prevpp
)->df_next
);
1710 if (dcp
->dc_complete
) {
1715 mutex_exit(&dcap
->dca_lock
);
1718 mutex_exit(&dcap
->dca_lock
);
1724 * Remove free space with the given handle from a complete or partial
1728 dnlc_dir_rem_space_by_handle(dcanchor_t
*dcap
, uint64_t handle
)
1731 dcfree_t
**prevpp
, *tfp
;
1735 if (!dnlc_dir_enable
) {
1739 mutex_enter(&dcap
->dca_lock
);
1740 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1741 if (VALID_DIR_CACHE(dcp
)) {
1742 dcp
->dc_actime
= ddi_get_lbolt64();
1743 if (dcp
->dc_fhash_mask
> 0) { /* ie not minimum */
1744 capacity
= (dcp
->dc_fhash_mask
+ 1) <<
1745 dnlc_dir_hash_size_shift
;
1746 if (dcp
->dc_num_free
<=
1747 (capacity
>> dnlc_dir_hash_resize_shift
)) {
1748 dnlc_dir_adjust_fhash(dcp
);
1753 * search for the exact entry
1755 prevpp
= &(dcp
->dc_freehash
[DDFHASH(handle
, dcp
)]);
1756 while (*prevpp
!= NULL
) {
1757 if ((*prevpp
)->df_handle
== handle
) {
1759 *prevpp
= (*prevpp
)->df_next
;
1761 mutex_exit(&dcap
->dca_lock
);
1762 kmem_cache_free(dnlc_dir_space_cache
, tfp
);
1763 ncs
.ncs_dir_num_ents
.value
.ui64
--;
1766 prevpp
= &((*prevpp
)->df_next
);
1768 if (dcp
->dc_complete
) {
1769 ncs
.ncs_dir_rems_fai
.value
.ui64
++;
1774 mutex_exit(&dcap
->dca_lock
);
1777 mutex_exit(&dcap
->dca_lock
);
1783 * Update the handle of an directory cache entry.
1786 dnlc_dir_update(dcanchor_t
*dcap
, const char *name
, uint64_t handle
)
1794 if (!dnlc_dir_enable
) {
1798 mutex_enter(&dcap
->dca_lock
);
1799 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1800 if (VALID_DIR_CACHE(dcp
)) {
1801 dcp
->dc_actime
= ddi_get_lbolt64();
1802 DNLC_DIR_HASH(name
, hash
, namlen
);
1803 dep
= dcp
->dc_namehash
[hash
& dcp
->dc_nhash_mask
];
1804 while (dep
!= NULL
) {
1805 if ((dep
->de_hash
== hash
) &&
1806 (namlen
== dep
->de_namelen
) &&
1807 bcmp(dep
->de_name
, name
, namlen
) == 0) {
1808 dep
->de_handle
= handle
;
1809 mutex_exit(&dcap
->dca_lock
);
1814 if (dcp
->dc_complete
) {
1815 ncs
.ncs_dir_upd_fail
.value
.ui64
++;
1820 mutex_exit(&dcap
->dca_lock
);
1823 mutex_exit(&dcap
->dca_lock
);
1829 dnlc_dir_fini(dcanchor_t
*dcap
)
1833 mutex_enter(&dc_head
.dch_lock
);
1834 mutex_enter(&dcap
->dca_lock
);
1835 dcp
= (dircache_t
*)dcap
->dca_dircache
;
1836 if (VALID_DIR_CACHE(dcp
)) {
1838 * Unchain from global list
1840 ncs
.ncs_dir_finipurg
.value
.ui64
++;
1841 dcp
->dc_prev
->dc_next
= dcp
->dc_next
;
1842 dcp
->dc_next
->dc_prev
= dcp
->dc_prev
;
1846 dcap
->dca_dircache
= NULL
;
1847 mutex_exit(&dcap
->dca_lock
);
1848 mutex_exit(&dc_head
.dch_lock
);
1849 mutex_destroy(&dcap
->dca_lock
);
1851 dnlc_dir_abort(dcp
);
1856 * Reclaim callback for dnlc directory caching.
1857 * Invoked by the kernel memory allocator when memory gets tight.
1858 * This is a pretty serious condition and can lead easily lead to system
1859 * hangs if not enough space is returned.
1861 * Deciding which directory (or directories) to purge is tricky.
1862 * Purging everything is an overkill, but purging just the oldest used
1863 * was found to lead to hangs. The largest cached directories use the
1864 * most memory, but take the most effort to rebuild, whereas the smaller
1865 * ones have little value and give back little space. So what to do?
1867 * The current policy is to continue purging the oldest used directories
1868 * until at least dnlc_dir_min_reclaim directory entries have been purged.
1872 dnlc_dir_reclaim(void *unused
)
1874 dircache_t
*dcp
, *oldest
;
1875 uint_t dirent_cnt
= 0;
1877 mutex_enter(&dc_head
.dch_lock
);
1878 while (dirent_cnt
< dnlc_dir_min_reclaim
) {
1879 dcp
= dc_head
.dch_next
;
1881 while (dcp
!= (dircache_t
*)&dc_head
) {
1882 if (oldest
== NULL
) {
1885 if (dcp
->dc_actime
< oldest
->dc_actime
) {
1891 if (oldest
== NULL
) {
1892 /* nothing to delete */
1893 mutex_exit(&dc_head
.dch_lock
);
1897 * remove from directory chain and purge
1899 oldest
->dc_prev
->dc_next
= oldest
->dc_next
;
1900 oldest
->dc_next
->dc_prev
= oldest
->dc_prev
;
1901 mutex_enter(&oldest
->dc_anchor
->dca_lock
);
1903 * If this was the last entry then it must be too large.
1904 * Mark it as such by saving a special dircache_t
1905 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
1906 * will be presented to the caller of dnlc_dir_start()
1908 if (oldest
->dc_next
== oldest
->dc_prev
) {
1909 oldest
->dc_anchor
->dca_dircache
= DC_RET_LOW_MEM
;
1910 ncs
.ncs_dir_rec_last
.value
.ui64
++;
1912 oldest
->dc_anchor
->dca_dircache
= NULL
;
1913 ncs
.ncs_dir_recl_any
.value
.ui64
++;
1915 mutex_exit(&oldest
->dc_anchor
->dca_lock
);
1916 dirent_cnt
+= oldest
->dc_num_entries
;
1917 dnlc_dir_abort(oldest
);
1919 mutex_exit(&dc_head
.dch_lock
);
1923 * Dynamically grow or shrink the size of the name hash table
1926 dnlc_dir_adjust_nhash(dircache_t
*dcp
)
1928 dcentry_t
**newhash
, *dep
, **nhp
, *tep
;
1935 * Allocate new hash table
1937 newsize
= dcp
->dc_num_entries
>> dnlc_dir_hash_size_shift
;
1938 newhash
= kmem_zalloc(sizeof (dcentry_t
*) * newsize
, KM_NOSLEEP
);
1939 if (newhash
== NULL
) {
1941 * System is short on memory just return
1942 * Note, the old hash table is still usable.
1943 * This return is unlikely to repeatedy occur, because
1944 * either some other directory caches will be reclaimed
1945 * due to memory shortage, thus freeing memory, or this
1946 * directory cahe will be reclaimed.
1950 oldsize
= dcp
->dc_nhash_mask
+ 1;
1951 dcp
->dc_nhash_mask
= newsizemask
= newsize
- 1;
1954 * Move entries from the old table to the new
1956 for (i
= 0; i
< oldsize
; i
++) { /* for each hash bucket */
1957 dep
= dcp
->dc_namehash
[i
];
1958 while (dep
!= NULL
) { /* for each chained entry */
1961 nhp
= &newhash
[tep
->de_hash
& newsizemask
];
1962 tep
->de_next
= *nhp
;
1968 * delete old hash table and set new one in place
1970 kmem_free(dcp
->dc_namehash
, sizeof (dcentry_t
*) * oldsize
);
1971 dcp
->dc_namehash
= newhash
;
1975 * Dynamically grow or shrink the size of the free space hash table
1978 dnlc_dir_adjust_fhash(dircache_t
*dcp
)
1980 dcfree_t
**newhash
, *dfp
, **nhp
, *tfp
;
1986 * Allocate new hash table
1988 newsize
= dcp
->dc_num_free
>> dnlc_dir_hash_size_shift
;
1989 newhash
= kmem_zalloc(sizeof (dcfree_t
*) * newsize
, KM_NOSLEEP
);
1990 if (newhash
== NULL
) {
1992 * System is short on memory just return
1993 * Note, the old hash table is still usable.
1994 * This return is unlikely to repeatedy occur, because
1995 * either some other directory caches will be reclaimed
1996 * due to memory shortage, thus freeing memory, or this
1997 * directory cahe will be reclaimed.
2001 oldsize
= dcp
->dc_fhash_mask
+ 1;
2002 dcp
->dc_fhash_mask
= newsize
- 1;
2005 * Move entries from the old table to the new
2007 for (i
= 0; i
< oldsize
; i
++) { /* for each hash bucket */
2008 dfp
= dcp
->dc_freehash
[i
];
2009 while (dfp
!= NULL
) { /* for each chained entry */
2012 nhp
= &newhash
[DDFHASH(tfp
->df_handle
, dcp
)];
2013 tfp
->df_next
= *nhp
;
2019 * delete old hash table and set new one in place
2021 kmem_free(dcp
->dc_freehash
, sizeof (dcfree_t
*) * oldsize
);
2022 dcp
->dc_freehash
= newhash
;