/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, Joyent, Inc.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/dnlc.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/bitmap.h>
#include <sys/var.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/atomic.h>
#include <sys/taskq.h>
/*
 * Directory name lookup cache.
 * Based on code originally done by Robert Elz at Melbourne.
 *
 * Names found by directory scans are retained in a cache
 * for future reference. Each hash chain is ordered by LRU.
 * The cache is indexed by a hash value obtained from (vp, name),
 * where vp refers to the directory containing the name.
 */
/*
 * We want to be able to identify files that are referenced only by the DNLC.
 * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
 * since multiple DNLC references should only be counted once in v_count. The
 * VN_HOLD macro itself is aliased to VN_HOLD_CALLER in this file to help
 * differentiate the behaviors. (Unfortunately it is not possible to #undef
 * VN_HOLD and retain VN_HOLD_CALLER. Ideally a Makefile rule would grep
 * uncommented C tokens to check that VN_HOLD is referenced only once in this
 * file, to define VN_HOLD_CALLER.)
 */
#define	VN_HOLD_CALLER	VN_HOLD
#define	VN_HOLD_DNLC(vp)	{	\
	mutex_enter(&(vp)->v_lock);	\
	if ((vp)->v_count_dnlc == 0) {	\
		VN_HOLD_LOCKED(vp);	\
	}	\
	(vp)->v_count_dnlc++;	\
	mutex_exit(&(vp)->v_lock);	\
}
#define	VN_RELE_DNLC(vp)	{	\
	vn_rele_dnlc(vp);	\
}
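
/*
 * Usage sketch (illustrative only, not part of the cache logic): every
 * vnode pointer stored in an ncache entry is balanced by exactly one
 * VN_HOLD_DNLC()/VN_RELE_DNLC() pair:
 *
 *	VN_HOLD_DNLC(dp);	-- taken when the entry is created
 *	...
 *	VN_RELE_DNLC(dp);	-- dropped when the entry is purged or freed
 *
 * Thus v_count_dnlc counts DNLC references, while v_count is bumped only
 * on the 0 -> 1 transition of v_count_dnlc.
 */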

/*
 * Tunable nc_hashavelen is the average length desired for this chain, from
 * which the size of the nc_hash table is derived at create time.
 */
#define	NC_HASHAVELEN_DEFAULT	4
int nc_hashavelen = NC_HASHAVELEN_DEFAULT;

/*
 * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
 * depth exceeds this value, we move the looked-up entry to the front of
 * its hash chain. The idea is to make sure that the most frequently
 * accessed entries are found most quickly (by keeping them near the
 * front of their hash chains).
 */
#define	NC_MOVETOFRONT	2

/*
 * DNLC_MAX_RELE is used to size an array on the stack when releasing
 * vnodes. This array is used rather than calling VN_RELE() inline because
 * all dnlc locks must be dropped by that time in order to avoid a
 * possible deadlock. This deadlock occurs when the dnlc holds the last
 * reference to the vnode and so the VOP_INACTIVE vector is called which
 * can in turn call back into the dnlc. A global array was used but had
 * several problems:
 * 1) Actually doesn't have an upper bound on the array size as
 *    entries can be added after starting the purge.
 * 2) The locking scheme causes a hang.
 * 3) Caused serialisation on the global lock.
 * 4) The array was often unnecessarily huge.
 *
 * Note the current value 8 allows up to 4 cache entries to be purged from
 * each hash chain before having to cycle around and retry.
 * This ought to be ample given that nc_hashavelen is typically very small.
 */
#define	DNLC_MAX_RELE	8 /* must be even */
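
/*
 * Drain pattern sketch (illustrative): the purge routines below collect
 * vnodes in a stack array while holding the hash lock, then release them
 * only after the lock is dropped:
 *
 *	vnode_t *nc_rele[DNLC_MAX_RELE];
 *	int index = 0;
 *	...				-- gather ncp->vp/ncp->dp pairs
 *	mutex_exit(&nch->hash_lock);
 *	for (i = 0; i < index; i++)
 *		VN_RELE_DNLC(nc_rele[i]);
 *
 * so that VOP_INACTIVE can safely re-enter the dnlc.
 */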

/*
 * Hash table of name cache entries for fast lookup, dynamically
 * allocated at startup.
 */
static nc_hash_t *nc_hash;

/*
 * Rotors. Used to select entries on a round-robin basis.
 */
static nc_hash_t *dnlc_purge_fs1_rotor;
static nc_hash_t *dnlc_free_rotor;

/*
 * # of dnlc entries (uninitialized)
 *
 * the initial value was chosen as being
 * a random string of bits, probably not
 * normally chosen by a systems administrator
 */
int ncsize = -1;
volatile uint32_t dnlc_nentries = 0;	/* current num of name cache entries */
static int nc_hashsz;			/* size of hash table */
static int nc_hashmask;			/* size of hash table minus 1 */

/*
 * The dnlc_reduce_cache() taskq queue is activated when there are
 * ncsize name cache entries and, if no parameter is provided, it reduces
 * the size down to dnlc_nentries_low_water, which is by default one
 * hundredth less (or 99%) of ncsize.
 *
 * If a parameter is provided to dnlc_reduce_cache(), then we reduce
 * the size down based on ncsize_onepercent - where ncsize_onepercent
 * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
 * the size below 3% of ncsize (ncsize_min_percent).
 */
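/*
 * Worked example (assuming the default divisor of 100): with ncsize set
 * to 100000, dnlc_nentries_low_water is 100000 - (100000 / 100) = 99000,
 * ncsize_onepercent is 1000 and ncsize_min_percent is 3000. A call of
 * dnlc_reduce_cache((void *)2) would therefore aim to free about 2000
 * entries, but never shrink the cache below 3000 entries.
 */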
#define	DNLC_LOW_WATER_DIVISOR_DEFAULT 100
uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
uint_t dnlc_nentries_low_water;
int dnlc_reduce_idle = 1;	/* no locking needed */
uint_t ncsize_onepercent;
uint_t ncsize_min_percent;

/*
 * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
 * then this means the dnlc_reduce_cache() taskq is failing to
 * keep up. In this case we refuse to add new entries to the dnlc
 * until the taskq catches up.
 */
uint_t dnlc_max_nentries;		/* twice ncsize */
uint64_t dnlc_max_nentries_cnt = 0;	/* statistic on times we failed */

/*
 * Tunable to define when we should just remove items from
 * the end of the chain.
 */
#define	DNLC_LONG_CHAIN	8
uint_t dnlc_long_chain = DNLC_LONG_CHAIN;

/*
 * ncstats has been deprecated, due to the integer size of the counters
 * which can easily overflow in the dnlc.
 * It is maintained (at some expense) for compatibility.
 * The preferred interface is the kstat accessible nc_stats below.
 */
struct ncstats ncstats;

struct nc_stats ncs = {
	{ "hits",			KSTAT_DATA_UINT64 },
	{ "misses",			KSTAT_DATA_UINT64 },
	{ "negative_cache_hits",	KSTAT_DATA_UINT64 },
	{ "enters",			KSTAT_DATA_UINT64 },
	{ "double_enters",		KSTAT_DATA_UINT64 },
	{ "purge_total_entries",	KSTAT_DATA_UINT64 },
	{ "purge_all",			KSTAT_DATA_UINT64 },
	{ "purge_vp",			KSTAT_DATA_UINT64 },
	{ "purge_vfs",			KSTAT_DATA_UINT64 },
	{ "purge_fs1",			KSTAT_DATA_UINT64 },
	{ "pick_free",			KSTAT_DATA_UINT64 },
	{ "pick_heuristic",		KSTAT_DATA_UINT64 },
	{ "pick_last",			KSTAT_DATA_UINT64 },

	/* directory caching stats */

	{ "dir_hits",			KSTAT_DATA_UINT64 },
	{ "dir_misses",			KSTAT_DATA_UINT64 },
	{ "dir_cached_current",		KSTAT_DATA_UINT64 },
	{ "dir_entries_cached_current",	KSTAT_DATA_UINT64 },
	{ "dir_cached_total",		KSTAT_DATA_UINT64 },
	{ "dir_start_no_memory",	KSTAT_DATA_UINT64 },
	{ "dir_add_no_memory",		KSTAT_DATA_UINT64 },
	{ "dir_add_abort",		KSTAT_DATA_UINT64 },
	{ "dir_add_max",		KSTAT_DATA_UINT64 },
	{ "dir_remove_entry_fail",	KSTAT_DATA_UINT64 },
	{ "dir_remove_space_fail",	KSTAT_DATA_UINT64 },
	{ "dir_update_fail",		KSTAT_DATA_UINT64 },
	{ "dir_fini_purge",		KSTAT_DATA_UINT64 },
	{ "dir_reclaim_last",		KSTAT_DATA_UINT64 },
	{ "dir_reclaim_any",		KSTAT_DATA_UINT64 },
};

static int doingcache = 1;

vnode_t negative_cache_vnode;

/*
 * Insert entry at the front of the queue
 */
#define	nc_inshash(ncp, hp)	\
{	\
	(ncp)->hash_next = (hp)->hash_next;	\
	(ncp)->hash_prev = (ncache_t *)(hp);	\
	(hp)->hash_next->hash_prev = (ncp);	\
	(hp)->hash_next = (ncp);	\
}

/*
 * Remove entry from hash queue
 */
#define	nc_rmhash(ncp)	\
{	\
	(ncp)->hash_prev->hash_next = (ncp)->hash_next;	\
	(ncp)->hash_next->hash_prev = (ncp)->hash_prev;	\
	(ncp)->hash_prev = NULL;	\
	(ncp)->hash_next = NULL;	\
}

/*
 * Free an entry.
 */
#define	dnlc_free(ncp)	\
{	\
	kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen);	\
	atomic_dec_32(&dnlc_nentries);	\
}

/*
 * Cached directory info.
 * ======================
 */

/*
 * Cached directory free space hash function.
 * Needs the free space handle and the dcp to get the hash table size
 * Returns the hash index.
 */
#define	DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)

/*
 * Cached directory name entry hash function.
 * Uses the name and returns in the input arguments the hash and the name
 * length.
 */
#define	DNLC_DIR_HASH(name, hash, namelen)	\
	{	\
		char Xc;	\
		const char *Xcp;	\
		hash = *name;	\
		for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++) \
			hash = (hash << 4) + hash + Xc;	\
		ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1));	\
		namelen = Xcp - (name);	\
	}
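
/*
 * Worked example (illustrative): for the name "ab", hash starts as 'a'
 * (0x61), then one loop iteration gives hash = (0x61 << 4) + 0x61 + 'b'
 * = 0x610 + 0x61 + 0x62 = 0x6d3, and namelen is 2. The ASSERT enforces
 * that the name length fits in the uchar_t length fields used below.
 */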

/* special dircache_t pointer to indicate error should be returned */
/*
 * The anchor directory cache pointer can contain 3 types of values,
 * 1) NULL: No directory cache
 * 2) DC_RET_LOW_MEM (1): There was a directory cache that was found to be
 *    too big or a memory shortage occurred. This value remains in the
 *    pointer until a dnlc_dir_start() which returns a DNOMEM error.
 *    This is kludgy but efficient and only visible in this source file.
 * 3) A valid cache pointer.
 */
#define	DC_RET_LOW_MEM (dircache_t *)1
#define	VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
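
/*
 * Sketch of how the anchor states are distinguished (illustrative):
 *
 *	dcp = (dircache_t *)dcap->dca_dircache;
 *	if (VALID_DIR_CACHE(dcp))		-- case 3: use the cache
 *		...
 *	else if (dcp == DC_RET_LOW_MEM)		-- case 2: report DNOMEM once
 *		...
 *	else					-- case 1 (NULL): no cache
 *		...
 */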

uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
uint_t dnlc_dir_min_reclaim = 350000; /* approx 1MB of dcentrys */

/*
 * dnlc_dir_hash_resize_shift determines when the hash tables
 * get re-adjusted due to growth or shrinkage
 * - currently 2 indicating that there can be at most 4
 * times or at least one quarter the number of entries
 * before hash table readjustment. Note that with
 * dnlc_dir_hash_size_shift above set at 3 this would
 * mean readjustment would occur if the average number
 * of entries went above 32 or below 2
 */
uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
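
/*
 * The growth check used below is therefore (illustrative):
 *
 *	capacity = (mask + 1) << dnlc_dir_hash_size_shift;    -- buckets * 8
 *	if (num >= (capacity << dnlc_dir_hash_resize_shift))  -- avg > 32
 *		grow the table;
 *
 * and shrinkage is the mirror image with a right shift (avg < 2).
 */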

static kmem_cache_t *dnlc_dir_space_cache;	/* free space entry cache */
static dchead_t dc_head;	/* anchor of cached directories */

static ncache_t *dnlc_get(uchar_t namlen);
static ncache_t *dnlc_search(vnode_t *dp, const char *name, uchar_t namlen,
    int hash);
static void dnlc_dir_reclaim(void *unused);
static void dnlc_dir_abort(dircache_t *dcp);
static void dnlc_dir_adjust_fhash(dircache_t *dcp);
static void dnlc_dir_adjust_nhash(dircache_t *dcp);
static void do_dnlc_reduce_cache(void *);

/*
 * Initialize the directory cache.
 */
void
dnlc_init()
{
	nc_hash_t *hp;
	kstat_t *ksp;
	int i;

	/*
	 * Set up the size of the dnlc (ncsize) and its low water mark.
	 */
	if (ncsize == -1) {
		/* calculate a reasonable size for the low water */
		dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
		ncsize = dnlc_nentries_low_water +
		    (dnlc_nentries_low_water / dnlc_low_water_divisor);
	} else {
		/* don't change the user specified ncsize */
		dnlc_nentries_low_water =
		    ncsize - (ncsize / dnlc_low_water_divisor);
	}
	if (ncsize <= 0) {
		doingcache = 0;
		dnlc_dir_enable = 0; /* also disable directory caching */
		ncsize = 0;
		cmn_err(CE_NOTE, "name cache (dnlc) disabled");
		return;
	}
	dnlc_max_nentries = ncsize * 2;
	ncsize_onepercent = ncsize / 100;
	ncsize_min_percent = ncsize_onepercent * 3;

	/*
	 * Initialise the hash table.
	 * Compute hash size rounding to the next power of two.
	 */
	nc_hashsz = ncsize / nc_hashavelen;
	nc_hashsz = 1 << highbit(nc_hashsz);
	nc_hashmask = nc_hashsz - 1;
	nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
	for (i = 0; i < nc_hashsz; i++) {
		hp = (nc_hash_t *)&nc_hash[i];
		mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
		hp->hash_next = (ncache_t *)hp;
		hp->hash_prev = (ncache_t *)hp;
	}

	/*
	 * Initialise the rotors
	 */
	dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];

	/*
	 * Set up the directory caching to use kmem_cache_alloc
	 * for its free space entries so that we can get a callback
	 * when the system is short on memory, to allow us to free
	 * up some memory. We don't use the constructor/destructor
	 * functions.
	 */
	dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
	    sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
	    NULL, 0);

	/*
	 * Initialise the head of the cached directory structures
	 */
	mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
	dc_head.dch_next = (dircache_t *)&dc_head;
	dc_head.dch_prev = (dircache_t *)&dc_head;

	/*
	 * Put a hold on the negative cache vnode so that it never goes away
	 * (VOP_INACTIVE isn't called on it).
	 */
	vn_reinit(&negative_cache_vnode);

	/*
	 * Initialise kstats - both the old compatibility raw kind and
	 * the more extensive named stats.
	 */
	ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
	    sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &ncstats;
		kstat_install(ksp);
	}
	ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
	    sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &ncs;
		kstat_install(ksp);
	}
}

/*
 * Add a name to the directory cache.
 */
void
dnlc_enter(vnode_t *dp, const char *name, vnode_t *vp)
{
	ncache_t *ncp;
	nc_hash_t *hp;
	uchar_t namlen;
	int hash;

	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");

	if (!doingcache) {
		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
		    "dnlc_enter_end:(%S) %d", "not caching", 0);
		return;
	}

	/*
	 * Get a new dnlc entry. Assume the entry won't be in the cache
	 * and initialize it now
	 */
	DNLCHASH(name, dp, hash, namlen);
	if ((ncp = dnlc_get(namlen)) == NULL)
		return;
	ncp->dp = dp;
	VN_HOLD_DNLC(dp);
	ncp->vp = vp;
	VN_HOLD_DNLC(vp);
	bcopy(name, ncp->name, namlen + 1); /* name and null */
	ncp->hash = hash;
	hp = &nc_hash[hash & nc_hashmask];

	mutex_enter(&hp->hash_lock);
	if (dnlc_search(dp, name, namlen, hash) != NULL) {
		mutex_exit(&hp->hash_lock);
		ncstats.dbl_enters++;
		ncs.ncs_dbl_enters.value.ui64++;
		VN_RELE_DNLC(dp);
		VN_RELE_DNLC(vp);
		dnlc_free(ncp);		/* crfree done here */
		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
		    "dnlc_enter_end:(%S) %d", "dbl enter", ncstats.dbl_enters);
		return;
	}
	/*
	 * Insert back into the hash chain.
	 */
	nc_inshash(ncp, hp);
	mutex_exit(&hp->hash_lock);
	ncstats.enters++;
	ncs.ncs_enters.value.ui64++;
	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
	    "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
}

/*
 * Add a name to the directory cache.
 *
 * This function is basically identical with
 * dnlc_enter(). The difference is that when the
 * desired dnlc entry is found, the vnode in the
 * ncache is compared with the vnode passed in.
 *
 * If they are not equal then the ncache is
 * updated with the passed in vnode. Otherwise
 * it just frees up the newly allocated dnlc entry.
 */
void
dnlc_update(vnode_t *dp, const char *name, vnode_t *vp)
{
	ncache_t *ncp;
	ncache_t *tcp;
	vnode_t *tvp;
	nc_hash_t *hp;
	int hash;
	uchar_t namlen;

	TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");

	if (!doingcache) {
		TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
		    "dnlc_update_end:(%S) %d", "not caching", 0);
		return;
	}

	/*
	 * Get a new dnlc entry and initialize it now.
	 * If we fail to get a new entry, call dnlc_remove() to purge
	 * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
	 * entry.
	 * Failure to clear an existing entry could result in false dnlc
	 * lookup (negative/stale entry).
	 */
	DNLCHASH(name, dp, hash, namlen);
	if ((ncp = dnlc_get(namlen)) == NULL) {
		dnlc_remove(dp, name);
		return;
	}
	ncp->dp = dp;
	VN_HOLD_DNLC(dp);
	ncp->vp = vp;
	VN_HOLD_DNLC(vp);
	bcopy(name, ncp->name, namlen + 1); /* name and null */
	ncp->hash = hash;
	hp = &nc_hash[hash & nc_hashmask];

	mutex_enter(&hp->hash_lock);
	if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
		if (tcp->vp != vp) {
			tvp = tcp->vp;
			tcp->vp = vp;
			mutex_exit(&hp->hash_lock);
			VN_RELE_DNLC(tvp);
			ncstats.enters++;
			ncs.ncs_enters.value.ui64++;
			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
			    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
		} else {
			mutex_exit(&hp->hash_lock);
			VN_RELE_DNLC(vp);
			ncstats.dbl_enters++;
			ncs.ncs_dbl_enters.value.ui64++;
			TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
			    "dnlc_update_end:(%S) %d",
			    "dbl enter", ncstats.dbl_enters);
		}
		VN_RELE_DNLC(dp);
		dnlc_free(ncp);		/* crfree done here */
		return;
	}
	/*
	 * insert the new entry, since it is not in dnlc yet
	 */
	nc_inshash(ncp, hp);
	mutex_exit(&hp->hash_lock);
	ncstats.enters++;
	ncs.ncs_enters.value.ui64++;
	TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
	    "dnlc_update_end:(%S) %d", "done", ncstats.enters);
}

/*
 * Look up a name in the directory name cache.
 *
 * Return a doubly-held vnode if found: one hold so that it may
 * remain in the cache for other users, the other hold so that
 * the cache is not re-cycled and the identity of the vnode is
 * lost before the caller can use the vnode.
 */
vnode_t *
dnlc_lookup(vnode_t *dp, const char *name)
{
	ncache_t *ncp;
	nc_hash_t *hp;
	vnode_t *vp;
	int hash, depth;
	uchar_t namlen;

	TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
	    "dnlc_lookup_start:dp %x name %s", dp, name);

	if (!doingcache) {
		TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
		    "dnlc_lookup_end:%S %d vp %x name %s",
		    "not_caching", 0, NULL, name);
		return (NULL);
	}

	DNLCHASH(name, dp, hash, namlen);
	depth = 1;
	hp = &nc_hash[hash & nc_hashmask];
	mutex_enter(&hp->hash_lock);

	for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
	    ncp = ncp->hash_next) {
		if (ncp->hash == hash &&	/* fast signature check */
		    ncp->dp == dp &&
		    ncp->namlen == namlen &&
		    bcmp(ncp->name, name, namlen) == 0) {
			/*
			 * Move this entry to the head of its hash chain
			 * if it's not already close.
			 */
			if (depth > NC_MOVETOFRONT) {
				ncache_t *next = ncp->hash_next;
				ncache_t *prev = ncp->hash_prev;

				prev->hash_next = next;
				next->hash_prev = prev;
				ncp->hash_next = next = hp->hash_next;
				ncp->hash_prev = (ncache_t *)hp;
				next->hash_prev = ncp;
				hp->hash_next = ncp;

				ncstats.move_to_front++;
			}

			/*
			 * Put a hold on the vnode now so its identity
			 * can't change before the caller has a chance to
			 * put a hold on it.
			 */
			vp = ncp->vp;
			VN_HOLD_CALLER(vp);
			mutex_exit(&hp->hash_lock);
			ncstats.hits++;
			ncs.ncs_hits.value.ui64++;
			if (vp == DNLC_NO_VNODE) {
				ncs.ncs_neg_hits.value.ui64++;
			}
			TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
			    "dnlc_lookup_end:%S %d vp %x name %s", "hit",
			    ncstats.hits, vp, name);
			return (vp);
		}
		depth++;
	}

	mutex_exit(&hp->hash_lock);
	ncstats.misses++;
	ncs.ncs_misses.value.ui64++;
	TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
	    "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
	    NULL, name);
	return (NULL);
}
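
/*
 * Usage sketch (illustrative only, not taken from this file): a typical
 * filesystem lookup path tries the cache first and falls back to a
 * directory scan, entering the result for next time:
 *
 *	vp = dnlc_lookup(dvp, nm);
 *	if (vp == DNLC_NO_VNODE) {
 *		VN_RELE(vp);			-- cached negative entry
 *		return (ENOENT);
 *	}
 *	if (vp == NULL) {
 *		... scan the directory ...
 *		dnlc_enter(dvp, nm, vp);	-- or DNLC_NO_VNODE on ENOENT
 *	}
 *
 * The vnode returned on a hit carries the extra hold taken above, which
 * the caller must eventually VN_RELE().
 */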

/*
 * Remove an entry in the directory name cache.
 */
void
dnlc_remove(vnode_t *dp, const char *name)
{
	ncache_t *ncp;
	nc_hash_t *hp;
	uchar_t namlen;
	int hash;

	if (!doingcache)
		return;
	DNLCHASH(name, dp, hash, namlen);
	hp = &nc_hash[hash & nc_hashmask];

	mutex_enter(&hp->hash_lock);
	if ((ncp = dnlc_search(dp, name, namlen, hash)) != NULL) {
		/*
		 * Free up the entry.
		 */
		nc_rmhash(ncp);
		mutex_exit(&hp->hash_lock);
		VN_RELE_DNLC(ncp->vp);
		VN_RELE_DNLC(ncp->dp);
		dnlc_free(ncp);
		return;
	}
	mutex_exit(&hp->hash_lock);
}

/*
 * Purge the entire cache.
 */
void
dnlc_purge()
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int index;
	int i;
	vnode_t *nc_rele[DNLC_MAX_RELE];

	if (!doingcache)
		return;

	ncstats.purges++;
	ncs.ncs_purge_all.value.ui64++;

	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			nc_rele[index++] = ncp->vp;
			nc_rele[index++] = ncp->dp;

			nc_rmhash(ncp);
			dnlc_free(ncp);
			ncs.ncs_purge_total.value.ui64++;
			ncp = np;
			if (index == DNLC_MAX_RELE)
				break;
		}
		mutex_exit(&nch->hash_lock);

		/* Release holds on all the vnodes now that we have no locks */
		for (i = 0; i < index; i++) {
			VN_RELE_DNLC(nc_rele[i]);
		}
		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
}

/*
 * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
 * reference count goes to zero (the caller still holds a reference).
 */
void
dnlc_purge_vp(vnode_t *vp)
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int index;
	vnode_t *nc_rele[DNLC_MAX_RELE];

	ASSERT(vp->v_count > 0);
	if (vp->v_count_dnlc == 0) {
		return;
	}

	if (!doingcache)
		return;

	ncstats.purges++;
	ncs.ncs_purge_vp.value.ui64++;

	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			if (ncp->dp == vp || ncp->vp == vp) {
				nc_rele[index++] = ncp->vp;
				nc_rele[index++] = ncp->dp;
				nc_rmhash(ncp);
				dnlc_free(ncp);
				ncs.ncs_purge_total.value.ui64++;
				if (index == DNLC_MAX_RELE) {
					ncp = np;
					break;
				}
			}
			ncp = np;
		}
		mutex_exit(&nch->hash_lock);

		/* Release holds on all the vnodes now that we have no locks */
		while (index) {
			VN_RELE_DNLC(nc_rele[--index]);
		}

		if (vp->v_count_dnlc == 0) {
			return;
		}

		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
}

/*
 * Purge cache entries referencing a vfsp. Caller supplies a count
 * of entries to purge; up to that many will be freed. A count of
 * zero indicates that all such entries should be purged. Returns
 * the number of entries that were purged.
 */
int
dnlc_purge_vfsp(vfs_t *vfsp, int count)
{
	nc_hash_t *nch;
	ncache_t *ncp;
	int n = 0;
	int index;
	int i;
	vnode_t *nc_rele[DNLC_MAX_RELE];

	if (!doingcache)
		return (0);

	ncstats.purges++;
	ncs.ncs_purge_vfs.value.ui64++;

	for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
		index = 0;
		mutex_enter(&nch->hash_lock);
		ncp = nch->hash_next;
		while (ncp != (ncache_t *)nch) {
			ncache_t *np;

			np = ncp->hash_next;
			ASSERT(ncp->dp != NULL);
			ASSERT(ncp->vp != NULL);
			if ((ncp->dp->v_vfsp == vfsp) ||
			    (ncp->vp->v_vfsp == vfsp)) {
				n++;
				nc_rele[index++] = ncp->vp;
				nc_rele[index++] = ncp->dp;
				nc_rmhash(ncp);
				dnlc_free(ncp);
				ncs.ncs_purge_total.value.ui64++;
				if (index == DNLC_MAX_RELE) {
					ncp = np;
					break;
				}
				if (count != 0 && n >= count) {
					break;
				}
			}
			ncp = np;
		}
		mutex_exit(&nch->hash_lock);
		/* Release holds on all the vnodes now that we have no locks */
		for (i = 0; i < index; i++) {
			VN_RELE_DNLC(nc_rele[i]);
		}
		if (count != 0 && n >= count) {
			return (n);
		}
		if (ncp != (ncache_t *)nch) {
			nch--; /* Do current hash chain again */
		}
	}
	return (n);
}

/*
 * Purge 1 entry from the dnlc that is part of the filesystem(s)
 * represented by 'vop'. The purpose of this routine is to allow
 * users of the dnlc to free a vnode that is being held by the dnlc.
 *
 * If we find a vnode that we release which will result in
 * freeing the underlying vnode (count was 1), return 1; return 0
 * if no appropriate vnodes are found.
 *
 * Note, vop is not the 'right' identifier for a filesystem.
 */
int
dnlc_fs_purge1(vnodeops_t *vop)
{
	nc_hash_t *end;
	nc_hash_t *hp;
	ncache_t *ncp;
	vnode_t *vp;

	if (!doingcache)
		return (0);

	ncs.ncs_purge_fs1.value.ui64++;

	/*
	 * Scan the dnlc entries looking for a likely candidate.
	 */
	hp = end = dnlc_purge_fs1_rotor;

	do {
		if (++hp == &nc_hash[nc_hashsz])
			hp = nc_hash;
		dnlc_purge_fs1_rotor = hp;
		if (hp->hash_next == (ncache_t *)hp)
			continue;
		mutex_enter(&hp->hash_lock);
		for (ncp = hp->hash_prev;
		    ncp != (ncache_t *)hp;
		    ncp = ncp->hash_prev) {
			vp = ncp->vp;
			if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
			    vn_matchops(vp, vop))
				break;
		}
		if (ncp != (ncache_t *)hp) {
			nc_rmhash(ncp);
			mutex_exit(&hp->hash_lock);
			VN_RELE_DNLC(ncp->dp);
			VN_RELE_DNLC(vp);
			dnlc_free(ncp);
			ncs.ncs_purge_total.value.ui64++;
			return (1);
		}
		mutex_exit(&hp->hash_lock);
	} while (hp != end);
	return (0);
}

/*
 * Utility routine to search for a cache entry. Return the
 * ncache entry if found, NULL otherwise.
 */
static ncache_t *
dnlc_search(vnode_t *dp, const char *name, uchar_t namlen, int hash)
{
	nc_hash_t *hp;
	ncache_t *ncp;

	hp = &nc_hash[hash & nc_hashmask];

	for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
	    ncp = ncp->hash_next) {
		if (ncp->hash == hash &&
		    ncp->dp == dp &&
		    ncp->namlen == namlen &&
		    bcmp(ncp->name, name, namlen) == 0)
			return (ncp);
	}
	return (NULL);
}

#if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
#error ncache_t name length representation is too small
#endif
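
/*
 * (The guard above exists because name lengths are carried in a uchar_t,
 * i.e. NBBY (8) bits, so MAXNAMELEN - 1 must fit in (1 << NBBY) - 1 = 255.)
 */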

void
dnlc_reduce_cache(void *reduce_percent)
{
	if (dnlc_reduce_idle && (dnlc_nentries >= ncsize || reduce_percent)) {
		dnlc_reduce_idle = 0;
		if ((taskq_dispatch(system_taskq, do_dnlc_reduce_cache,
		    reduce_percent, TQ_NOSLEEP)) == NULL)
			dnlc_reduce_idle = 1;
	}
}

/*
 * Get a new name cache entry.
 * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
 * is short, then just return NULL. If we're over ncsize then kick off a
 * thread to free some in use entries down to dnlc_nentries_low_water.
 * Caller must initialise all fields except namlen.
 * Component names are defined to be less than MAXNAMELEN
 * which includes a null.
 */
static ncache_t *
dnlc_get(uchar_t namlen)
{
	ncache_t *ncp;

	if (dnlc_nentries > dnlc_max_nentries) {
		dnlc_max_nentries_cnt++; /* keep a statistic */
		return (NULL);
	}
	ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
	if (ncp == NULL) {
		return (NULL);
	}
	ncp->namlen = namlen;
	atomic_inc_32(&dnlc_nentries);
	dnlc_reduce_cache(NULL);
	return (ncp);
}

/*
 * Taskq routine to free up name cache entries to reduce the
 * cache size to the low water mark if "reduce_percent" is not provided.
 * If "reduce_percent" is provided, reduce cache size by
 * (ncsize_onepercent * reduce_percent).
 */
static void
do_dnlc_reduce_cache(void *reduce_percent)
{
	nc_hash_t *hp = dnlc_free_rotor, *start_hp = hp;
	vnode_t *vp;
	ncache_t *ncp;
	int cnt;
	uint_t low_water = dnlc_nentries_low_water;

	if (reduce_percent) {
		uint_t reduce_cnt;

		/*
		 * Never try to reduce the current number
		 * of cache entries below 3% of ncsize.
		 */
		if (dnlc_nentries <= ncsize_min_percent) {
			dnlc_reduce_idle = 1;
			return;
		}
		reduce_cnt = ncsize_onepercent *
		    (uint_t)(uintptr_t)reduce_percent;

		if (reduce_cnt > dnlc_nentries ||
		    dnlc_nentries - reduce_cnt < ncsize_min_percent)
			low_water = ncsize_min_percent;
		else
			low_water = dnlc_nentries - reduce_cnt;
	}

	do {
		/*
		 * Find the first non empty hash queue without locking.
		 * Only look at each hash queue once to avoid an infinite loop.
		 */
		do {
			if (++hp == &nc_hash[nc_hashsz])
				hp = nc_hash;
		} while (hp->hash_next == (ncache_t *)hp && hp != start_hp);

		/* return if all hash queues are empty. */
		if (hp->hash_next == (ncache_t *)hp) {
			dnlc_reduce_idle = 1;
			return;
		}

		mutex_enter(&hp->hash_lock);
		for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
		    ncp = ncp->hash_prev, cnt++) {
			vp = ncp->vp;
			/*
			 * A name cache entry with a reference count
			 * of one is only referenced by the dnlc.
			 * Also negative cache entries are purged first.
			 */
			if (!vn_has_cached_data(vp) &&
			    ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
				ncs.ncs_pick_heur.value.ui64++;
				goto found;
			}
			/*
			 * Remove from the end of the chain if the
			 * chain is too long
			 */
			if (cnt > dnlc_long_chain) {
				ncp = hp->hash_prev;
				ncs.ncs_pick_last.value.ui64++;
				vp = ncp->vp;
				goto found;
			}
		}
		/* check for race and continue */
		if (hp->hash_next == (ncache_t *)hp) {
			mutex_exit(&hp->hash_lock);
			continue;
		}

		ncp = hp->hash_prev; /* pick the last one in the hash queue */
		ncs.ncs_pick_last.value.ui64++;
		vp = ncp->vp;
found:
		/*
		 * Remove from hash chain.
		 */
		nc_rmhash(ncp);
		mutex_exit(&hp->hash_lock);
		VN_RELE_DNLC(vp);
		VN_RELE_DNLC(ncp->dp);
		dnlc_free(ncp);
	} while (dnlc_nentries > low_water);

	dnlc_free_rotor = hp;
	dnlc_reduce_idle = 1;
}

/*
 * Directory caching routines
 * ==========================
 *
 * See dnlc.h for details of the interfaces below.
 */
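
/*
 * Interface sketch (illustrative only; see dnlc.h for the authoritative
 * contract): a filesystem that caches a directory typically embeds a
 * dcanchor_t in its per-directory state and drives it roughly as
 *
 *	ret = dnlc_dir_start(&dcanchor, approx_num_entries);
 *	if (ret == DOK) {
 *		... dnlc_dir_add_entry()/dnlc_dir_add_space() per entry ...
 *		dnlc_dir_complete(&dcanchor);
 *	}
 *	...
 *	ret = dnlc_dir_lookup(&dcanchor, name, &handle);
 *
 * with DOK/DNOCACHE/DNOMEM/DTOOBIG results as described in dnlc.h.
 */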

/*
 * Look up an entry in a complete or partial directory cache.
 */
dcret_t
dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handle)
{
	dircache_t *dcp;
	dcentry_t *dep;
	int hash;
	int ret;
	uchar_t namlen;

	/*
	 * can test without lock as we are only a cache
	 */
	if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
		ncs.ncs_dir_misses.value.ui64++;
		return (DNOCACHE);
	}

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		DNLC_DIR_HASH(name, hash, namlen);
		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (dep != NULL) {
			if ((dep->de_hash == hash) &&
			    (namlen == dep->de_namelen) &&
			    bcmp(dep->de_name, name, namlen) == 0) {
				*handle = dep->de_handle;
				mutex_exit(&dcap->dca_lock);
				ncs.ncs_dir_hits.value.ui64++;
				return (DFOUND);
			}
			dep = dep->de_next;
		}
		if (dcp->dc_complete) {
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_misses.value.ui64++;
		return (DNOCACHE);
	}
}

/*
 * Start a new directory cache. An estimate of the number of
 * entries is provided as a quick check to ensure the directory
 * is cacheable.
 */
dcret_t
dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
{
	dircache_t *dcp;

	if (!dnlc_dir_enable ||
	    (num_entries < dnlc_dir_min_size)) {
		return (DNOCACHE);
	}

	if (num_entries > dnlc_dir_max_size) {
		return (DTOOBIG);
	}

	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);

	if (dcap->dca_dircache == DC_RET_LOW_MEM) {
		dcap->dca_dircache = NULL;
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return (DNOMEM);
	}

	/*
	 * Check if there's currently a cache.
	 * This probably only occurs on a race.
	 */
	if (dcap->dca_dircache != NULL) {
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return (DNOCACHE);
	}

	/*
	 * Allocate the dircache struct, entry and free space hash tables.
	 * These tables are initially just one entry but dynamically resize
	 * when entries and free space are added or removed.
	 */
	if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
		goto error;
	}
	if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
	    KM_NOSLEEP)) == NULL) {
		goto error;
	}
	if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
	    KM_NOSLEEP)) == NULL) {
		goto error;
	}

	dcp->dc_anchor = dcap; /* set back pointer to anchor */
	dcap->dca_dircache = dcp;

	/* add into head of global chain */
	dcp->dc_next = dc_head.dch_next;
	dcp->dc_prev = (dircache_t *)&dc_head;
	dcp->dc_next->dc_prev = dcp;
	dc_head.dch_next = dcp;

	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	ncs.ncs_cur_dirs.value.ui64++;
	ncs.ncs_dirs_cached.value.ui64++;
	return (DOK);
error:
	if (dcp != NULL) {
		if (dcp->dc_namehash) {
			kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
		}
		kmem_free(dcp, sizeof (dircache_t));
	}
	/*
	 * Must also kmem_free dcp->dc_freehash if more error cases are added
	 */
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	ncs.ncs_dir_start_nm.value.ui64++;
	return (DNOMEM);
}

/*
 * Add a directory entry to a partial or complete directory cache.
 */
dcret_t
dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle)
{
	dircache_t *dcp;
	dcentry_t **hp, *dep;
	int hash;
	uint_t capacity;
	uchar_t namlen;

	/*
	 * Allocate the dcentry struct, including the variable
	 * size name. Note, the null terminator is not copied.
	 *
	 * We do this outside the lock to avoid possible deadlock if
	 * dnlc_dir_reclaim() is called as a result of memory shortage.
	 */
	DNLC_DIR_HASH(name, hash, namlen);
	dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
	if (dep == NULL) {
#ifdef DEBUG
		/*
		 * The kmem allocator generates random failures for
		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
		 * So try again before we blow away a perfectly good cache.
		 * This is done not to cover an error but purely for
		 * performance running a debug kernel.
		 * This random error only occurs in debug mode.
		 */
		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
		if (dep != NULL)
			goto ok;
#endif
		ncs.ncs_dir_add_nm.value.ui64++;
		/*
		 * Free a directory cache. This may be the one we are
		 * adding to.
		 */
		dnlc_dir_reclaim(NULL);
		dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
		if (dep == NULL) {
			/*
			 * still no memory, better delete this cache
			 */
			mutex_enter(&dcap->dca_lock);
			dcp = (dircache_t *)dcap->dca_dircache;
			if (VALID_DIR_CACHE(dcp)) {
				dnlc_dir_abort(dcp);
				dcap->dca_dircache = DC_RET_LOW_MEM;
			}
			mutex_exit(&dcap->dca_lock);
			ncs.ncs_dir_addabort.value.ui64++;
			return (DNOCACHE);
		}
		/*
		 * fall through as if the 1st kmem_alloc had worked
		 */
	}
#ifdef DEBUG
ok:
#endif
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/*
		 * If the total number of entries goes above the max
		 * then free this cache
		 */
		if ((dcp->dc_num_entries + dcp->dc_num_free) >
		    dnlc_dir_max_size) {
			mutex_exit(&dcap->dca_lock);
			dnlc_dir_purge(dcap);
			kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
			ncs.ncs_dir_add_max.value.ui64++;
			return (DTOOBIG);
		}
		dcp->dc_num_entries++;
		capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
		if (dcp->dc_num_entries >=
		    (capacity << dnlc_dir_hash_resize_shift)) {
			dnlc_dir_adjust_nhash(dcp);
		}
		hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];

		/*
		 * Initialise and chain in new entry
		 */
		dep->de_handle = handle;
		dep->de_hash = hash;
		/*
		 * Note de_namelen is a uchar_t to conserve space
		 * and alignment padding. The max length of any
		 * pathname component is defined as MAXNAMELEN
		 * which is 256 (including the terminating null).
		 * So provided this doesn't change, we don't include the null,
		 * we always use bcmp to compare strings, and we don't
		 * start storing full names, then we are ok.
		 * The space savings is worth it.
		 */
		dep->de_namelen = namlen;
		bcopy(name, dep->de_name, namlen);
		dep->de_next = *hp;
		*hp = dep;
		dcp->dc_actime = ddi_get_lbolt64();
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_num_ents.value.ui64++;
		return (DOK);
	} else {
		mutex_exit(&dcap->dca_lock);
		kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
		return (DNOCACHE);
	}
}

/*
 * Add free space to a partial or complete directory cache.
 */
dcret_t
dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
{
	dircache_t *dcp;
	dcfree_t *dfp, **hp;
	uint_t capacity;

	/*
	 * We kmem_alloc outside the lock to avoid possible deadlock if
	 * dnlc_dir_reclaim() is called as a result of memory shortage.
	 */
	dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
	if (dfp == NULL) {
#ifdef DEBUG
		/*
		 * The kmem allocator generates random failures for
		 * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
		 * So try again before we blow away a perfectly good cache.
		 * This random error only occurs in debug mode.
		 */
		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
		if (dfp != NULL)
			goto ok;
#endif
		ncs.ncs_dir_add_nm.value.ui64++;
		/*
		 * Free a directory cache. This may be the one we are
		 * adding to.
		 */
		dnlc_dir_reclaim(NULL);
		dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
		if (dfp == NULL) {
			/*
			 * still no memory, better delete this cache
			 */
			mutex_enter(&dcap->dca_lock);
			dcp = (dircache_t *)dcap->dca_dircache;
			if (VALID_DIR_CACHE(dcp)) {
				dnlc_dir_abort(dcp);
				dcap->dca_dircache = DC_RET_LOW_MEM;
			}
			mutex_exit(&dcap->dca_lock);
			ncs.ncs_dir_addabort.value.ui64++;
			return (DNOCACHE);
		}
		/*
		 * fall through as if the 1st kmem_alloc had worked
		 */
	}
#ifdef DEBUG
ok:
#endif
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		if ((dcp->dc_num_entries + dcp->dc_num_free) >
		    dnlc_dir_max_size) {
			mutex_exit(&dcap->dca_lock);
			dnlc_dir_purge(dcap);
			kmem_cache_free(dnlc_dir_space_cache, dfp);
			ncs.ncs_dir_add_max.value.ui64++;
			return (DTOOBIG);
		}
		dcp->dc_num_free++;
		capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
		if (dcp->dc_num_free >=
		    (capacity << dnlc_dir_hash_resize_shift)) {
			dnlc_dir_adjust_fhash(dcp);
		}
		/*
		 * Initialise and chain a new entry
		 */
		dfp->df_handle = handle;
		dfp->df_len = len;
		dcp->dc_actime = ddi_get_lbolt64();
		hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
		dfp->df_next = *hp;
		*hp = dfp;
		mutex_exit(&dcap->dca_lock);
		ncs.ncs_dir_num_ents.value.ui64++;
		return (DOK);
	} else {
		mutex_exit(&dcap->dca_lock);
		kmem_cache_free(dnlc_dir_space_cache, dfp);
		return (DNOCACHE);
	}
}

/*
 * Mark a directory cache as complete.
 */
void
dnlc_dir_complete(dcanchor_t *dcap)
{
	dircache_t *dcp;

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_complete = B_TRUE;
	}
	mutex_exit(&dcap->dca_lock);
}

/*
 * Internal routine to delete a partial or full directory cache.
 * No additional locking needed.
 */
static void
dnlc_dir_abort(dircache_t *dcp)
{
	dcentry_t *dep, *nhp;
	dcfree_t *fep, *fhp;
	uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
	uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
	int i;

	/*
	 * Free up the cached name entries and hash table
	 */
	for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
		nhp = dcp->dc_namehash[i];
		while (nhp != NULL) { /* for each chained entry */
			dep = nhp->de_next;
			kmem_free(nhp, sizeof (dcentry_t) - 1 +
			    nhp->de_namelen);
			nhp = dep;
		}
	}
	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);

	/*
	 * Free up the free space entries and hash table
	 */
	for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
		fhp = dcp->dc_freehash[i];
		while (fhp != NULL) { /* for each chained entry */
			fep = fhp->df_next;
			kmem_cache_free(dnlc_dir_space_cache, fhp);
			fhp = fep;
		}
	}
	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);

	/*
	 * Finally free the directory cache structure itself
	 */
	ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
	    dcp->dc_num_free);
	kmem_free(dcp, sizeof (dircache_t));
	ncs.ncs_cur_dirs.value.ui64--;
}

/*
 * Remove a partial or complete directory cache
 */
void
dnlc_dir_purge(dcanchor_t *dcap)
{
	dircache_t *dcp;

	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (!VALID_DIR_CACHE(dcp)) {
		mutex_exit(&dcap->dca_lock);
		mutex_exit(&dc_head.dch_lock);
		return;
	}
	dcap->dca_dircache = NULL;
	/*
	 * Unchain from global list
	 */
	dcp->dc_prev->dc_next = dcp->dc_next;
	dcp->dc_next->dc_prev = dcp->dc_prev;
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	dnlc_dir_abort(dcp);
}

/*
 * Remove an entry from a complete or partial directory cache.
 * Return the handle if it's non null.
 */
dcret_t
dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name, uint64_t *handlep)
{
	dircache_t *dcp;
	dcentry_t **prevpp, *te;
	uint_t capacity;
	int hash;
	int ret;
	uchar_t namlen;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_nhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_entries <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_nhash(dcp);
			}
		}
		DNLC_DIR_HASH(name, hash, namlen);
		prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (*prevpp != NULL) {
			if (((*prevpp)->de_hash == hash) &&
			    (namlen == (*prevpp)->de_namelen) &&
			    bcmp((*prevpp)->de_name, name, namlen) == 0) {
				if (handlep != NULL) {
					*handlep = (*prevpp)->de_handle;
				}
				te = *prevpp;
				*prevpp = (*prevpp)->de_next;
				kmem_free(te, sizeof (dcentry_t) - 1 +
				    te->de_namelen);

				/*
				 * If the total number of entries
				 * falls below half the minimum number
				 * of entries then free this cache.
				 */
				if (--dcp->dc_num_entries <
				    (dnlc_dir_min_size >> 1)) {
					mutex_exit(&dcap->dca_lock);
					dnlc_dir_purge(dcap);
				} else {
					mutex_exit(&dcap->dca_lock);
				}
				ncs.ncs_dir_num_ents.value.ui64--;
				return (DFOUND);
			}
			prevpp = &((*prevpp)->de_next);
		}
		if (dcp->dc_complete) {
			ncs.ncs_dir_reme_fai.value.ui64++;
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}

/*
 * Remove free space of at least the given length from a complete
 * or partial directory cache.
 */
dcret_t
dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
{
	dircache_t *dcp;
	dcfree_t **prevpp, *tfp;
	uint_t fhtsize; /* free hash table size */
	uint_t i;
	uint_t capacity;
	int ret;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_fhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_free <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_fhash(dcp);
			}
		}

		/*
		 * Search for an entry of the appropriate size
		 * on a first fit basis.
		 */
		fhtsize = dcp->dc_fhash_mask + 1;
		for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
			prevpp = &(dcp->dc_freehash[i]);
			while (*prevpp != NULL) {
				if ((*prevpp)->df_len >= len) {
					*handlep = (*prevpp)->df_handle;
					tfp = *prevpp;
					*prevpp = (*prevpp)->df_next;
					dcp->dc_num_free--;
					mutex_exit(&dcap->dca_lock);
					kmem_cache_free(dnlc_dir_space_cache,
					    tfp);
					ncs.ncs_dir_num_ents.value.ui64--;
					return (DFOUND);
				}
				prevpp = &((*prevpp)->df_next);
			}
		}
		if (dcp->dc_complete) {
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}

/*
 * Remove free space with the given handle from a complete or partial
 * directory cache.
 */
dcret_t
dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
{
	dircache_t *dcp;
	dcfree_t **prevpp, *tfp;
	uint_t capacity;
	int ret;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
			capacity = (dcp->dc_fhash_mask + 1) <<
			    dnlc_dir_hash_size_shift;
			if (dcp->dc_num_free <=
			    (capacity >> dnlc_dir_hash_resize_shift)) {
				dnlc_dir_adjust_fhash(dcp);
			}
		}

		/*
		 * search for the exact entry
		 */
		prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
		while (*prevpp != NULL) {
			if ((*prevpp)->df_handle == handle) {
				tfp = *prevpp;
				*prevpp = (*prevpp)->df_next;
				dcp->dc_num_free--;
				mutex_exit(&dcap->dca_lock);
				kmem_cache_free(dnlc_dir_space_cache, tfp);
				ncs.ncs_dir_num_ents.value.ui64--;
				return (DFOUND);
			}
			prevpp = &((*prevpp)->df_next);
		}
		if (dcp->dc_complete) {
			ncs.ncs_dir_rems_fai.value.ui64++;
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}

/*
 * Update the handle of a directory cache entry.
 */
dcret_t
dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle)
{
	dircache_t *dcp;
	dcentry_t *dep;
	int hash;
	int ret;
	uchar_t namlen;

	if (!dnlc_dir_enable) {
		return (DNOCACHE);
	}

	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		dcp->dc_actime = ddi_get_lbolt64();
		DNLC_DIR_HASH(name, hash, namlen);
		dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
		while (dep != NULL) {
			if ((dep->de_hash == hash) &&
			    (namlen == dep->de_namelen) &&
			    bcmp(dep->de_name, name, namlen) == 0) {
				dep->de_handle = handle;
				mutex_exit(&dcap->dca_lock);
				return (DFOUND);
			}
			dep = dep->de_next;
		}
		if (dcp->dc_complete) {
			ncs.ncs_dir_upd_fail.value.ui64++;
			ret = DNOENT;
		} else {
			ret = DNOCACHE;
		}
		mutex_exit(&dcap->dca_lock);
		return (ret);
	} else {
		mutex_exit(&dcap->dca_lock);
		return (DNOCACHE);
	}
}

void
dnlc_dir_fini(dcanchor_t *dcap)
{
	dircache_t *dcp;

	mutex_enter(&dc_head.dch_lock);
	mutex_enter(&dcap->dca_lock);
	dcp = (dircache_t *)dcap->dca_dircache;
	if (VALID_DIR_CACHE(dcp)) {
		/*
		 * Unchain from global list
		 */
		ncs.ncs_dir_finipurg.value.ui64++;
		dcp->dc_prev->dc_next = dcp->dc_next;
		dcp->dc_next->dc_prev = dcp->dc_prev;
	} else {
		dcp = NULL;
	}
	dcap->dca_dircache = NULL;
	mutex_exit(&dcap->dca_lock);
	mutex_exit(&dc_head.dch_lock);
	mutex_destroy(&dcap->dca_lock);
	if (dcp) {
		dnlc_dir_abort(dcp);
	}
}

/*
 * Reclaim callback for dnlc directory caching.
 * Invoked by the kernel memory allocator when memory gets tight.
 * This is a pretty serious condition and can easily lead to system
 * hangs if not enough space is returned.
 *
 * Deciding which directory (or directories) to purge is tricky.
 * Purging everything is an overkill, but purging just the oldest used
 * was found to lead to hangs. The largest cached directories use the
 * most memory, but take the most effort to rebuild, whereas the smaller
 * ones have little value and give back little space. So what to do?
 *
 * The current policy is to continue purging the oldest used directories
 * until at least dnlc_dir_min_reclaim directory entries have been purged.
 */
/*ARGSUSED*/
static void
dnlc_dir_reclaim(void *unused)
{
	dircache_t *dcp, *oldest;
	uint_t dirent_cnt = 0;

	mutex_enter(&dc_head.dch_lock);
	while (dirent_cnt < dnlc_dir_min_reclaim) {
		dcp = dc_head.dch_next;
		oldest = NULL;
		while (dcp != (dircache_t *)&dc_head) {
			if (oldest == NULL) {
				oldest = dcp;
			} else {
				if (dcp->dc_actime < oldest->dc_actime) {
					oldest = dcp;
				}
			}
			dcp = dcp->dc_next;
		}
		if (oldest == NULL) {
			/* nothing to delete */
			mutex_exit(&dc_head.dch_lock);
			return;
		}
		/*
		 * remove from directory chain and purge
		 */
		oldest->dc_prev->dc_next = oldest->dc_next;
		oldest->dc_next->dc_prev = oldest->dc_prev;
		mutex_enter(&oldest->dc_anchor->dca_lock);
		/*
		 * If this was the last entry then it must be too large.
		 * Mark it as such by saving a special dircache_t
		 * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
		 * will be presented to the caller of dnlc_dir_start()
		 */
		if (oldest->dc_next == oldest->dc_prev) {
			oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
			ncs.ncs_dir_rec_last.value.ui64++;
		} else {
			oldest->dc_anchor->dca_dircache = NULL;
			ncs.ncs_dir_recl_any.value.ui64++;
		}
		mutex_exit(&oldest->dc_anchor->dca_lock);
		dirent_cnt += oldest->dc_num_entries;
		dnlc_dir_abort(oldest);
	}
	mutex_exit(&dc_head.dch_lock);
}

/*
 * Dynamically grow or shrink the size of the name hash table
 */
static void
dnlc_dir_adjust_nhash(dircache_t *dcp)
{
	dcentry_t **newhash, *dep, **nhp, *tep;
	uint_t newsize;
	uint_t oldsize;
	uint_t newsizemask;
	int i;

	/*
	 * Allocate new hash table
	 */
	newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
	newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
	if (newhash == NULL) {
		/*
		 * System is short on memory just return
		 * Note, the old hash table is still usable.
		 * This return is unlikely to repeatedly occur, because
		 * either some other directory caches will be reclaimed
		 * due to memory shortage, thus freeing memory, or this
		 * directory cache will be reclaimed.
		 */
		return;
	}
	oldsize = dcp->dc_nhash_mask + 1;
	dcp->dc_nhash_mask = newsizemask = newsize - 1;

	/*
	 * Move entries from the old table to the new
	 */
	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
		dep = dcp->dc_namehash[i];
		while (dep != NULL) { /* for each chained entry */
			tep = dep;
			dep = dep->de_next;
			nhp = &newhash[tep->de_hash & newsizemask];
			tep->de_next = *nhp;
			*nhp = tep;
		}
	}

	/*
	 * delete old hash table and set new one in place
	 */
	kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
	dcp->dc_namehash = newhash;
}

/*
 * Dynamically grow or shrink the size of the free space hash table
 */
static void
dnlc_dir_adjust_fhash(dircache_t *dcp)
{
	dcfree_t **newhash, *dfp, **nhp, *tfp;
	uint_t newsize;
	uint_t oldsize;
	int i;

	/*
	 * Allocate new hash table
	 */
	newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
	newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
	if (newhash == NULL) {
		/*
		 * System is short on memory just return
		 * Note, the old hash table is still usable.
		 * This return is unlikely to repeatedly occur, because
		 * either some other directory caches will be reclaimed
		 * due to memory shortage, thus freeing memory, or this
		 * directory cache will be reclaimed.
		 */
		return;
	}
	oldsize = dcp->dc_fhash_mask + 1;
	dcp->dc_fhash_mask = newsize - 1;

	/*
	 * Move entries from the old table to the new
	 */
	for (i = 0; i < oldsize; i++) { /* for each hash bucket */
		dfp = dcp->dc_freehash[i];
		while (dfp != NULL) { /* for each chained entry */
			tfp = dfp;
			dfp = dfp->df_next;
			nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
			tfp->df_next = *nhp;
			*nhp = tfp;
		}
	}

	/*
	 * delete old hash table and set new one in place
	 */
	kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
	dcp->dc_freehash = newhash;
}