 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */
/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/
/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/dnlc.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/bitmap.h>
#include <sys/var.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/atomic.h>
#include <sys/taskq.h>
/*
 * Directory name lookup cache.
 * Based on code originally done by Robert Elz at Melbourne.
 *
 * Names found by directory scans are retained in a cache
 * for future reference. Each hash chain is ordered by LRU.
 * The cache is indexed by a hash value obtained from (vp, name),
 * where vp refers to the directory containing the name.
 */
/*
 * We want to be able to identify files that are referenced only by the DNLC.
 * When adding a reference from the DNLC, call VN_HOLD_DNLC instead of VN_HOLD,
 * since multiple DNLC references should only be counted once in v_count. The
 * VN_HOLD macro itself is aliased to VN_HOLD_CALLER in this file to help
 * differentiate the behaviors. (Unfortunately it is not possible to #undef
 * VN_HOLD and retain VN_HOLD_CALLER. Ideally a Makefile rule would grep
 * uncommented C tokens to check that VN_HOLD is referenced only once in this
 * file, to define VN_HOLD_CALLER.)
 */
#define VN_HOLD_CALLER VN_HOLD
#define VN_HOLD_DNLC(vp) { \
    mutex_enter(&(vp)->v_lock); \
    if ((vp)->v_count_dnlc == 0) { \
        VN_HOLD_LOCKED(vp); \
    } \
    (vp)->v_count_dnlc++; \
    mutex_exit(&(vp)->v_lock); \
}
#define VN_RELE_DNLC(vp) { \
    mutex_enter(&(vp)->v_lock); \
    ASSERT((vp)->v_count_dnlc > 0); \
    if (--((vp)->v_count_dnlc) == 0) { \
        VN_RELE_LOCKED(vp); \
    } \
    mutex_exit(&(vp)->v_lock); \
}
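/*
 * Illustrative sketch (editorial addition, not from the original source):
 * how v_count and v_count_dnlc evolve as the dnlc takes and drops
 * references, assuming a vnode that starts with a single caller hold.
 * Only the first and last dnlc reference touch v_count:
 *
 *	initial state      v_count == 1, v_count_dnlc == 0
 *	VN_HOLD_DNLC(vp)   v_count == 2, v_count_dnlc == 1  (first dnlc ref)
 *	VN_HOLD_DNLC(vp)   v_count == 2, v_count_dnlc == 2  (counted once)
 *	VN_RELE_DNLC(vp)   v_count == 2, v_count_dnlc == 1
 *	VN_RELE_DNLC(vp)   v_count == 1, v_count_dnlc == 0  (last dnlc ref)
 */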
/*
 * Tunable nc_hashavelen is the average length desired for this chain, from
 * which the size of the nc_hash table is derived at create time.
 */
#define NC_HASHAVELEN_DEFAULT 4
int nc_hashavelen = NC_HASHAVELEN_DEFAULT;
/*
 * NC_MOVETOFRONT is the move-to-front threshold: if the hash lookup
 * depth exceeds this value, we move the looked-up entry to the front of
 * its hash chain. The idea is to make sure that the most frequently
 * accessed entries are found most quickly (by keeping them near the
 * front of their hash chains).
 */
#define NC_MOVETOFRONT 2
/*
 * DNLC_MAX_RELE is used to size an array on the stack when releasing
 * vnodes. This array is used rather than calling VN_RELE() inline because
 * all dnlc locks must be dropped by that time in order to avoid a
 * possible deadlock. This deadlock occurs when the dnlc holds the last
 * reference to the vnode and so the fop_inactive vector is called, which
 * can in turn call back into the dnlc. A global array was used previously,
 * but it had several deficiencies:
 * 1) it did not actually bound the array size, as entries
 *    could be added after starting the purge
 * 2) the locking scheme caused a hang
 * 3) it caused serialisation on the global lock
 * 4) the array was often unnecessarily huge
 *
 * Note the current value of 8 allows up to 4 cache entries (to be purged
 * from each hash chain) before having to cycle around and retry.
 * This ought to be ample given that nc_hashavelen is typically very small.
 */
#define DNLC_MAX_RELE 8 /* must be even */
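/*
 * Sketch of the collect-then-release pattern that DNLC_MAX_RELE sizes
 * (editorial illustration mirroring the purge routines below): vnodes are
 * gathered while the hash lock is held and only released after it is
 * dropped, since VN_RELE_DNLC may re-enter the dnlc via fop_inactive.
 *
 *	vnode_t *nc_rele[DNLC_MAX_RELE];
 *	int index = 0;
 *
 *	mutex_enter(&nch->hash_lock);
 *	... unlink up to DNLC_MAX_RELE / 2 entries, stashing
 *	... nc_rele[index++] = ncp->vp; nc_rele[index++] = ncp->dp;
 *	mutex_exit(&nch->hash_lock);
 *	while (index)
 *		VN_RELE_DNLC(nc_rele[--index]);    -- safe: no locks held
 */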
/*
 * Hash table of name cache entries for fast lookup, dynamically
 * allocated at startup.
 */
nc_hash_t *nc_hash;

/*
 * Rotors. Used to select entries on a round-robin basis.
 */
static nc_hash_t *dnlc_purge_fs1_rotor;
static nc_hash_t *dnlc_free_rotor;
/*
 * # of dnlc entries (uninitialized)
 *
 * the initial value was chosen as being
 * a random string of bits, probably not
 * normally chosen by a systems administrator
 */
int ncsize = -1;
volatile uint32_t dnlc_nentries = 0; /* current num of name cache entries */
static int nc_hashsz; /* size of hash table */
static int nc_hashmask; /* size of hash table minus 1 */
/*
 * The dnlc_reduce_cache() taskq queue is activated when there are
 * ncsize name cache entries, and if no parameter is provided it reduces
 * the size down to dnlc_nentries_low_water, which is by default one
 * hundredth less than (i.e. 99% of) ncsize.
 *
 * If a parameter is provided to dnlc_reduce_cache(), then we reduce
 * the size down based on ncsize_onepercent - where ncsize_onepercent
 * is 1% of ncsize; however, we never let dnlc_reduce_cache() reduce
 * the size below 3% of ncsize (ncsize_min_percent).
 */
#define DNLC_LOW_WATER_DIVISOR_DEFAULT 100
uint_t dnlc_low_water_divisor = DNLC_LOW_WATER_DIVISOR_DEFAULT;
uint_t dnlc_nentries_low_water;
int dnlc_reduce_idle = 1; /* no locking needed */
uint_t ncsize_onepercent;
uint_t ncsize_min_percent;
/*
 * If dnlc_nentries hits dnlc_max_nentries (twice ncsize)
 * then this means the dnlc_reduce_cache() taskq is failing to
 * keep up. In this case we refuse to add new entries to the dnlc
 * until the taskq catches up.
 */
uint_t dnlc_max_nentries; /* twice ncsize */
uint64_t dnlc_max_nentries_cnt = 0; /* statistic on times we failed */
/*
 * Tunable to define when we should just remove items from
 * the end of the chain.
 */
#define DNLC_LONG_CHAIN 8
uint_t dnlc_long_chain = DNLC_LONG_CHAIN;
/*
 * ncstats has been deprecated, due to the integer size of the counters
 * which can easily overflow in the dnlc.
 * It is maintained (at some expense) for compatibility.
 * The preferred interface is the kstat accessible nc_stats below.
 */
struct ncstats ncstats;
struct nc_stats ncs = {
    { "hits",                       KSTAT_DATA_UINT64 },
    { "misses",                     KSTAT_DATA_UINT64 },
    { "negative_cache_hits",        KSTAT_DATA_UINT64 },
    { "enters",                     KSTAT_DATA_UINT64 },
    { "double_enters",              KSTAT_DATA_UINT64 },
    { "purge_total_entries",        KSTAT_DATA_UINT64 },
    { "purge_all",                  KSTAT_DATA_UINT64 },
    { "purge_vp",                   KSTAT_DATA_UINT64 },
    { "purge_vfs",                  KSTAT_DATA_UINT64 },
    { "purge_fs1",                  KSTAT_DATA_UINT64 },
    { "pick_free",                  KSTAT_DATA_UINT64 },
    { "pick_heuristic",             KSTAT_DATA_UINT64 },
    { "pick_last",                  KSTAT_DATA_UINT64 },

    /* directory caching stats */

    { "dir_hits",                   KSTAT_DATA_UINT64 },
    { "dir_misses",                 KSTAT_DATA_UINT64 },
    { "dir_cached_current",         KSTAT_DATA_UINT64 },
    { "dir_entries_cached_current", KSTAT_DATA_UINT64 },
    { "dir_cached_total",           KSTAT_DATA_UINT64 },
    { "dir_start_no_memory",        KSTAT_DATA_UINT64 },
    { "dir_add_no_memory",          KSTAT_DATA_UINT64 },
    { "dir_add_abort",              KSTAT_DATA_UINT64 },
    { "dir_add_max",                KSTAT_DATA_UINT64 },
    { "dir_remove_entry_fail",      KSTAT_DATA_UINT64 },
    { "dir_remove_space_fail",      KSTAT_DATA_UINT64 },
    { "dir_update_fail",            KSTAT_DATA_UINT64 },
    { "dir_fini_purge",             KSTAT_DATA_UINT64 },
    { "dir_reclaim_last",           KSTAT_DATA_UINT64 },
    { "dir_reclaim_any",            KSTAT_DATA_UINT64 },
};
static int doingcache = 1;

vnode_t negative_cache_vnode;
/*
 * Insert entry at the front of the queue
 */
#define nc_inshash(ncp, hp) \
{ \
    (ncp)->hash_next = (hp)->hash_next; \
    (ncp)->hash_prev = (ncache_t *)(hp); \
    (hp)->hash_next->hash_prev = (ncp); \
    (hp)->hash_next = (ncp); \
}

/*
 * Remove entry from hash queue
 */
#define nc_rmhash(ncp) \
{ \
    (ncp)->hash_prev->hash_next = (ncp)->hash_next; \
    (ncp)->hash_next->hash_prev = (ncp)->hash_prev; \
    (ncp)->hash_prev = NULL; \
    (ncp)->hash_next = NULL; \
}

/*
 * Free an entry.
 */
#define dnlc_free(ncp) \
{ \
    kmem_free((ncp), sizeof (ncache_t) + (ncp)->namlen); \
    atomic_dec_32(&dnlc_nentries); \
}
/*
 * Cached directory info.
 * ======================
 */

/*
 * Cached directory free space hash function.
 * Needs the free space handle and the dcp to get the hash table size
 * Returns the hash index.
 */
#define DDFHASH(handle, dcp) ((handle >> 2) & (dcp)->dc_fhash_mask)
/*
 * Cached directory name entry hash function.
 * Uses the name and returns in the input arguments the hash and the name
 * length.
 */
#define DNLC_DIR_HASH(name, hash, namelen) \
{ \
    char Xc; \
    const char *Xcp; \
    hash = *name; \
    for (Xcp = (name + 1); (Xc = *Xcp) != 0; Xcp++) \
        hash = (hash << 4) + hash + Xc; \
    ASSERT((Xcp - (name)) <= ((1 << NBBY) - 1)); \
    namelen = Xcp - (name); \
}
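/*
 * Example (editorial illustration): for the name "ab", DNLC_DIR_HASH
 * starts with hash = 'a' (97), then folds in 'b':
 *	hash = (97 << 4) + 97 + 98 = 1747, and namelen = 2.
 * The ASSERT guarantees that the computed length fits the uchar_t
 * de_namelen field used by the directory cache entries below.
 */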
/* special dircache_t pointer to indicate error should be returned */
/*
 * The anchor directory cache pointer can contain 3 types of values,
 * 1) NULL: No directory cache
 * 2) DC_RET_LOW_MEM: There was a directory cache that was found to be
 *    too big or a memory shortage occurred. This value remains in the
 *    pointer until a dnlc_dir_start(), which returns a DNOMEM error.
 *    This is kludgy but efficient and only visible in this source file.
 * 3) A valid cache pointer.
 */
#define DC_RET_LOW_MEM (dircache_t *)1
#define VALID_DIR_CACHE(dcp) ((dircache_t *)(dcp) > DC_RET_LOW_MEM)
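/*
 * Illustration (editorial): VALID_DIR_CACHE relies on both NULL (0) and
 * DC_RET_LOW_MEM (1) comparing less than or equal to DC_RET_LOW_MEM,
 * while any real kernel pointer compares greater:
 *
 *	VALID_DIR_CACHE(NULL)			-- false, no cache
 *	VALID_DIR_CACHE(DC_RET_LOW_MEM)		-- false, memory shortage
 *	VALID_DIR_CACHE(kmem_zalloc(...))	-- true, usable cache
 */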
/* Tunables */
uint_t dnlc_dir_enable = 1; /* disable caching directories by setting to 0 */
uint_t dnlc_dir_min_size = 40; /* min no of directory entries before caching */
uint_t dnlc_dir_max_size = UINT_MAX; /* ditto maximum */
uint_t dnlc_dir_hash_size_shift = 3; /* 8 entries per hash bucket */
uint_t dnlc_dir_min_reclaim = 350000; /* approx 1MB of dcentrys */
/*
 * dnlc_dir_hash_resize_shift determines when the hash tables
 * get re-adjusted due to growth or shrinkage
 * - currently 2, indicating that there can be at most 4
 * times or at least one quarter the number of entries
 * before hash table readjustment. Note that with
 * dnlc_dir_hash_size_shift above set at 3 this would
 * mean readjustment would occur if the average number
 * of entries went above 32 or below 2
 */
uint_t dnlc_dir_hash_resize_shift = 2; /* readjust rate */
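/*
 * Worked example (editorial): with dnlc_dir_hash_size_shift == 3 and
 * dnlc_dir_hash_resize_shift == 2, a name hash table of 4 buckets has
 * capacity = 4 << 3 = 32, so it is grown once the entry count reaches
 * 32 << 2 = 128 (average chain length 32) and shrunk once the count
 * drops to 32 >> 2 = 8 or fewer (average chain length 2).
 */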
static kmem_cache_t *dnlc_dir_space_cache; /* free space entry cache */
static dchead_t dc_head; /* anchor of cached directories */
/* Forward declarations */
static ncache_t *dnlc_get(uchar_t namlen);
static ncache_t *dnlc_search(vnode_t *dp, const char *name, uchar_t namlen,
    int hash);
static void dnlc_dir_reclaim(void *unused);
static void dnlc_dir_abort(dircache_t *dcp);
static void dnlc_dir_adjust_fhash(dircache_t *dcp);
static void dnlc_dir_adjust_nhash(dircache_t *dcp);
static void do_dnlc_reduce_cache(void *);
/*
 * Initialize the directory cache.
 */
void
dnlc_init()
{
    nc_hash_t *hp;
    kstat_t *ksp;
    int i;

    /*
     * Set up the size of the dnlc (ncsize) and its low water mark.
     */
    if (ncsize == -1) {
        /* calculate a reasonable size for the low water */
        dnlc_nentries_low_water = 4 * (v.v_proc + maxusers) + 320;
        ncsize = dnlc_nentries_low_water +
            (dnlc_nentries_low_water / dnlc_low_water_divisor);
    } else {
        /* don't change the user specified ncsize */
        dnlc_nentries_low_water =
            ncsize - (ncsize / dnlc_low_water_divisor);
    }
    if (ncsize <= 0) {
        doingcache = 0;
        dnlc_dir_enable = 0; /* also disable directory caching */
        ncsize = 0;
        cmn_err(CE_NOTE, "name cache (dnlc) disabled");
        return;
    }
    dnlc_max_nentries = ncsize * 2;
    ncsize_onepercent = ncsize / 100;
    ncsize_min_percent = ncsize_onepercent * 3;

    /*
     * Initialise the hash table.
     * Compute hash size rounding to the next power of two.
     */
    nc_hashsz = ncsize / nc_hashavelen;
    nc_hashsz = 1 << highbit(nc_hashsz);
    nc_hashmask = nc_hashsz - 1;
    nc_hash = kmem_zalloc(nc_hashsz * sizeof (*nc_hash), KM_SLEEP);
    for (i = 0; i < nc_hashsz; i++) {
        hp = (nc_hash_t *)&nc_hash[i];
        mutex_init(&hp->hash_lock, NULL, MUTEX_DEFAULT, NULL);
        hp->hash_next = (ncache_t *)hp;
        hp->hash_prev = (ncache_t *)hp;
    }

    /*
     * Initialize rotors
     */
    dnlc_free_rotor = dnlc_purge_fs1_rotor = &nc_hash[0];

    /*
     * Set up the directory caching to use kmem_cache_alloc
     * for its free space entries so that we can get a callback
     * when the system is short on memory, to allow us to free
     * up some memory. We don't use the constructor/destructor
     * functions.
     */
    dnlc_dir_space_cache = kmem_cache_create("dnlc_space_cache",
        sizeof (dcfree_t), 0, NULL, NULL, dnlc_dir_reclaim, NULL,
        NULL, 0);

    /*
     * Initialise the head of the cached directory structures
     */
    mutex_init(&dc_head.dch_lock, NULL, MUTEX_DEFAULT, NULL);
    dc_head.dch_next = (dircache_t *)&dc_head;
    dc_head.dch_prev = (dircache_t *)&dc_head;

    /*
     * Initialize and put a hold on the negative cache vnode so that it
     * never goes away (fop_inactive isn't called on it). Note that
     * vn_reinit doesn't call vmobject_init - that is normally done by
     * the vn_cache constructor.
     */
    vmobject_init(&negative_cache_vnode.v_object, &negative_cache_vnode);
    vn_reinit(&negative_cache_vnode);
    negative_cache_vnode.v_count = 1;
    negative_cache_vnode.v_count_dnlc = 0;

    /*
     * Initialise kstats - both the old compatibility raw kind and
     * the more extensive named stats.
     */
    ksp = kstat_create("unix", 0, "ncstats", "misc", KSTAT_TYPE_RAW,
        sizeof (struct ncstats), KSTAT_FLAG_VIRTUAL);
    if (ksp) {
        ksp->ks_data = (void *) &ncstats;
        kstat_install(ksp);
    }
    ksp = kstat_create("unix", 0, "dnlcstats", "misc", KSTAT_TYPE_NAMED,
        sizeof (ncs) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
    if (ksp) {
        ksp->ks_data = (void *) &ncs;
        kstat_install(ksp);
    }
}
/*
 * Add a name to the directory cache.
 */
void
dnlc_enter(vnode_t *dp, const char *name, vnode_t *vp)
{
    ncache_t *ncp;
    nc_hash_t *hp;
    uchar_t namlen;
    int hash;

    TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_enter_start:");

    if (!doingcache) {
        TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
            "dnlc_enter_end:(%S) %d", "not caching", 0);
        return;
    }

    /*
     * Get a new dnlc entry. Assume the entry won't be in the cache
     * and initialize it now
     */
    DNLCHASH(name, dp, hash, namlen);
    if ((ncp = dnlc_get(namlen)) == NULL)
        return;
    ncp->dp = dp;
    VN_HOLD_DNLC(dp);
    ncp->vp = vp;
    VN_HOLD_DNLC(vp);
    bcopy(name, ncp->name, namlen + 1); /* name and null */
    ncp->hash = hash;
    hp = &nc_hash[hash & nc_hashmask];

    mutex_enter(&hp->hash_lock);
    if (dnlc_search(dp, name, namlen, hash) != NULL) {
        mutex_exit(&hp->hash_lock);
        ncstats.dbl_enters++;
        ncs.ncs_dbl_enters.value.ui64++;
        VN_RELE_DNLC(dp);
        VN_RELE_DNLC(vp);
        dnlc_free(ncp); /* crfree done here */
        TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
            "dnlc_enter_end:(%S) %d", "dbl enter", ncstats.dbl_enters);
        return;
    }
    /*
     * Insert back into the hash chain.
     */
    nc_inshash(ncp, hp);
    mutex_exit(&hp->hash_lock);
    ncstats.enters++;
    ncs.ncs_enters.value.ui64++;
    TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
        "dnlc_enter_end:(%S) %d", "done", ncstats.enters);
}
/*
 * Add a name to the directory cache.
 *
 * This function is basically identical with
 * dnlc_enter(). The difference is that when the
 * desired dnlc entry is found, the vnode in the
 * ncache is compared with the vnode passed in.
 *
 * If they are not equal then the ncache is
 * updated with the passed in vnode. Otherwise
 * it just frees up the newly allocated dnlc entry.
 */
void
dnlc_update(vnode_t *dp, const char *name, vnode_t *vp)
{
    ncache_t *ncp;
    ncache_t *tcp;
    vnode_t *tvp;
    nc_hash_t *hp;
    int hash;
    uchar_t namlen;

    TRACE_0(TR_FAC_NFS, TR_DNLC_ENTER_START, "dnlc_update_start:");

    if (!doingcache) {
        TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
            "dnlc_update_end:(%S) %d", "not caching", 0);
        return;
    }

    /*
     * Get a new dnlc entry and initialize it now.
     * If we fail to get a new entry, call dnlc_remove() to purge
     * any existing dnlc entry including negative cache (DNLC_NO_VNODE)
     * entry.
     * Failure to clear an existing entry could result in false dnlc
     * lookup (negative/stale entry).
     */
    DNLCHASH(name, dp, hash, namlen);
    if ((ncp = dnlc_get(namlen)) == NULL) {
        dnlc_remove(dp, name);
        return;
    }
    ncp->dp = dp;
    VN_HOLD_DNLC(dp);
    ncp->vp = vp;
    VN_HOLD_DNLC(vp);
    bcopy(name, ncp->name, namlen + 1); /* name and null */
    ncp->hash = hash;
    hp = &nc_hash[hash & nc_hashmask];

    mutex_enter(&hp->hash_lock);
    if ((tcp = dnlc_search(dp, name, namlen, hash)) != NULL) {
        if (tcp->vp != vp) {
            tvp = tcp->vp;
            tcp->vp = vp;
            mutex_exit(&hp->hash_lock);
            VN_RELE_DNLC(tvp);
            ncstats.enters++;
            ncs.ncs_enters.value.ui64++;
            TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
                "dnlc_update_end:(%S) %d", "done", ncstats.enters);
        } else {
            mutex_exit(&hp->hash_lock);
            VN_RELE_DNLC(vp);
            ncstats.dbl_enters++;
            ncs.ncs_dbl_enters.value.ui64++;
            TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
                "dnlc_update_end:(%S) %d",
                "dbl enter", ncstats.dbl_enters);
        }
        VN_RELE_DNLC(dp);
        dnlc_free(ncp); /* crfree done here */
        return;
    }
    /*
     * insert the new entry, since it is not in dnlc yet
     */
    nc_inshash(ncp, hp);
    mutex_exit(&hp->hash_lock);
    ncstats.enters++;
    ncs.ncs_enters.value.ui64++;
    TRACE_2(TR_FAC_NFS, TR_DNLC_ENTER_END,
        "dnlc_update_end:(%S) %d", "done", ncstats.enters);
}
/*
 * Look up a name in the directory name cache.
 *
 * Return a doubly-held vnode if found: one hold so that it may
 * remain in the cache for other users, the other hold so that
 * the cache is not re-cycled and the identity of the vnode is
 * lost before the caller can use the vnode.
 */
vnode_t *
dnlc_lookup(vnode_t *dp, const char *name)
{
    ncache_t *ncp;
    nc_hash_t *hp;
    vnode_t *vp;
    int hash, depth;
    uchar_t namlen;

    TRACE_2(TR_FAC_NFS, TR_DNLC_LOOKUP_START,
        "dnlc_lookup_start:dp %x name %s", dp, name);

    if (!doingcache) {
        TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
            "dnlc_lookup_end:%S %d vp %x name %s",
            "not_caching", 0, NULL, name);
        return (NULL);
    }

    DNLCHASH(name, dp, hash, namlen);
    depth = 1;
    hp = &nc_hash[hash & nc_hashmask];
    mutex_enter(&hp->hash_lock);

    for (ncp = hp->hash_next; ncp != (ncache_t *)hp;
        ncp = ncp->hash_next) {
        if (ncp->hash == hash && /* fast signature check */
            ncp->dp == dp &&
            ncp->namlen == namlen &&
            bcmp(ncp->name, name, namlen) == 0) {
            /*
             * Move this entry to the head of its hash chain
             * if it's not already close.
             */
            if (depth > NC_MOVETOFRONT) {
                ncache_t *next = ncp->hash_next;
                ncache_t *prev = ncp->hash_prev;

                prev->hash_next = next;
                next->hash_prev = prev;
                ncp->hash_next = next = hp->hash_next;
                ncp->hash_prev = (ncache_t *)hp;
                next->hash_prev = ncp;
                hp->hash_next = ncp;

                ncstats.move_to_front++;
            }

            /*
             * Put a hold on the vnode now so its identity
             * can't change before the caller has a chance to
             * put a hold on it.
             */
            vp = ncp->vp;
            VN_HOLD_CALLER(vp);
            mutex_exit(&hp->hash_lock);
            ncstats.hits++;
            ncs.ncs_hits.value.ui64++;
            if (vp == DNLC_NO_VNODE) {
                ncs.ncs_neg_hits.value.ui64++;
            }
            TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
                "dnlc_lookup_end:%S %d vp %x name %s", "hit",
                ncstats.hits, vp, name);
            return (vp);
        }
        depth++;
    }

    mutex_exit(&hp->hash_lock);
    ncstats.misses++;
    ncs.ncs_misses.value.ui64++;
    TRACE_4(TR_FAC_NFS, TR_DNLC_LOOKUP_END,
        "dnlc_lookup_end:%S %d vp %x name %s", "miss", ncstats.misses,
        NULL, name);
    return (NULL);
}
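/*
 * Usage sketch (editorial, hypothetical filesystem caller - not part of
 * this file): the interfaces above combine into the classic lookup fast
 * path, with DNLC_NO_VNODE doing negative caching. fs_real_lookup is a
 * made-up name for the filesystem's slow-path directory scan.
 *
 *	vnode_t *vp = dnlc_lookup(dvp, nm);
 *	if (vp == DNLC_NO_VNODE) {
 *		VN_RELE(vp);			-- drop the lookup hold
 *		return (ENOENT);		-- cached negative entry
 *	}
 *	if (vp != NULL)
 *		return (0);			-- hit, caller owns one hold
 *	-- miss: do the real directory scan, then prime the cache
 *	if (fs_real_lookup(dvp, nm, &vp) == 0)
 *		dnlc_enter(dvp, nm, vp);
 *	else
 *		dnlc_enter(dvp, nm, DNLC_NO_VNODE);
 */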
/*
 * Remove an entry in the directory name cache.
 */
void
dnlc_remove(vnode_t *dp, const char *name)
{
    ncache_t *ncp;
    nc_hash_t *hp;
    uchar_t namlen;
    int hash;

    if (!doingcache)
        return;
    DNLCHASH(name, dp, hash, namlen);
    hp = &nc_hash[hash & nc_hashmask];

    mutex_enter(&hp->hash_lock);
    if (ncp = dnlc_search(dp, name, namlen, hash)) {
        /*
         * Free up the entry
         */
        nc_rmhash(ncp);
        mutex_exit(&hp->hash_lock);
        VN_RELE_DNLC(ncp->vp);
        VN_RELE_DNLC(ncp->dp);
        dnlc_free(ncp);
        return;
    }
    mutex_exit(&hp->hash_lock);
}
/*
 * Purge the entire cache.
 */
void
dnlc_purge()
{
    nc_hash_t *nch;
    ncache_t *ncp;
    int index;
    int i;
    vnode_t *nc_rele[DNLC_MAX_RELE];

    if (!doingcache)
        return;

    ncstats.purges++;
    ncs.ncs_purge_all.value.ui64++;

    for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
        index = 0;
        mutex_enter(&nch->hash_lock);
        ncp = nch->hash_next;
        while (ncp != (ncache_t *)nch) {
            ncache_t *np;

            np = ncp->hash_next;
            nc_rele[index++] = ncp->vp;
            nc_rele[index++] = ncp->dp;

            nc_rmhash(ncp);
            dnlc_free(ncp);
            ncp = np;
            ncs.ncs_purge_total.value.ui64++;
            if (index == DNLC_MAX_RELE)
                break;
        }
        mutex_exit(&nch->hash_lock);

        /* Release holds on all the vnodes now that we have no locks */
        for (i = 0; i < index; i++) {
            VN_RELE_DNLC(nc_rele[i]);
        }
        if (ncp != (ncache_t *)nch) {
            nch--; /* Do current hash chain again */
        }
    }
}
/*
 * Purge any cache entries referencing a vnode. Exit as soon as the dnlc
 * reference count goes to zero (the caller still holds a reference).
 */
void
dnlc_purge_vp(vnode_t *vp)
{
    nc_hash_t *nch;
    ncache_t *ncp;
    int index;
    vnode_t *nc_rele[DNLC_MAX_RELE];

    ASSERT(vp->v_count > 0);
    if (vp->v_count_dnlc == 0) {
        return;
    }

    if (!doingcache)
        return;

    ncstats.purges++;
    ncs.ncs_purge_vp.value.ui64++;

    for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
        index = 0;
        mutex_enter(&nch->hash_lock);
        ncp = nch->hash_next;
        while (ncp != (ncache_t *)nch) {
            ncache_t *np;

            np = ncp->hash_next;
            if (ncp->dp == vp || ncp->vp == vp) {
                nc_rele[index++] = ncp->vp;
                nc_rele[index++] = ncp->dp;
                nc_rmhash(ncp);
                dnlc_free(ncp);
                ncs.ncs_purge_total.value.ui64++;
                if (index == DNLC_MAX_RELE) {
                    ncp = np;
                    break;
                }
            }
            ncp = np;
        }
        mutex_exit(&nch->hash_lock);

        /* Release holds on all the vnodes now that we have no locks */
        while (index) {
            VN_RELE_DNLC(nc_rele[--index]);
        }

        if (vp->v_count_dnlc == 0) {
            break;
        }

        if (ncp != (ncache_t *)nch) {
            nch--; /* Do current hash chain again */
        }
    }
}
/*
 * Purge cache entries referencing a vfsp. Caller supplies a count
 * of entries to purge; up to that many will be freed. A count of
 * zero indicates that all such entries should be purged. Returns
 * the number of entries that were purged.
 */
int
dnlc_purge_vfsp(vfs_t *vfsp, int count)
{
    nc_hash_t *nch;
    ncache_t *ncp;
    int n = 0;
    int index;
    int i;
    vnode_t *nc_rele[DNLC_MAX_RELE];

    if (!doingcache)
        return (0);

    ncstats.purges++;
    ncs.ncs_purge_vfs.value.ui64++;

    for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
        index = 0;
        mutex_enter(&nch->hash_lock);
        ncp = nch->hash_next;
        while (ncp != (ncache_t *)nch) {
            ncache_t *np;

            np = ncp->hash_next;
            ASSERT(ncp->dp != NULL);
            ASSERT(ncp->vp != NULL);
            if ((ncp->dp->v_vfsp == vfsp) ||
                (ncp->vp->v_vfsp == vfsp)) {
                n++;
                nc_rele[index++] = ncp->vp;
                nc_rele[index++] = ncp->dp;
                nc_rmhash(ncp);
                dnlc_free(ncp);
                ncs.ncs_purge_total.value.ui64++;
                if (index == DNLC_MAX_RELE) {
                    ncp = np;
                    break;
                }
                if (count != 0 && n >= count) {
                    break;
                }
            }
            ncp = np;
        }
        mutex_exit(&nch->hash_lock);
        /* Release holds on all the vnodes now that we have no locks */
        for (i = 0; i < index; i++) {
            VN_RELE_DNLC(nc_rele[i]);
        }
        if (count != 0 && n >= count) {
            break;
        }
        if (ncp != (ncache_t *)nch) {
            nch--; /* Do current hash chain again */
        }
    }
    return (n);
}
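/*
 * Usage note (editorial, illustrative): a filesystem would typically call
 * dnlc_purge_vfsp(vfsp, 0) from its unmount path to drop every dnlc
 * reference into that vfs, and use a non-zero count, e.g.
 * dnlc_purge_vfsp(vfsp, 100), only when it wants to purge incrementally
 * and inspect the returned number of entries freed.
 */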
/*
 * Purge 1 entry from the dnlc that is part of the filesystem(s)
 * represented by 'vop'. The purpose of this routine is to allow
 * users of the dnlc to free a vnode that is being held by the dnlc.
 *
 * If we find a vnode that we release which will result in
 * freeing the underlying vnode (count was 1), return 1, 0
 * if no appropriate vnodes found.
 *
 * Note, vop is not the 'right' identifier for a filesystem.
 */
int
dnlc_fs_purge1(const struct vnodeops *vop)
{
    nc_hash_t *end;
    nc_hash_t *hp;
    ncache_t *ncp;
    vnode_t *vp;

    if (!doingcache)
        return (0);

    ncstats.purges++;
    ncs.ncs_purge_fs1.value.ui64++;

    /*
     * Scan the dnlc entries looking for a likely candidate.
     */
    hp = end = dnlc_purge_fs1_rotor;

    do {
        if (++hp == &nc_hash[nc_hashsz])
            hp = nc_hash;
        dnlc_purge_fs1_rotor = hp;
        if (hp->hash_next == (ncache_t *)hp)
            continue;
        mutex_enter(&hp->hash_lock);
        for (ncp = hp->hash_prev;
            ncp != (ncache_t *)hp;
            ncp = ncp->hash_prev) {
            vp = ncp->vp;
            if (!vn_has_cached_data(vp) && (vp->v_count == 1) &&
                vn_matchops(vp, vop))
                break;
        }
        if (ncp != (ncache_t *)hp) {
            nc_rmhash(ncp);
            mutex_exit(&hp->hash_lock);
            VN_RELE_DNLC(ncp->dp);
            VN_RELE_DNLC(vp);
            dnlc_free(ncp);
            ncs.ncs_purge_total.value.ui64++;
            return (1);
        }
        mutex_exit(&hp->hash_lock);
    } while (hp != end);
    return (0);
}
/*
 * Utility routine to search for a cache entry. Return the
 * ncache entry if found, NULL otherwise.
 */
static ncache_t *
dnlc_search(vnode_t *dp, const char *name, uchar_t namlen, int hash)
{
    nc_hash_t *hp;
    ncache_t *ncp;

    hp = &nc_hash[hash & nc_hashmask];

    for (ncp = hp->hash_next; ncp != (ncache_t *)hp; ncp = ncp->hash_next) {
        if (ncp->hash == hash &&
            ncp->dp == dp &&
            ncp->namlen == namlen &&
            bcmp(ncp->name, name, namlen) == 0)
            return (ncp);
    }
    return (NULL);
}

#if ((1 << NBBY) - 1) < (MAXNAMELEN - 1)
#error ncache_t name length representation is too small
#endif
void
dnlc_reduce_cache(void *reduce_percent)
{
    if (dnlc_reduce_idle && (dnlc_nentries >= ncsize || reduce_percent)) {
        dnlc_reduce_idle = 0;
        if ((taskq_dispatch(system_taskq, do_dnlc_reduce_cache,
            reduce_percent, TQ_NOSLEEP)) == (uintptr_t)NULL)
            dnlc_reduce_idle = 1;
    }
}
/*
 * Get a new name cache entry.
 * If the dnlc_reduce_cache() taskq isn't keeping up with demand, or memory
 * is short, then just return NULL. If we're over ncsize then kick off a
 * thread to free some in use entries down to dnlc_nentries_low_water.
 * Caller must initialise all fields except namlen.
 * Component names are defined to be less than MAXNAMELEN,
 * which includes a null.
 */
static ncache_t *
dnlc_get(uchar_t namlen)
{
    ncache_t *ncp;

    if (dnlc_nentries > dnlc_max_nentries) {
        dnlc_max_nentries_cnt++; /* keep a statistic */
        return (NULL);
    }
    ncp = kmem_alloc(sizeof (ncache_t) + namlen, KM_NOSLEEP);
    if (ncp == NULL) {
        return (NULL);
    }
    ncp->namlen = namlen;
    atomic_inc_32(&dnlc_nentries);
    dnlc_reduce_cache(NULL);
    return (ncp);
}
/*
 * Taskq routine to free up name cache entries to reduce the
 * cache size to the low water mark if "reduce_percent" is not provided.
 * If "reduce_percent" is provided, reduce cache size by
 * (ncsize_onepercent * reduce_percent).
 */
/*ARGSUSED*/
static void
do_dnlc_reduce_cache(void *reduce_percent)
{
    nc_hash_t *hp = dnlc_free_rotor, *start_hp = hp;
    vnode_t *vp;
    ncache_t *ncp;
    int cnt;
    uint_t low_water = dnlc_nentries_low_water;

    if (reduce_percent) {
        uint_t reduce_cnt;

        /*
         * Never try to reduce the current number
         * of cache entries below 3% of ncsize.
         */
        if (dnlc_nentries <= ncsize_min_percent) {
            dnlc_reduce_idle = 1;
            return;
        }
        reduce_cnt = ncsize_onepercent *
            (uint_t)(uintptr_t)reduce_percent;

        if (reduce_cnt > dnlc_nentries ||
            dnlc_nentries - reduce_cnt < ncsize_min_percent)
            low_water = ncsize_min_percent;
        else
            low_water = dnlc_nentries - reduce_cnt;
    }

    do {
        /*
         * Find the first non empty hash queue without locking.
         * Only look at each hash queue once to avoid an infinite loop.
         */
        do {
            if (++hp == &nc_hash[nc_hashsz])
                hp = nc_hash;
        } while (hp->hash_next == (ncache_t *)hp && hp != start_hp);

        /* return if all hash queues are empty. */
        if (hp->hash_next == (ncache_t *)hp) {
            dnlc_reduce_idle = 1;
            return;
        }

        mutex_enter(&hp->hash_lock);
        for (cnt = 0, ncp = hp->hash_prev; ncp != (ncache_t *)hp;
            ncp = ncp->hash_prev, cnt++) {
            vp = ncp->vp;
            /*
             * A name cache entry with a reference count
             * of one is only referenced by the dnlc.
             * Also negative cache entries are purged first.
             */
            if (!vn_has_cached_data(vp) &&
                ((vp->v_count == 1) || (vp == DNLC_NO_VNODE))) {
                ncs.ncs_pick_heur.value.ui64++;
                goto found;
            }
            /*
             * Remove from the end of the chain if the
             * chain is too long
             */
            if (cnt > dnlc_long_chain) {
                ncp = hp->hash_prev;
                ncs.ncs_pick_last.value.ui64++;
                vp = ncp->vp;
                goto found;
            }
        }
        /* check for race and continue */
        if (hp->hash_next == (ncache_t *)hp) {
            mutex_exit(&hp->hash_lock);
            continue;
        }

        ncp = hp->hash_prev; /* pick the last one in the hash queue */
        ncs.ncs_pick_last.value.ui64++;
        vp = ncp->vp;
found:
        /*
         * Remove from hash chain.
         */
        nc_rmhash(ncp);
        mutex_exit(&hp->hash_lock);
        VN_RELE_DNLC(vp);
        VN_RELE_DNLC(ncp->dp);
        dnlc_free(ncp);
    } while (dnlc_nentries > low_water);

    dnlc_free_rotor = hp;
    dnlc_reduce_idle = 1;
}
/*
 * Directory caching routines
 * ==========================
 *
 * See dnlc.h for details of the interfaces below.
 */
/*
 * Look up an entry in a complete or partial directory cache.
 */
dcret_t
dnlc_dir_lookup(dcanchor_t *dcap, const char *name, uint64_t *handle)
{
    dircache_t *dcp;
    dcentry_t *dep;
    int hash;
    int ret;
    uchar_t namlen;

    /*
     * can test without lock as we are only a cache
     */
    if (!VALID_DIR_CACHE(dcap->dca_dircache)) {
        ncs.ncs_dir_misses.value.ui64++;
        return (DNOCACHE);
    }

    if (!dnlc_dir_enable) {
        return (DNOCACHE);
    }

    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        dcp->dc_actime = ddi_get_lbolt64();
        DNLC_DIR_HASH(name, hash, namlen);
        dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
        while (dep != NULL) {
            if ((dep->de_hash == hash) &&
                (namlen == dep->de_namelen) &&
                bcmp(dep->de_name, name, namlen) == 0) {
                *handle = dep->de_handle;
                mutex_exit(&dcap->dca_lock);
                ncs.ncs_dir_hits.value.ui64++;
                return (DFOUND);
            }
            dep = dep->de_next;
        }
        if (dcp->dc_complete) {
            ret = DNOENT;
        } else {
            ret = DNOCACHE;
        }
        mutex_exit(&dcap->dca_lock);
        return (ret);
    } else {
        mutex_exit(&dcap->dca_lock);
        ncs.ncs_dir_misses.value.ui64++;
        return (DNOCACHE);
    }
}
/*
 * Start a new directory cache. An estimate of the number of
 * entries is provided as a quick check to ensure the directory
 * is cacheable.
 */
dcret_t
dnlc_dir_start(dcanchor_t *dcap, uint_t num_entries)
{
    dircache_t *dcp;

    if (!dnlc_dir_enable ||
        (num_entries < dnlc_dir_min_size)) {
        return (DNOCACHE);
    }

    if (num_entries > dnlc_dir_max_size) {
        return (DTOOBIG);
    }

    mutex_enter(&dc_head.dch_lock);
    mutex_enter(&dcap->dca_lock);

    if (dcap->dca_dircache == DC_RET_LOW_MEM) {
        dcap->dca_dircache = NULL;
        mutex_exit(&dcap->dca_lock);
        mutex_exit(&dc_head.dch_lock);
        return (DNOMEM);
    }

    /*
     * Check if there's currently a cache.
     * This probably only occurs on a race.
     */
    if (dcap->dca_dircache != NULL) {
        mutex_exit(&dcap->dca_lock);
        mutex_exit(&dc_head.dch_lock);
        return (DNOCACHE);
    }

    /*
     * Allocate the dircache struct, entry and free space hash tables.
     * These tables are initially just one entry but dynamically resize
     * when entries and free space are added or removed.
     */
    if ((dcp = kmem_zalloc(sizeof (dircache_t), KM_NOSLEEP)) == NULL) {
        goto error;
    }
    if ((dcp->dc_namehash = kmem_zalloc(sizeof (dcentry_t *),
        KM_NOSLEEP)) == NULL) {
        goto error;
    }
    if ((dcp->dc_freehash = kmem_zalloc(sizeof (dcfree_t *),
        KM_NOSLEEP)) == NULL) {
        goto error;
    }

    dcp->dc_anchor = dcap; /* set back pointer to anchor */
    dcap->dca_dircache = dcp;

    /* add into head of global chain */
    dcp->dc_next = dc_head.dch_next;
    dcp->dc_prev = (dircache_t *)&dc_head;
    dcp->dc_next->dc_prev = dcp;
    dc_head.dch_next = dcp;

    mutex_exit(&dcap->dca_lock);
    mutex_exit(&dc_head.dch_lock);
    ncs.ncs_cur_dirs.value.ui64++;
    ncs.ncs_dirs_cached.value.ui64++;
    return (DOK);
error:
    if (dcp != NULL) {
        if (dcp->dc_namehash) {
            kmem_free(dcp->dc_namehash, sizeof (dcentry_t *));
        }
        kmem_free(dcp, sizeof (dircache_t));
    }
    /*
     * Must also kmem_free dcp->dc_freehash if more error cases are added
     */
    mutex_exit(&dcap->dca_lock);
    mutex_exit(&dc_head.dch_lock);
    ncs.ncs_dir_start_nm.value.ui64++;
    return (DNOCACHE);
}
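/*
 * Lifecycle sketch (editorial, hypothetical caller - not part of this
 * file): a filesystem caches one directory via the anchor embedded in its
 * per-directory state, roughly:
 *
 *	if (dnlc_dir_start(&dcap, est_entries) == DOK) {
 *		-- walk the directory once, loading the cache
 *		dnlc_dir_add_entry(&dcap, nm, handle);
 *		dnlc_dir_add_space(&dcap, slot_len, slot_handle);
 *		dnlc_dir_complete(&dcap);  -- lookups may now return DNOENT
 *	}
 *	switch (dnlc_dir_lookup(&dcap, nm, &handle)) {
 *	case DFOUND:	-- use handle
 *	case DNOENT:	-- complete cache, name really absent
 *	case DNOCACHE:	-- fall back to scanning the directory
 *	}
 *	dnlc_dir_purge(&dcap);	-- e.g. when the directory is invalidated
 */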
/*
 * Add a directory entry to a partial or complete directory cache.
 */
dcret_t
dnlc_dir_add_entry(dcanchor_t *dcap, const char *name, uint64_t handle)
{
    dircache_t *dcp;
    dcentry_t **hp, *dep;
    int hash;
    uint_t capacity;
    uchar_t namlen;

    /*
     * Allocate the dcentry struct, including the variable
     * size name. Note, the null terminator is not copied.
     *
     * We do this outside the lock to avoid possible deadlock if
     * dnlc_dir_reclaim() is called as a result of memory shortage.
     */
    DNLC_DIR_HASH(name, hash, namlen);
    dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
    if (dep == NULL) {
#ifdef DEBUG
        /*
         * The kmem allocator generates random failures for
         * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
         * So try again before we blow away a perfectly good cache.
         * This is done not to cover an error but purely for
         * performance running a debug kernel.
         * This random error only occurs in debug mode.
         */
        dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
        if (dep != NULL)
            goto ok;
#endif
        ncs.ncs_dir_add_nm.value.ui64++;
        /*
         * Free a directory cache. This may be the one we are
         * adding to.
         */
        dnlc_dir_reclaim(NULL);
        dep = kmem_alloc(sizeof (dcentry_t) - 1 + namlen, KM_NOSLEEP);
        if (dep == NULL) {
            /*
             * still no memory, better delete this cache
             */
            mutex_enter(&dcap->dca_lock);
            dcp = (dircache_t *)dcap->dca_dircache;
            if (VALID_DIR_CACHE(dcp)) {
                dnlc_dir_abort(dcp);
                dcap->dca_dircache = DC_RET_LOW_MEM;
            }
            mutex_exit(&dcap->dca_lock);
            ncs.ncs_dir_addabort.value.ui64++;
            return (DNOCACHE);
        }
        /*
         * fall through as if the 1st kmem_alloc had worked
         */
    }
#ifdef DEBUG
ok:
#endif
    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        /*
         * If the total number of entries goes above the max
         * then free this cache
         */
        if ((dcp->dc_num_entries + dcp->dc_num_free) >
            dnlc_dir_max_size) {
            mutex_exit(&dcap->dca_lock);
            dnlc_dir_purge(dcap);
            kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
            ncs.ncs_dir_add_max.value.ui64++;
            return (DTOOBIG);
        }
        dcp->dc_num_entries++;
        capacity = (dcp->dc_nhash_mask + 1) << dnlc_dir_hash_size_shift;
        if (dcp->dc_num_entries >=
            (capacity << dnlc_dir_hash_resize_shift)) {
            dnlc_dir_adjust_nhash(dcp);
        }
        hp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];

        /*
         * Initialise and chain in new entry
         */
        dep->de_handle = handle;
        dep->de_hash = hash;
        /*
         * Note de_namelen is a uchar_t to conserve space
         * and alignment padding. The max length of any
         * pathname component is defined as MAXNAMELEN
         * which is 256 (including the terminating null).
         * So provided this doesn't change, we don't include the null,
         * we always use bcmp to compare strings, and we don't
         * start storing full names, then we are ok.
         * The space savings is worth it.
         */
        dep->de_namelen = namlen;
        bcopy(name, dep->de_name, namlen);
        dep->de_next = *hp;
        *hp = dep;
        dcp->dc_actime = ddi_get_lbolt64();
        mutex_exit(&dcap->dca_lock);
        ncs.ncs_dir_num_ents.value.ui64++;
        return (DOK);
    } else {
        mutex_exit(&dcap->dca_lock);
        kmem_free(dep, sizeof (dcentry_t) - 1 + namlen);
        return (DNOCACHE);
    }
}
/*
 * Add free space to a partial or complete directory cache.
 */
dcret_t
dnlc_dir_add_space(dcanchor_t *dcap, uint_t len, uint64_t handle)
{
    dircache_t *dcp;
    dcfree_t *dfp, **hp;
    uint_t capacity;

    /*
     * We kmem_alloc outside the lock to avoid possible deadlock if
     * dnlc_dir_reclaim() is called as a result of memory shortage.
     */
    dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
    if (dfp == NULL) {
#ifdef DEBUG
        /*
         * The kmem allocator generates random failures for
         * KM_NOSLEEP calls (see KMEM_RANDOM_ALLOCATION_FAILURE)
         * So try again before we blow away a perfectly good cache.
         * This random error only occurs in debug mode
         */
        dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
        if (dfp != NULL)
            goto ok;
#endif
        ncs.ncs_dir_add_nm.value.ui64++;
        /*
         * Free a directory cache. This may be the one we are
         * adding to.
         */
        dnlc_dir_reclaim(NULL);
        dfp = kmem_cache_alloc(dnlc_dir_space_cache, KM_NOSLEEP);
        if (dfp == NULL) {
            /*
             * still no memory, better delete this cache
             */
            mutex_enter(&dcap->dca_lock);
            dcp = (dircache_t *)dcap->dca_dircache;
            if (VALID_DIR_CACHE(dcp)) {
                dnlc_dir_abort(dcp);
                dcap->dca_dircache = DC_RET_LOW_MEM;
            }
            mutex_exit(&dcap->dca_lock);
            ncs.ncs_dir_addabort.value.ui64++;
            return (DNOCACHE);
        }
        /*
         * fall through as if the 1st kmem_alloc had worked
         */
    }

#ifdef DEBUG
ok:
#endif
    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        if ((dcp->dc_num_entries + dcp->dc_num_free) >
            dnlc_dir_max_size) {
            mutex_exit(&dcap->dca_lock);
            dnlc_dir_purge(dcap);
            kmem_cache_free(dnlc_dir_space_cache, dfp);
            ncs.ncs_dir_add_max.value.ui64++;
            return (DTOOBIG);
        }
        dcp->dc_num_free++;
        capacity = (dcp->dc_fhash_mask + 1) << dnlc_dir_hash_size_shift;
        if (dcp->dc_num_free >=
            (capacity << dnlc_dir_hash_resize_shift)) {
            dnlc_dir_adjust_fhash(dcp);
        }
        /*
         * Initialise and chain a new entry
         */
        dfp->df_handle = handle;
        dfp->df_len = len;
        dcp->dc_actime = ddi_get_lbolt64();
        hp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
        dfp->df_next = *hp;
        *hp = dfp;
        mutex_exit(&dcap->dca_lock);
        ncs.ncs_dir_num_ents.value.ui64++;
        return (DOK);
    } else {
        mutex_exit(&dcap->dca_lock);
        kmem_cache_free(dnlc_dir_space_cache, dfp);
        return (DNOCACHE);
    }
}
/*
 * Mark a directory cache as complete.
 */
void
dnlc_dir_complete(dcanchor_t *dcap)
{
    dircache_t *dcp;

    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        dcp->dc_complete = B_TRUE;
    }
    mutex_exit(&dcap->dca_lock);
}
/*
 * Internal routine to delete a partial or full directory cache.
 * No additional locking needed.
 */
static void
dnlc_dir_abort(dircache_t *dcp)
{
    dcentry_t *dep, *nhp;
    dcfree_t *fep, *fhp;
    uint_t nhtsize = dcp->dc_nhash_mask + 1; /* name hash table size */
    uint_t fhtsize = dcp->dc_fhash_mask + 1; /* free hash table size */
    uint_t i;

    /*
     * Free up the cached name entries and hash table
     */
    for (i = 0; i < nhtsize; i++) { /* for each hash bucket */
        nhp = dcp->dc_namehash[i];
        while (nhp != NULL) { /* for each chained entry */
            dep = nhp->de_next;
            kmem_free(nhp, sizeof (dcentry_t) - 1 +
                nhp->de_namelen);
            nhp = dep;
        }
    }
    kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * nhtsize);

    /*
     * Free up the free space entries and hash table
     */
    for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
        fhp = dcp->dc_freehash[i];
        while (fhp != NULL) { /* for each chained entry */
            fep = fhp->df_next;
            kmem_cache_free(dnlc_dir_space_cache, fhp);
            fhp = fep;
        }
    }
    kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * fhtsize);

    /*
     * Finally free the directory cache structure itself
     */
    ncs.ncs_dir_num_ents.value.ui64 -= (dcp->dc_num_entries +
        dcp->dc_num_free);
    kmem_free(dcp, sizeof (dircache_t));
    ncs.ncs_cur_dirs.value.ui64--;
}
/*
 * Remove a partial or complete directory cache
 */
void
dnlc_dir_purge(dcanchor_t *dcap)
{
    dircache_t *dcp;

    mutex_enter(&dc_head.dch_lock);
    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (!VALID_DIR_CACHE(dcp)) {
        mutex_exit(&dcap->dca_lock);
        mutex_exit(&dc_head.dch_lock);
        return;
    }
    dcap->dca_dircache = NULL;
    /*
     * Unchain from global list
     */
    dcp->dc_prev->dc_next = dcp->dc_next;
    dcp->dc_next->dc_prev = dcp->dc_prev;
    mutex_exit(&dcap->dca_lock);
    mutex_exit(&dc_head.dch_lock);
    dnlc_dir_abort(dcp);
}
/*
 * Remove an entry from a complete or partial directory cache.
 * Return the handle if it's non null.
 */
dcret_t
dnlc_dir_rem_entry(dcanchor_t *dcap, const char *name, uint64_t *handlep)
{
    dircache_t *dcp;
    dcentry_t **prevpp, *te;
    uint_t capacity;
    int hash;
    int ret;
    uchar_t namlen;

    if (!dnlc_dir_enable) {
        return (DNOCACHE);
    }

    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        dcp->dc_actime = ddi_get_lbolt64();
        if (dcp->dc_nhash_mask > 0) { /* ie not minimum */
            capacity = (dcp->dc_nhash_mask + 1) <<
                dnlc_dir_hash_size_shift;
            if (dcp->dc_num_entries <=
                (capacity >> dnlc_dir_hash_resize_shift)) {
                dnlc_dir_adjust_nhash(dcp);
            }
        }
        DNLC_DIR_HASH(name, hash, namlen);
        prevpp = &dcp->dc_namehash[hash & dcp->dc_nhash_mask];
        while (*prevpp != NULL) {
            if (((*prevpp)->de_hash == hash) &&
                (namlen == (*prevpp)->de_namelen) &&
                bcmp((*prevpp)->de_name, name, namlen) == 0) {
                if (handlep != NULL) {
                    *handlep = (*prevpp)->de_handle;
                }
                te = *prevpp;
                *prevpp = (*prevpp)->de_next;
                kmem_free(te, sizeof (dcentry_t) - 1 +
                    te->de_namelen);

                /*
                 * If the total number of entries
                 * falls below half the minimum number
                 * of entries then free this cache.
                 */
                if (--dcp->dc_num_entries <
                    (dnlc_dir_min_size >> 1)) {
                    mutex_exit(&dcap->dca_lock);
                    dnlc_dir_purge(dcap);
                } else {
                    mutex_exit(&dcap->dca_lock);
                }
                ncs.ncs_dir_num_ents.value.ui64--;
                return (DFOUND);
            }
            prevpp = &((*prevpp)->de_next);
        }
        if (dcp->dc_complete) {
            ncs.ncs_dir_reme_fai.value.ui64++;
            ret = DNOENT;
        } else {
            ret = DNOCACHE;
        }
        mutex_exit(&dcap->dca_lock);
        return (ret);
    } else {
        mutex_exit(&dcap->dca_lock);
        return (DNOCACHE);
    }
}
/*
 * Remove free space of at least the given length from a complete
 * or partial directory cache.
 */
dcret_t
dnlc_dir_rem_space_by_len(dcanchor_t *dcap, uint_t len, uint64_t *handlep)
{
    dircache_t *dcp;
    dcfree_t **prevpp, *tfp;
    uint_t fhtsize; /* free hash table size */
    uint_t i;
    uint_t capacity;
    int ret;

    if (!dnlc_dir_enable) {
        return (DNOCACHE);
    }

    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        dcp->dc_actime = ddi_get_lbolt64();
        if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
            capacity = (dcp->dc_fhash_mask + 1) <<
                dnlc_dir_hash_size_shift;
            if (dcp->dc_num_free <=
                (capacity >> dnlc_dir_hash_resize_shift)) {
                dnlc_dir_adjust_fhash(dcp);
            }
        }

        /*
         * Search for an entry of the appropriate size
         * on a first fit basis.
         */
        fhtsize = dcp->dc_fhash_mask + 1;
        for (i = 0; i < fhtsize; i++) { /* for each hash bucket */
            prevpp = &(dcp->dc_freehash[i]);
            while (*prevpp != NULL) {
                if ((*prevpp)->df_len >= len) {
                    *handlep = (*prevpp)->df_handle;
                    tfp = *prevpp;
                    *prevpp = (*prevpp)->df_next;
                    dcp->dc_num_free--;
                    mutex_exit(&dcap->dca_lock);
                    kmem_cache_free(dnlc_dir_space_cache,
                        tfp);
                    ncs.ncs_dir_num_ents.value.ui64--;
                    return (DFOUND);
                }
                prevpp = &((*prevpp)->df_next);
            }
        }
        if (dcp->dc_complete) {
            ret = DNOENT;
        } else {
            ret = DNOCACHE;
        }
        mutex_exit(&dcap->dca_lock);
        return (ret);
    } else {
        mutex_exit(&dcap->dca_lock);
        return (DNOCACHE);
    }
}
/*
 * Remove free space with the given handle from a complete or partial
 * directory cache.
 */
dcret_t
dnlc_dir_rem_space_by_handle(dcanchor_t *dcap, uint64_t handle)
{
    dircache_t *dcp;
    dcfree_t **prevpp, *tfp;
    uint_t capacity;
    int ret;

    if (!dnlc_dir_enable) {
        return (DNOCACHE);
    }

    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        dcp->dc_actime = ddi_get_lbolt64();
        if (dcp->dc_fhash_mask > 0) { /* ie not minimum */
            capacity = (dcp->dc_fhash_mask + 1) <<
                dnlc_dir_hash_size_shift;
            if (dcp->dc_num_free <=
                (capacity >> dnlc_dir_hash_resize_shift)) {
                dnlc_dir_adjust_fhash(dcp);
            }
        }

        /*
         * search for the exact entry
         */
        prevpp = &(dcp->dc_freehash[DDFHASH(handle, dcp)]);
        while (*prevpp != NULL) {
            if ((*prevpp)->df_handle == handle) {
                tfp = *prevpp;
                *prevpp = (*prevpp)->df_next;
                dcp->dc_num_free--;
                mutex_exit(&dcap->dca_lock);
                kmem_cache_free(dnlc_dir_space_cache, tfp);
                ncs.ncs_dir_num_ents.value.ui64--;
                return (DFOUND);
            }
            prevpp = &((*prevpp)->df_next);
        }
        if (dcp->dc_complete) {
            ncs.ncs_dir_rems_fai.value.ui64++;
            ret = DNOENT;
        } else {
            ret = DNOCACHE;
        }
        mutex_exit(&dcap->dca_lock);
        return (ret);
    } else {
        mutex_exit(&dcap->dca_lock);
        return (DNOCACHE);
    }
}
/*
 * Update the handle of a directory cache entry.
 */
dcret_t
dnlc_dir_update(dcanchor_t *dcap, const char *name, uint64_t handle)
{
    dircache_t *dcp;
    dcentry_t *dep;
    int hash;
    int ret;
    uchar_t namlen;

    if (!dnlc_dir_enable) {
        return (DNOCACHE);
    }

    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        dcp->dc_actime = ddi_get_lbolt64();
        DNLC_DIR_HASH(name, hash, namlen);
        dep = dcp->dc_namehash[hash & dcp->dc_nhash_mask];
        while (dep != NULL) {
            if ((dep->de_hash == hash) &&
                (namlen == dep->de_namelen) &&
                bcmp(dep->de_name, name, namlen) == 0) {
                dep->de_handle = handle;
                mutex_exit(&dcap->dca_lock);
                return (DFOUND);
            }
            dep = dep->de_next;
        }
        if (dcp->dc_complete) {
            ncs.ncs_dir_upd_fail.value.ui64++;
            ret = DNOENT;
        } else {
            ret = DNOCACHE;
        }
        mutex_exit(&dcap->dca_lock);
        return (ret);
    } else {
        mutex_exit(&dcap->dca_lock);
        return (DNOCACHE);
    }
}
void
dnlc_dir_fini(dcanchor_t *dcap)
{
    dircache_t *dcp;

    mutex_enter(&dc_head.dch_lock);
    mutex_enter(&dcap->dca_lock);
    dcp = (dircache_t *)dcap->dca_dircache;
    if (VALID_DIR_CACHE(dcp)) {
        /*
         * Unchain from global list
         */
        ncs.ncs_dir_finipurg.value.ui64++;
        dcp->dc_prev->dc_next = dcp->dc_next;
        dcp->dc_next->dc_prev = dcp->dc_prev;
    } else {
        dcp = NULL;
    }
    dcap->dca_dircache = NULL;
    mutex_exit(&dcap->dca_lock);
    mutex_exit(&dc_head.dch_lock);
    mutex_destroy(&dcap->dca_lock);
    if (dcp) {
        dnlc_dir_abort(dcp);
    }
}
/*
 * Reclaim callback for dnlc directory caching.
 * Invoked by the kernel memory allocator when memory gets tight.
 * This is a pretty serious condition and can easily lead to system
 * hangs if not enough space is returned.
 *
 * Deciding which directory (or directories) to purge is tricky.
 * Purging everything is an overkill, but purging just the oldest used
 * was found to lead to hangs. The largest cached directories use the
 * most memory, but take the most effort to rebuild, whereas the smaller
 * ones have little value and give back little space. So what to do?
 *
 * The current policy is to continue purging the oldest used directories
 * until at least dnlc_dir_min_reclaim directory entries have been purged.
 */
/*ARGSUSED*/
static void
dnlc_dir_reclaim(void *unused)
{
    dircache_t *dcp, *oldest;
    uint_t dirent_cnt = 0;

    mutex_enter(&dc_head.dch_lock);
    while (dirent_cnt < dnlc_dir_min_reclaim) {
        dcp = dc_head.dch_next;
        oldest = NULL;
        while (dcp != (dircache_t *)&dc_head) {
            if (oldest == NULL) {
                oldest = dcp;
            } else {
                if (dcp->dc_actime < oldest->dc_actime) {
                    oldest = dcp;
                }
            }
            dcp = dcp->dc_next;
        }
        if (oldest == NULL) {
            /* nothing to delete */
            mutex_exit(&dc_head.dch_lock);
            return;
        }
        /*
         * remove from directory chain and purge
         */
        oldest->dc_prev->dc_next = oldest->dc_next;
        oldest->dc_next->dc_prev = oldest->dc_prev;
        mutex_enter(&oldest->dc_anchor->dca_lock);
        /*
         * If this was the last entry then it must be too large.
         * Mark it as such by saving a special dircache_t
         * pointer (DC_RET_LOW_MEM) in the anchor. The error DNOMEM
         * will be presented to the caller of dnlc_dir_start()
         */
        if (oldest->dc_next == oldest->dc_prev) {
            oldest->dc_anchor->dca_dircache = DC_RET_LOW_MEM;
            ncs.ncs_dir_rec_last.value.ui64++;
        } else {
            oldest->dc_anchor->dca_dircache = NULL;
            ncs.ncs_dir_recl_any.value.ui64++;
        }
        mutex_exit(&oldest->dc_anchor->dca_lock);
        dirent_cnt += oldest->dc_num_entries;
        dnlc_dir_abort(oldest);
    }
    mutex_exit(&dc_head.dch_lock);
}
/*
 * Dynamically grow or shrink the size of the name hash table
 */
static void
dnlc_dir_adjust_nhash(dircache_t *dcp)
{
    dcentry_t **newhash, *dep, **nhp, *tep;
    uint_t newsize;
    uint_t oldsize;
    uint_t newsizemask;
    int i;

    /*
     * Allocate new hash table
     */
    newsize = dcp->dc_num_entries >> dnlc_dir_hash_size_shift;
    newhash = kmem_zalloc(sizeof (dcentry_t *) * newsize, KM_NOSLEEP);
    if (newhash == NULL) {
        /*
         * System is short on memory, just return.
         * Note, the old hash table is still usable.
         * This return is unlikely to repeatedly occur, because
         * either some other directory caches will be reclaimed
         * due to memory shortage, thus freeing memory, or this
         * directory cache will be reclaimed.
         */
        return;
    }
    oldsize = dcp->dc_nhash_mask + 1;
    dcp->dc_nhash_mask = newsizemask = newsize - 1;

    /*
     * Move entries from the old table to the new
     */
    for (i = 0; i < oldsize; i++) { /* for each hash bucket */
        dep = dcp->dc_namehash[i];
        while (dep != NULL) { /* for each chained entry */
            tep = dep;
            dep = dep->de_next;
            nhp = &newhash[tep->de_hash & newsizemask];
            tep->de_next = *nhp;
            *nhp = tep;
        }
    }

    /*
     * delete old hash table and set new one in place
     */
    kmem_free(dcp->dc_namehash, sizeof (dcentry_t *) * oldsize);
    dcp->dc_namehash = newhash;
}
/*
 * Dynamically grow or shrink the size of the free space hash table
 */
static void
dnlc_dir_adjust_fhash(dircache_t *dcp)
{
    dcfree_t **newhash, *dfp, **nhp, *tfp;
    uint_t newsize;
    uint_t oldsize;
    int i;

    /*
     * Allocate new hash table
     */
    newsize = dcp->dc_num_free >> dnlc_dir_hash_size_shift;
    newhash = kmem_zalloc(sizeof (dcfree_t *) * newsize, KM_NOSLEEP);
    if (newhash == NULL) {
        /*
         * System is short on memory, just return.
         * Note, the old hash table is still usable.
         * This return is unlikely to repeatedly occur, because
         * either some other directory caches will be reclaimed
         * due to memory shortage, thus freeing memory, or this
         * directory cache will be reclaimed.
         */
        return;
    }
    oldsize = dcp->dc_fhash_mask + 1;
    dcp->dc_fhash_mask = newsize - 1;

    /*
     * Move entries from the old table to the new
     */
    for (i = 0; i < oldsize; i++) { /* for each hash bucket */
        dfp = dcp->dc_freehash[i];
        while (dfp != NULL) { /* for each chained entry */
            tfp = dfp;
            dfp = dfp->df_next;
            nhp = &newhash[DDFHASH(tfp->df_handle, dcp)];
            tfp->df_next = *nhp;
            *nhp = tfp;
        }
    }

    /*
     * delete old hash table and set new one in place
     */
    kmem_free(dcp->dc_freehash, sizeof (dcfree_t *) * oldsize);
    dcp->dc_freehash = newhash;
}