uts: make emu10k non-verbose
[unleashed.git] / kernel / vm / page_lock.c
blob288b0a454d45dd2861817b706a6a540c5b694f11
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
27 * VM - page locking primitives
29 #include <sys/param.h>
30 #include <sys/t_lock.h>
31 #include <sys/vtrace.h>
32 #include <sys/debug.h>
33 #include <sys/cmn_err.h>
34 #include <sys/bitmap.h>
35 #include <sys/lockstat.h>
36 #include <sys/sysmacros.h>
37 #include <sys/condvar_impl.h>
38 #include <vm/page.h>
39 #include <vm/seg_enum.h>
40 #include <vm/vm_dep.h>
41 #include <vm/seg_kmem.h>
44 * This global mutex array is for logical page locking.
45 * The following fields in the page structure are protected
46 * by this lock:
48 * p_lckcnt
49 * p_cowcnt
51 pad_mutex_t page_llocks[8 * NCPU_P2];
54 * This is a global lock for the logical page free list. The
55 * logical free list, in this implementation, is maintained as two
56 * separate physical lists - the cache list and the free list.
58 kmutex_t page_freelock;
61 * The hash table, page_hash[], the p_selock fields, and the
62 * list of pages associated with vnodes are protected by arrays of mutexes.
64 * Unless the hashes are changed radically, the table sizes must be
65 * a power of two. Also, we typically need more mutexes for the
66 * vnodes since these locks are occasionally held for long periods.
67 * And since there seem to be two special vnodes (kvp and swapvp),
68 * we make room for private mutexes for them.
70 * The pse_mutex[] array holds the mutexes to protect the p_selock
71 * fields of all page_t structures.
73 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
74 * when given a pointer to a page_t.
76 * PIO_TABLE_SIZE must be a power of two. One could argue that we
77 * should go to the trouble of setting it up at run time and base it
78 * on memory size rather than the number of compile time CPUs.
80 * XX64 We should be using physmem size to calculate PIO_SHIFT.
82 * These might break in 64 bit world.
84 #define PIO_SHIFT 7 /* log2(sizeof(page_t)) */
85 #define PIO_TABLE_SIZE 128 /* number of io mutexes to have */
87 kmutex_t pio_mutex[PIO_TABLE_SIZE];
89 #define PAGE_IO_MUTEX(pp) \
90 &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]
93 * The pse_mutex[] array is allocated in the platform startup code
94 * based on the size of the machine at startup.
96 extern pad_mutex_t *pse_mutex; /* Locks protecting pp->p_selock */
97 extern size_t pse_table_size; /* Number of mutexes in pse_mutex[] */
98 extern int pse_shift; /* log2(pse_table_size) */
99 #define PAGE_SE_MUTEX(pp) &pse_mutex[ \
100 ((((uintptr_t)(pp) >> pse_shift) ^ ((uintptr_t)(pp))) >> 7) & \
101 (pse_table_size - 1)].pad_mutex
103 #define PSZC_MTX_TABLE_SIZE 128
104 #define PSZC_MTX_TABLE_SHIFT 7
106 static pad_mutex_t pszc_mutex[PSZC_MTX_TABLE_SIZE];
108 #define PAGE_SZC_MUTEX(_pp) \
109 &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
110 ((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
111 ((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
112 (PSZC_MTX_TABLE_SIZE - 1))].pad_mutex
/*
 * Initialize the locks used by the Virtual Memory Management system.
 *
 * No run-time initialization is performed here; the function body is
 * intentionally empty.  The parameter list is spelled "(void)" so the
 * definition also serves as a proper prototype.
 */
void
page_lock_init(void)
{
}
123 * Return a value for pse_shift based on npg (the number of physical pages)
124 * and ncpu (the maximum number of CPUs). This is called by platform startup
125 * code.
127 * Lockstat data from TPC-H runs showed that contention on the pse_mutex[]
128 * locks grew approximately as the square of the number of threads executing.
129 * So the primary scaling factor used is NCPU^2. The size of the machine in
130 * megabytes is used as an upper bound, particularly for sun4v machines which
131 * all claim to have 256 CPUs maximum, and the old value of PSE_TABLE_SIZE
132 * (128) is used as a minimum. Since the size of the table has to be a power
133 * of two, the calculated size is rounded up to the next power of two.
135 /*ARGSUSED*/
137 size_pse_array(pgcnt_t npg, int ncpu)
139 size_t size;
140 pgcnt_t pp_per_mb = (1024 * 1024) / PAGESIZE;
142 size = MAX(128, MIN(npg / pp_per_mb, 2 * ncpu * ncpu));
143 size += (1 << (highbit(size) - 1)) - 1;
144 return (highbit(size) - 1);
/*
 * Page ownership is currently only a debugging aid, so the owner value
 * stored in p_selock does not have to be exact.  On 32-bit kernels two
 * thread ids can produce the same owner value, because 0x80000000 is
 * or'ed in and the second highest bit is then cleared; for example
 * 0x2faced00 and 0xafaced00 both map to 0xafaced00.
 * On 64-bit kernels p_selock may not be wide enough to hold a complete
 * thread pointer.  Should precise ownership ever be required (e.g. to
 * implement priority inheritance for page locks), p_selock should become
 * a uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define	SE_READER	1

/*
 * A deleted page must be marked as such with page_lock_delete(), which
 * requires the page to be exclusively locked and stores the SE_DELETED
 * marker in p_selock.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)
#ifdef VM_STATS
/*
 * Statistics counters; they exist only when VM_STATS is defined.
 */
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;

/* bumped by page_lock_es() */
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_retired;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

/* bumped by the non-blocking lock routines */
uint_t	page_trylock_locked;
uint_t	page_trylock_failed;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif /* VM_STATS */
193 * Acquire the "shared/exclusive" lock on a page.
195 * Returns 1 on success and locks the page appropriately.
196 * 0 on failure and does not lock the page.
198 * If `lock' is non-NULL, it will be dropped and reacquired in the
199 * failure case. This routine can block, and if it does
200 * it will always return a failure since the page identity [vp, off]
201 * or state may have changed.
205 page_lock(page_t *pp, se_t se, vnode_t *vnode, reclaim_t reclaim)
207 return (page_lock_es(pp, se, vnode, reclaim, 0));
211 * With the addition of reader-writer lock semantics to page_lock_es,
212 * callers wanting an exclusive (writer) lock may prevent shared-lock
213 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
214 * In this case, when an exclusive lock cannot be acquired, p_selock's
215 * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
216 * if the page is slated for retirement.
218 * The se and es parameters determine if the lock should be granted
219 * based on the following decision table:
221 * Lock wanted es flags p_selock/SE_EWANTED Action
222 * ----------- -------------- ------------------- ---------
223 * SE_EXCL any [1][2] unlocked/any grant lock, clear SE_EWANTED
224 * SE_EXCL SE_EWANTED any lock/any deny, set SE_EWANTED
225 * SE_EXCL none any lock/any deny
226 * SE_SHARED n/a [2] shared/0 grant
227 * SE_SHARED n/a [2] unlocked/0 grant
228 * SE_SHARED n/a shared/1 deny
229 * SE_SHARED n/a unlocked/1 deny
230 * SE_SHARED n/a excl/any deny
232 * Notes:
233 * [1] The code grants an exclusive lock to the caller and clears the bit
234 * SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
235 * bit's value. This was deemed acceptable as we are not concerned about
236 * exclusive-lock starvation. If this ever becomes an issue, a priority or
237 * fifo mechanism should also be implemented. Meantime, the thread that
238 * set SE_EWANTED should be prepared to catch this condition and reset it
240 * [2] Retired pages may not be locked at any time, regardless of the
241 * dispostion of se, unless the es parameter has SE_RETIRED flag set.
243 * Notes on values of "es":
245 * es & 1: page_lookup_create will attempt page relocation
246 * es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
247 * memory thread); this prevents reader-starvation of waiting
248 * writer thread(s) by giving priority to writers over readers.
249 * es & SE_RETIRED: caller wants to lock pages even if they are
250 * retired. Default is to deny the lock if the page is retired.
252 * And yes, we know, the semantics of this function are too complicated.
253 * It's on the list to be cleaned up.
256 page_lock_es(page_t *pp, se_t se, vnode_t *vnode, reclaim_t reclaim, int es)
258 int retval;
259 kmutex_t *pse = PAGE_SE_MUTEX(pp);
260 int upgraded;
261 int reclaim_it;
263 ASSERT(vnode != NULL ? MUTEX_HELD(page_vnode_mutex(vnode)) : 1);
265 VM_STAT_ADD(page_lock_count);
267 upgraded = 0;
268 reclaim_it = 0;
270 mutex_enter(pse);
272 ASSERT(((es & SE_EXCL_WANTED) == 0) ||
273 ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));
275 if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
276 mutex_exit(pse);
277 VM_STAT_ADD(page_lock_retired);
278 return (0);
281 if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
282 se = SE_EXCL;
285 if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {
287 reclaim_it = 1;
288 if (se == SE_SHARED) {
290 * This is an interesting situation.
292 * Remember that p_free can only change if
293 * p_selock < 0.
294 * p_free does not depend on our holding `pse'.
295 * And, since we hold `pse', p_selock can not change.
296 * So, if p_free changes on us, the page is already
297 * exclusively held, and we would fail to get p_selock
298 * regardless.
300 * We want to avoid getting the share
301 * lock on a free page that needs to be reclaimed.
302 * It is possible that some other thread has the share
303 * lock and has left the free page on the cache list.
304 * pvn_vplist_dirty() does this for brief periods.
305 * If the se_share is currently SE_EXCL, we will fail
306 * to acquire p_selock anyway. Blocking is the
307 * right thing to do.
308 * If we need to reclaim this page, we must get
309 * exclusive access to it, force the upgrade now.
310 * Again, we will fail to acquire p_selock if the
311 * page is not free and block.
313 upgraded = 1;
314 se = SE_EXCL;
315 VM_STAT_ADD(page_lock_upgrade);
319 if (se == SE_EXCL) {
320 if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
322 * if the caller wants a writer lock (but did not
323 * specify exclusive access), and there is a pending
324 * writer that wants exclusive access, return failure
326 retval = 0;
327 } else if ((pp->p_selock & ~SE_EWANTED) == 0) {
328 /* no reader/writer lock held */
329 THREAD_KPRI_REQUEST();
330 /* this clears our setting of the SE_EWANTED bit */
331 pp->p_selock = SE_WRITER;
332 retval = 1;
333 } else {
334 /* page is locked */
335 if (es & SE_EXCL_WANTED) {
336 /* set the SE_EWANTED bit */
337 pp->p_selock |= SE_EWANTED;
339 retval = 0;
341 } else {
342 retval = 0;
343 if (pp->p_selock >= 0) {
344 if ((pp->p_selock & SE_EWANTED) == 0) {
345 pp->p_selock += SE_READER;
346 retval = 1;
351 if (retval == 0) {
352 if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
353 VM_STAT_ADD(page_lock_deleted);
354 mutex_exit(pse);
355 return (retval);
358 VM_STAT_ADD(page_lock_miss);
359 VM_STAT_COND_ADD(upgraded, page_lock_upgrade_failed);
361 if (vnode != NULL) {
362 VM_STAT_ADD(page_lock_miss_lock);
363 mutex_exit(page_vnode_mutex(vnode));
367 * Now, wait for the page to be unlocked and
368 * release the lock protecting p_cv and p_selock.
370 cv_wait(&pp->p_cv, pse);
371 mutex_exit(pse);
374 * The page identity may have changed while we were
375 * blocked. If we are willing to depend on "pp"
376 * still pointing to a valid page structure (i.e.,
377 * assuming page structures are not dynamically allocated
378 * or freed), we could try to lock the page if its
379 * identity hasn't changed.
381 * This needs to be measured, since we come back from
382 * cv_wait holding pse (the expensive part of this
383 * operation) we might as well try the cheap part.
384 * Though we would also have to confirm that dropping
385 * vnode page lock did not cause any grief to the callers.
387 if (vnode != NULL)
388 mutex_enter(page_vnode_mutex(vnode));
389 } else {
391 * We have the page lock.
392 * If we needed to reclaim the page, and the page
393 * needed reclaiming (ie, it was free), then we
394 * have the page exclusively locked. We may need
395 * to downgrade the page.
397 ASSERT((upgraded) ?
398 ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
399 mutex_exit(pse);
402 * We now hold this page's lock, either shared or
403 * exclusive. This will prevent its identity from changing.
404 * The page, however, may or may not be free. If the caller
405 * requested, and it is free, go reclaim it from the
406 * free list. If the page can't be reclaimed, return failure
407 * so that the caller can start all over again.
409 * NOTE:page_reclaim() releases the page lock (p_selock)
410 * if it can't be reclaimed.
412 if (reclaim_it) {
413 if (!page_reclaim(pp, vnode)) {
414 VM_STAT_ADD(page_lock_bad_reclaim);
415 retval = 0;
416 } else {
417 VM_STAT_ADD(page_lock_reclaim);
418 if (upgraded) {
419 page_downgrade(pp);
424 return (retval);
428 * Clear the SE_EWANTED bit from p_selock. This function allows
429 * callers of page_lock_es and page_try_reclaim_lock to clear
430 * their setting of this bit if they decide they no longer wish
431 * to gain exclusive access to the page. Currently only
432 * delete_memory_thread uses this when the delete memory
433 * operation is cancelled.
435 void
436 page_lock_clr_exclwanted(page_t *pp)
438 kmutex_t *pse = PAGE_SE_MUTEX(pp);
440 mutex_enter(pse);
441 pp->p_selock &= ~SE_EWANTED;
442 if (CV_HAS_WAITERS(&pp->p_cv))
443 cv_broadcast(&pp->p_cv);
444 mutex_exit(pse);
448 * Read the comments inside of page_lock_es() carefully.
450 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
451 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
452 * This is used by threads subject to reader-starvation (eg. memory delete).
454 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
455 * it is expected that it will retry at a later time. Threads that will
456 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
457 * SE_EWANTED bit. (When a thread using SE_EXCL_WANTED obtains the lock,
458 * the bit is cleared.)
461 page_try_reclaim_lock(page_t *pp, se_t se, int es)
463 kmutex_t *pse = PAGE_SE_MUTEX(pp);
464 selock_t old;
466 mutex_enter(pse);
468 old = pp->p_selock;
470 ASSERT(((es & SE_EXCL_WANTED) == 0) ||
471 ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));
473 if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
474 mutex_exit(pse);
475 VM_STAT_ADD(page_trylock_failed);
476 return (0);
479 if (se == SE_SHARED && es == 1 && old == 0) {
480 se = SE_EXCL;
483 if (se == SE_SHARED) {
484 if (!PP_ISFREE(pp)) {
485 if (old >= 0) {
487 * Readers are not allowed when excl wanted
489 if ((old & SE_EWANTED) == 0) {
490 pp->p_selock = old + SE_READER;
491 mutex_exit(pse);
492 return (1);
495 mutex_exit(pse);
496 return (0);
499 * The page is free, so we really want SE_EXCL (below)
501 VM_STAT_ADD(page_try_reclaim_upgrade);
505 * The caller wants a writer lock. We try for it only if
506 * SE_EWANTED is not set, or if the caller specified
507 * SE_EXCL_WANTED.
509 if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
510 if ((old & ~SE_EWANTED) == 0) {
511 /* no reader/writer lock held */
512 THREAD_KPRI_REQUEST();
513 /* this clears out our setting of the SE_EWANTED bit */
514 pp->p_selock = SE_WRITER;
515 mutex_exit(pse);
516 return (1);
519 if (es & SE_EXCL_WANTED) {
520 /* page is locked, set the SE_EWANTED bit */
521 pp->p_selock |= SE_EWANTED;
523 mutex_exit(pse);
524 return (0);
528 * Acquire a page's "shared/exclusive" lock, but never block.
529 * Returns 1 on success, 0 on failure.
532 page_trylock(page_t *pp, se_t se)
534 kmutex_t *pse = PAGE_SE_MUTEX(pp);
536 mutex_enter(pse);
537 if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
538 (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
540 * Fail if a thread wants exclusive access and page is
541 * retired, if the page is slated for retirement, or a
542 * share lock is requested.
544 mutex_exit(pse);
545 VM_STAT_ADD(page_trylock_failed);
546 return (0);
549 if (se == SE_EXCL) {
550 if (pp->p_selock == 0) {
551 THREAD_KPRI_REQUEST();
552 pp->p_selock = SE_WRITER;
553 mutex_exit(pse);
554 return (1);
556 } else {
557 if (pp->p_selock >= 0) {
558 pp->p_selock += SE_READER;
559 mutex_exit(pse);
560 return (1);
563 mutex_exit(pse);
564 return (0);
568 * Variant of page_unlock() specifically for the page freelist
569 * code. The mere existence of this code is a vile hack that
570 * has resulted due to the backwards locking order of the page
571 * freelist manager; please don't call it.
573 void
574 page_unlock_nocapture(page_t *pp)
576 kmutex_t *pse = PAGE_SE_MUTEX(pp);
577 selock_t old;
579 mutex_enter(pse);
581 old = pp->p_selock;
582 if ((old & ~SE_EWANTED) == SE_READER) {
583 pp->p_selock = old & ~SE_READER;
584 if (CV_HAS_WAITERS(&pp->p_cv))
585 cv_broadcast(&pp->p_cv);
586 } else if ((old & ~SE_EWANTED) == SE_DELETED) {
587 panic("page_unlock_nocapture: page %p is deleted", (void *)pp);
588 } else if (old < 0) {
589 THREAD_KPRI_RELEASE();
590 pp->p_selock &= SE_EWANTED;
591 if (CV_HAS_WAITERS(&pp->p_cv))
592 cv_broadcast(&pp->p_cv);
593 } else if ((old & ~SE_EWANTED) > SE_READER) {
594 pp->p_selock = old - SE_READER;
595 } else {
596 panic("page_unlock_nocapture: page %p is not locked",
597 (void *)pp);
600 mutex_exit(pse);
604 * Release the page's "shared/exclusive" lock and wake up anyone
605 * who might be waiting for it.
607 void
608 page_unlock(page_t *pp)
610 kmutex_t *pse = PAGE_SE_MUTEX(pp);
611 selock_t old;
613 mutex_enter(pse);
615 old = pp->p_selock;
616 if ((old & ~SE_EWANTED) == SE_READER) {
617 pp->p_selock = old & ~SE_READER;
618 if (CV_HAS_WAITERS(&pp->p_cv))
619 cv_broadcast(&pp->p_cv);
620 } else if ((old & ~SE_EWANTED) == SE_DELETED) {
621 panic("page_unlock: page %p is deleted", (void *)pp);
622 } else if (old < 0) {
623 THREAD_KPRI_RELEASE();
624 pp->p_selock &= SE_EWANTED;
625 if (CV_HAS_WAITERS(&pp->p_cv))
626 cv_broadcast(&pp->p_cv);
627 } else if ((old & ~SE_EWANTED) > SE_READER) {
628 pp->p_selock = old - SE_READER;
629 } else {
630 panic("page_unlock: page %p is not locked", (void *)pp);
633 if (pp->p_selock == 0) {
635 * If the T_CAPTURING bit is set, that means that we should
636 * not try and capture the page again as we could recurse
637 * which could lead to a stack overflow panic or spending a
638 * relatively long time in the kernel making no progress.
640 if ((pp->p_toxic & PR_CAPTURE) &&
641 !(curthread->t_flag & T_CAPTURING) &&
642 !PP_RETIRED(pp)) {
643 THREAD_KPRI_REQUEST();
644 pp->p_selock = SE_WRITER;
645 mutex_exit(pse);
646 page_unlock_capture(pp);
647 } else {
648 mutex_exit(pse);
650 } else {
651 mutex_exit(pse);
656 * Try to upgrade the lock on the page from a "shared" to an
657 * "exclusive" lock. Since this upgrade operation is done while
658 * holding the mutex protecting this page, no one else can acquire this page's
659 * lock and change the page. Thus, it is safe to drop the "shared"
660 * lock and attempt to acquire the "exclusive" lock.
662 * Returns 1 on success, 0 on failure.
665 page_tryupgrade(page_t *pp)
667 kmutex_t *pse = PAGE_SE_MUTEX(pp);
669 mutex_enter(pse);
670 if (!(pp->p_selock & SE_EWANTED)) {
671 /* no threads want exclusive access, try upgrade */
672 if (pp->p_selock == SE_READER) {
673 THREAD_KPRI_REQUEST();
674 /* convert to exclusive lock */
675 pp->p_selock = SE_WRITER;
676 mutex_exit(pse);
677 return (1);
680 mutex_exit(pse);
681 return (0);
685 * Downgrade the "exclusive" lock on the page to a "shared" lock
686 * while holding the mutex protecting this page's p_selock field.
688 void
689 page_downgrade(page_t *pp)
691 kmutex_t *pse = PAGE_SE_MUTEX(pp);
692 int excl_waiting;
694 ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
695 ASSERT(PAGE_EXCL(pp));
697 mutex_enter(pse);
698 excl_waiting = pp->p_selock & SE_EWANTED;
699 THREAD_KPRI_RELEASE();
700 pp->p_selock = SE_READER | excl_waiting;
701 if (CV_HAS_WAITERS(&pp->p_cv))
702 cv_broadcast(&pp->p_cv);
703 mutex_exit(pse);
706 void
707 page_lock_delete(page_t *pp)
709 kmutex_t *pse = PAGE_SE_MUTEX(pp);
711 ASSERT(PAGE_EXCL(pp));
712 ASSERT(pp->p_vnode == NULL);
713 ASSERT(pp->p_offset == (uoff_t)-1);
714 ASSERT(!PP_ISFREE(pp));
716 mutex_enter(pse);
717 THREAD_KPRI_RELEASE();
718 pp->p_selock = SE_DELETED;
719 if (CV_HAS_WAITERS(&pp->p_cv))
720 cv_broadcast(&pp->p_cv);
721 mutex_exit(pse);
725 page_deleted(page_t *pp)
727 return (pp->p_selock == SE_DELETED);
731 * Implement the io lock for pages
733 void
734 page_iolock_init(page_t *pp)
736 pp->p_iolock_state = 0;
737 cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
741 * Acquire the i/o lock on a page.
743 void
744 page_io_lock(page_t *pp)
746 kmutex_t *pio;
748 pio = PAGE_IO_MUTEX(pp);
749 mutex_enter(pio);
750 while (pp->p_iolock_state & PAGE_IO_INUSE) {
751 cv_wait(&(pp->p_io_cv), pio);
753 pp->p_iolock_state |= PAGE_IO_INUSE;
754 mutex_exit(pio);
758 * Release the i/o lock on a page.
760 void
761 page_io_unlock(page_t *pp)
763 kmutex_t *pio;
765 pio = PAGE_IO_MUTEX(pp);
766 mutex_enter(pio);
767 cv_broadcast(&pp->p_io_cv);
768 pp->p_iolock_state &= ~PAGE_IO_INUSE;
769 mutex_exit(pio);
773 * Try to acquire the i/o lock on a page without blocking.
774 * Returns 1 on success, 0 on failure.
777 page_io_trylock(page_t *pp)
779 kmutex_t *pio;
781 if (pp->p_iolock_state & PAGE_IO_INUSE)
782 return (0);
784 pio = PAGE_IO_MUTEX(pp);
785 mutex_enter(pio);
787 if (pp->p_iolock_state & PAGE_IO_INUSE) {
788 mutex_exit(pio);
789 return (0);
791 pp->p_iolock_state |= PAGE_IO_INUSE;
792 mutex_exit(pio);
794 return (1);
798 * Wait until the i/o lock is not held.
800 void
801 page_io_wait(page_t *pp)
803 kmutex_t *pio;
805 pio = PAGE_IO_MUTEX(pp);
806 mutex_enter(pio);
807 while (pp->p_iolock_state & PAGE_IO_INUSE) {
808 cv_wait(&(pp->p_io_cv), pio);
810 mutex_exit(pio);
814 * Returns 1 on success, 0 on failure.
817 page_io_locked(page_t *pp)
819 return (pp->p_iolock_state & PAGE_IO_INUSE);
823 * Assert that the i/o lock on a page is held.
824 * Returns 1 on success, 0 on failure.
827 page_iolock_assert(page_t *pp)
829 return (page_io_locked(pp));
832 kmutex_t *
833 page_vnode_mutex(vnode_t *vp)
835 return (&vp->v_pagecache_lock);
838 kmutex_t *
839 page_se_mutex(page_t *pp)
841 return (PAGE_SE_MUTEX(pp));
#ifdef VM_STATS
/* One counter per exit/retry path taken in page_szc_lock(). */
uint_t	pszclck_stat[4];
#endif /* VM_STATS */
848 * Find, take and return a mutex held by hat_page_demote().
849 * Called by page_demote_vp_pages() before hat_page_demote() call and by
850 * routines that want to block hat_page_demote() but can't do it
851 * via locking all constituent pages.
853 * Return NULL if p_szc is 0.
855 * It should only be used for pages that can be demoted by hat_page_demote()
856 * i.e. non swapfs file system pages. The logic here is lifted from
857 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
858 * since the page is locked and not free.
860 * Hash of the root page is used to find the lock.
861 * To find the root in the presense of hat_page_demote() chageing the location
862 * of the root this routine relies on the fact that hat_page_demote() changes
863 * root last.
865 * If NULL is returned pp's p_szc is guaranteed to be 0. If non NULL is
866 * returned pp's p_szc may be any value.
868 kmutex_t *
869 page_szc_lock(page_t *pp)
871 kmutex_t *mtx;
872 page_t *rootpp;
873 uint_t szc;
874 uint_t rszc;
875 uint_t pszc = pp->p_szc;
877 ASSERT(pp != NULL);
878 ASSERT(PAGE_LOCKED(pp));
879 ASSERT(!PP_ISFREE(pp));
880 ASSERT(pp->p_vnode != NULL);
881 ASSERT(!IS_SWAPFSVP(pp->p_vnode));
882 ASSERT(!PP_ISKAS(pp));
884 again:
885 if (pszc == 0) {
886 VM_STAT_ADD(pszclck_stat[0]);
887 return (NULL);
890 /* The lock lives in the root page */
892 rootpp = PP_GROUPLEADER(pp, pszc);
893 mtx = PAGE_SZC_MUTEX(rootpp);
894 mutex_enter(mtx);
897 * since p_szc can only decrease if pp == rootpp
898 * rootpp will be always the same i.e we have the right root
899 * regardless of rootpp->p_szc.
900 * If location of pp's root didn't change after we took
901 * the lock we have the right root. return mutex hashed off it.
903 if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
904 VM_STAT_ADD(pszclck_stat[1]);
905 return (mtx);
909 * root location changed because page got demoted.
910 * locate the new root.
912 if (rszc < pszc) {
913 szc = pp->p_szc;
914 ASSERT(szc < pszc);
915 mutex_exit(mtx);
916 pszc = szc;
917 VM_STAT_ADD(pszclck_stat[2]);
918 goto again;
921 VM_STAT_ADD(pszclck_stat[3]);
923 * current hat_page_demote not done yet.
924 * wait for it to finish.
926 mutex_exit(mtx);
927 rootpp = PP_GROUPLEADER(rootpp, rszc);
928 mtx = PAGE_SZC_MUTEX(rootpp);
929 mutex_enter(mtx);
930 mutex_exit(mtx);
931 ASSERT(rootpp->p_szc < rszc);
932 goto again;
936 page_szc_lock_assert(page_t *pp)
938 page_t *rootpp = PP_PAGEROOT(pp);
939 kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);
941 return (MUTEX_HELD(mtx));
945 * memseg locking
947 static krwlock_t memsegslock;
950 * memlist (phys_install, phys_avail) locking.
952 static krwlock_t memlists_lock;
955 memsegs_trylock(int writer)
957 return (rw_tryenter(&memsegslock, writer ? RW_WRITER : RW_READER));
960 void
961 memsegs_lock(int writer)
963 rw_enter(&memsegslock, writer ? RW_WRITER : RW_READER);
966 /*ARGSUSED*/
967 void
968 memsegs_unlock(int writer)
970 rw_exit(&memsegslock);
974 memsegs_lock_held(void)
976 return (RW_LOCK_HELD(&memsegslock));
979 void
980 memlist_read_lock(void)
982 rw_enter(&memlists_lock, RW_READER);
985 void
986 memlist_read_unlock(void)
988 rw_exit(&memlists_lock);
991 void
992 memlist_write_lock(void)
994 rw_enter(&memlists_lock, RW_WRITER);
997 void
998 memlist_write_unlock(void)
1000 rw_exit(&memlists_lock);