4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
27 * VM - page locking primitives
29 #include <sys/param.h>
30 #include <sys/t_lock.h>
31 #include <sys/vtrace.h>
32 #include <sys/debug.h>
33 #include <sys/cmn_err.h>
34 #include <sys/bitmap.h>
35 #include <sys/lockstat.h>
36 #include <sys/sysmacros.h>
37 #include <sys/condvar_impl.h>
39 #include <vm/seg_enum.h>
40 #include <vm/vm_dep.h>
41 #include <vm/seg_kmem.h>
44 * This global mutex array is for logical page locking.
45 * The following fields in the page structure are protected
51 pad_mutex_t page_llocks
[8 * NCPU_P2
];
54 * This is a global lock for the logical page free list. The
55 * logical free list, in this implementation, is maintained as two
56 * separate physical lists - the cache list and the free list.
58 kmutex_t page_freelock
;
61 * The hash table, page_hash[], the p_selock fields, and the
62 * list of pages associated with vnodes are protected by arrays of mutexes.
64 * Unless the hashes are changed radically, the table sizes must be
65 * a power of two. Also, we typically need more mutexes for the
66 * vnodes since these locks are occasionally held for long periods.
67 * And since there seem to be two special vnodes (kvp and swapvp),
68 * we make room for private mutexes for them.
70 * The pse_mutex[] array holds the mutexes to protect the p_selock
71 * fields of all page_t structures.
73 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
74 * when given a pointer to a page_t.
76 * PIO_TABLE_SIZE must be a power of two. One could argue that we
77 * should go to the trouble of setting it up at run time and base it
78 * on memory size rather than the number of compile time CPUs.
80 * XX64 We should be using physmem size to calculate PIO_SHIFT.
82 * These might break in 64 bit world.
84 #define PIO_SHIFT 7 /* log2(sizeof(page_t)) */
85 #define PIO_TABLE_SIZE 128 /* number of io mutexes to have */
87 kmutex_t pio_mutex
[PIO_TABLE_SIZE
];
89 #define PAGE_IO_MUTEX(pp) \
90 &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]
93 * The pse_mutex[] array is allocated in the platform startup code
94 * based on the size of the machine at startup.
96 extern pad_mutex_t
*pse_mutex
; /* Locks protecting pp->p_selock */
97 extern size_t pse_table_size
; /* Number of mutexes in pse_mutex[] */
98 extern int pse_shift
; /* log2(pse_table_size) */
99 #define PAGE_SE_MUTEX(pp) &pse_mutex[ \
100 ((((uintptr_t)(pp) >> pse_shift) ^ ((uintptr_t)(pp))) >> 7) & \
101 (pse_table_size - 1)].pad_mutex
103 #define PSZC_MTX_TABLE_SIZE 128
104 #define PSZC_MTX_TABLE_SHIFT 7
106 static pad_mutex_t pszc_mutex
[PSZC_MTX_TABLE_SIZE
];
108 #define PAGE_SZC_MUTEX(_pp) \
109 &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
110 ((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
111 ((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
112 (PSZC_MTX_TABLE_SIZE - 1))].pad_mutex
115 * Initialize the locks used by the Virtual Memory Management system.
123 * Return a value for pse_shift based on npg (the number of physical pages)
124 * and ncpu (the maximum number of CPUs). This is called by platform startup
127 * Lockstat data from TPC-H runs showed that contention on the pse_mutex[]
128 * locks grew approximately as the square of the number of threads executing.
129 * So the primary scaling factor used is NCPU^2. The size of the machine in
130 * megabytes is used as an upper bound, particularly for sun4v machines which
131 * all claim to have 256 CPUs maximum, and the old value of PSE_TABLE_SIZE
132 * (128) is used as a minimum. Since the size of the table has to be a power
133 * of two, the calculated size is rounded up to the next power of two.
137 size_pse_array(pgcnt_t npg
, int ncpu
)
140 pgcnt_t pp_per_mb
= (1024 * 1024) / PAGESIZE
;
142 size
= MAX(128, MIN(npg
/ pp_per_mb
, 2 * ncpu
* ncpu
));
143 size
+= (1 << (highbit(size
) - 1)) - 1;
144 return (highbit(size
) - 1);
148 * At present we only use page ownership to aid debugging, so it's
149 * OK if the owner field isn't exact. In the 32-bit world two thread ids
150 * can map to the same owner because we just 'or' in 0x80000000 and
151 * then clear the second highest bit, so that (for example) 0x2faced00
152 * and 0xafaced00 both map to 0xafaced00.
153 * In the 64-bit world, p_selock may not be large enough to hold a full
154 * thread pointer. If we ever need precise ownership (e.g. if we implement
155 * priority inheritance for page locks) then p_selock should become a
156 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
158 #define SE_WRITER (((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
162 * A page that is deleted must be marked as such using the
163 * page_lock_delete() function. The page must be exclusively locked.
164 * The SE_DELETED marker is put in p_selock when this function is called.
165 * SE_DELETED must be distinct from any SE_WRITER value.
167 #define SE_DELETED (1 | INT_MIN)
170 uint_t vph_kvp_count
;
171 uint_t vph_swapfsvp_count
;
174 uint_t page_lock_count
;
175 uint_t page_lock_miss
;
176 uint_t page_lock_miss_lock
;
177 uint_t page_lock_reclaim
;
178 uint_t page_lock_bad_reclaim
;
179 uint_t page_lock_same_page
;
180 uint_t page_lock_upgrade
;
181 uint_t page_lock_retired
;
182 uint_t page_lock_upgrade_failed
;
183 uint_t page_lock_deleted
;
185 uint_t page_trylock_locked
;
186 uint_t page_trylock_failed
;
187 uint_t page_trylock_missed
;
189 uint_t page_try_reclaim_upgrade
;
190 #endif /* VM_STATS */
193 * Acquire the "shared/exclusive" lock on a page.
195 * Returns 1 on success and locks the page appropriately.
196 * 0 on failure and does not lock the page.
198 * If `lock' is non-NULL, it will be dropped and reacquired in the
199 * failure case. This routine can block, and if it does
200 * it will always return a failure since the page identity [vp, off]
201 * or state may have changed.
205 page_lock(struct page
*page
, se_t se
, struct vmobject
*obj
, reclaim_t reclaim
)
207 return (page_lock_es(page
, se
, obj
, reclaim
, 0));
211 * With the addition of reader-writer lock semantics to page_lock_es,
212 * callers wanting an exclusive (writer) lock may prevent shared-lock
213 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
214 * In this case, when an exclusive lock cannot be acquired, p_selock's
215 * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
216 * if the page is slated for retirement.
218 * The se and es parameters determine if the lock should be granted
219 * based on the following decision table:
221 * Lock wanted es flags p_selock/SE_EWANTED Action
222 * ----------- -------------- ------------------- ---------
223 * SE_EXCL any [1][2] unlocked/any grant lock, clear SE_EWANTED
224 * SE_EXCL SE_EWANTED any lock/any deny, set SE_EWANTED
225 * SE_EXCL none any lock/any deny
226 * SE_SHARED n/a [2] shared/0 grant
227 * SE_SHARED n/a [2] unlocked/0 grant
228 * SE_SHARED n/a shared/1 deny
229 * SE_SHARED n/a unlocked/1 deny
230 * SE_SHARED n/a excl/any deny
233 * [1] The code grants an exclusive lock to the caller and clears the bit
234 * SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
235 * bit's value. This was deemed acceptable as we are not concerned about
236 * exclusive-lock starvation. If this ever becomes an issue, a priority or
237 * fifo mechanism should also be implemented. Meantime, the thread that
238 * set SE_EWANTED should be prepared to catch this condition and reset it
240 * [2] Retired pages may not be locked at any time, regardless of the
241 * disposition of se, unless the es parameter has the SE_RETIRED flag set.
243 * Notes on values of "es":
245 * es & 1: page_lookup_create will attempt page relocation
246 * es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
247 * memory thread); this prevents reader-starvation of waiting
248 * writer thread(s) by giving priority to writers over readers.
249 * es & SE_RETIRED: caller wants to lock pages even if they are
250 * retired. Default is to deny the lock if the page is retired.
252 * And yes, we know, the semantics of this function are too complicated.
253 * It's on the list to be cleaned up.
256 page_lock_es(struct page
*pp
, se_t se
, struct vmobject
*obj
, reclaim_t reclaim
,
260 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
264 ASSERT(obj
!= NULL
? VMOBJECT_LOCKED(obj
) : 1);
266 VM_STAT_ADD(page_lock_count
);
273 ASSERT(((es
& SE_EXCL_WANTED
) == 0) ||
274 ((es
& SE_EXCL_WANTED
) && (se
== SE_EXCL
)));
276 if (PP_RETIRED(pp
) && !(es
& SE_RETIRED
)) {
278 VM_STAT_ADD(page_lock_retired
);
282 if (se
== SE_SHARED
&& es
== 1 && pp
->p_selock
== 0) {
286 if ((reclaim
== P_RECLAIM
) && (PP_ISFREE(pp
))) {
289 if (se
== SE_SHARED
) {
291 * This is an interesting situation.
293 * Remember that p_free can only change if
295 * p_free does not depend on our holding `pse'.
296 * And, since we hold `pse', p_selock can not change.
297 * So, if p_free changes on us, the page is already
298 * exclusively held, and we would fail to get p_selock
301 * We want to avoid getting the share
302 * lock on a free page that needs to be reclaimed.
303 * It is possible that some other thread has the share
304 * lock and has left the free page on the cache list.
305 * pvn_vplist_dirty() does this for brief periods.
306 * If the se_share is currently SE_EXCL, we will fail
307 * to acquire p_selock anyway. Blocking is the
309 * If we need to reclaim this page, we must get
310 * exclusive access to it, force the upgrade now.
311 * Again, we will fail to acquire p_selock if the
312 * page is not free and block.
316 VM_STAT_ADD(page_lock_upgrade
);
321 if (!(es
& SE_EXCL_WANTED
) && (pp
->p_selock
& SE_EWANTED
)) {
323 * if the caller wants a writer lock (but did not
324 * specify exclusive access), and there is a pending
325 * writer that wants exclusive access, return failure
328 } else if ((pp
->p_selock
& ~SE_EWANTED
) == 0) {
329 /* no reader/writer lock held */
330 THREAD_KPRI_REQUEST();
331 /* this clears our setting of the SE_EWANTED bit */
332 pp
->p_selock
= SE_WRITER
;
336 if (es
& SE_EXCL_WANTED
) {
337 /* set the SE_EWANTED bit */
338 pp
->p_selock
|= SE_EWANTED
;
344 if (pp
->p_selock
>= 0) {
345 if ((pp
->p_selock
& SE_EWANTED
) == 0) {
346 pp
->p_selock
+= SE_READER
;
353 if ((pp
->p_selock
& ~SE_EWANTED
) == SE_DELETED
) {
354 VM_STAT_ADD(page_lock_deleted
);
359 VM_STAT_ADD(page_lock_miss
);
360 VM_STAT_COND_ADD(upgraded
, page_lock_upgrade_failed
);
363 VM_STAT_ADD(page_lock_miss_lock
);
364 vmobject_unlock(obj
);
368 * Now, wait for the page to be unlocked and
369 * release the lock protecting p_cv and p_selock.
371 cv_wait(&pp
->p_cv
, pse
);
375 * The page identity may have changed while we were
376 * blocked. If we are willing to depend on "pp"
377 * still pointing to a valid page structure (i.e.,
378 * assuming page structures are not dynamically allocated
379 * or freed), we could try to lock the page if its
380 * identity hasn't changed.
382 * This needs to be measured, since we come back from
383 * cv_wait holding pse (the expensive part of this
384 * operation) we might as well try the cheap part.
385 * Though we would also have to confirm that dropping
386 * vmobject page lock did not cause any grief to the
393 * We have the page lock.
394 * If we needed to reclaim the page, and the page
395 * needed reclaiming (ie, it was free), then we
396 * have the page exclusively locked. We may need
397 * to downgrade the page.
400 ((PP_ISFREE(pp
)) && PAGE_EXCL(pp
)) : 1);
404 * We now hold this page's lock, either shared or
405 * exclusive. This will prevent its identity from changing.
406 * The page, however, may or may not be free. If the caller
407 * requested, and it is free, go reclaim it from the
408 * free list. If the page can't be reclaimed, return failure
409 * so that the caller can start all over again.
411 * NOTE:page_reclaim() releases the page lock (p_selock)
412 * if it can't be reclaimed.
415 if (!page_reclaim(pp
, obj
)) {
416 VM_STAT_ADD(page_lock_bad_reclaim
);
419 VM_STAT_ADD(page_lock_reclaim
);
430 * Clear the SE_EWANTED bit from p_selock. This function allows
431 * callers of page_lock_es and page_try_reclaim_lock to clear
432 * their setting of this bit if they decide they no longer wish
433 * to gain exclusive access to the page. Currently only
434 * delete_memory_thread uses this when the delete memory
435 * operation is cancelled.
438 page_lock_clr_exclwanted(page_t
*pp
)
440 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
443 pp
->p_selock
&= ~SE_EWANTED
;
444 if (CV_HAS_WAITERS(&pp
->p_cv
))
445 cv_broadcast(&pp
->p_cv
);
450 * Read the comments inside of page_lock_es() carefully.
452 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
453 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
454 * This is used by threads subject to reader-starvation (eg. memory delete).
456 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
457 * it is expected that it will retry at a later time. Threads that will
458 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
459 * SE_EWANTED bit. (When a thread using SE_EXCL_WANTED obtains the lock,
460 * the bit is cleared.)
463 page_try_reclaim_lock(page_t
*pp
, se_t se
, int es
)
465 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
472 ASSERT(((es
& SE_EXCL_WANTED
) == 0) ||
473 ((es
& SE_EXCL_WANTED
) && (se
== SE_EXCL
)));
475 if (PP_RETIRED(pp
) && !(es
& SE_RETIRED
)) {
477 VM_STAT_ADD(page_trylock_failed
);
481 if (se
== SE_SHARED
&& es
== 1 && old
== 0) {
485 if (se
== SE_SHARED
) {
486 if (!PP_ISFREE(pp
)) {
489 * Readers are not allowed when excl wanted
491 if ((old
& SE_EWANTED
) == 0) {
492 pp
->p_selock
= old
+ SE_READER
;
501 * The page is free, so we really want SE_EXCL (below)
503 VM_STAT_ADD(page_try_reclaim_upgrade
);
507 * The caller wants a writer lock. We try for it only if
508 * SE_EWANTED is not set, or if the caller specified
511 if (!(old
& SE_EWANTED
) || (es
& SE_EXCL_WANTED
)) {
512 if ((old
& ~SE_EWANTED
) == 0) {
513 /* no reader/writer lock held */
514 THREAD_KPRI_REQUEST();
515 /* this clears out our setting of the SE_EWANTED bit */
516 pp
->p_selock
= SE_WRITER
;
521 if (es
& SE_EXCL_WANTED
) {
522 /* page is locked, set the SE_EWANTED bit */
523 pp
->p_selock
|= SE_EWANTED
;
530 * Acquire a page's "shared/exclusive" lock, but never block.
531 * Returns 1 on success, 0 on failure.
534 page_trylock(page_t
*pp
, se_t se
)
536 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
539 if (pp
->p_selock
& SE_EWANTED
|| PP_RETIRED(pp
) ||
540 (se
== SE_SHARED
&& PP_PR_NOSHARE(pp
))) {
542 * Fail if a thread wants exclusive access, if the page is
543 * retired, or if a share lock is requested on a page that
544 * is slated for retirement.
547 VM_STAT_ADD(page_trylock_failed
);
552 if (pp
->p_selock
== 0) {
553 THREAD_KPRI_REQUEST();
554 pp
->p_selock
= SE_WRITER
;
559 if (pp
->p_selock
>= 0) {
560 pp
->p_selock
+= SE_READER
;
570 * Variant of page_unlock() specifically for the page freelist
571 * code. The mere existence of this code is a vile hack that
572 * has resulted due to the backwards locking order of the page
573 * freelist manager; please don't call it.
576 page_unlock_nocapture(page_t
*pp
)
578 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
584 if ((old
& ~SE_EWANTED
) == SE_READER
) {
585 pp
->p_selock
= old
& ~SE_READER
;
586 if (CV_HAS_WAITERS(&pp
->p_cv
))
587 cv_broadcast(&pp
->p_cv
);
588 } else if ((old
& ~SE_EWANTED
) == SE_DELETED
) {
589 panic("page_unlock_nocapture: page %p is deleted", (void *)pp
);
590 } else if (old
< 0) {
591 THREAD_KPRI_RELEASE();
592 pp
->p_selock
&= SE_EWANTED
;
593 if (CV_HAS_WAITERS(&pp
->p_cv
))
594 cv_broadcast(&pp
->p_cv
);
595 } else if ((old
& ~SE_EWANTED
) > SE_READER
) {
596 pp
->p_selock
= old
- SE_READER
;
598 panic("page_unlock_nocapture: page %p is not locked",
606 * Release the page's "shared/exclusive" lock and wake up anyone
607 * who might be waiting for it.
610 page_unlock(page_t
*pp
)
612 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
618 if ((old
& ~SE_EWANTED
) == SE_READER
) {
619 pp
->p_selock
= old
& ~SE_READER
;
620 if (CV_HAS_WAITERS(&pp
->p_cv
))
621 cv_broadcast(&pp
->p_cv
);
622 } else if ((old
& ~SE_EWANTED
) == SE_DELETED
) {
623 panic("page_unlock: page %p is deleted", (void *)pp
);
624 } else if (old
< 0) {
625 THREAD_KPRI_RELEASE();
626 pp
->p_selock
&= SE_EWANTED
;
627 if (CV_HAS_WAITERS(&pp
->p_cv
))
628 cv_broadcast(&pp
->p_cv
);
629 } else if ((old
& ~SE_EWANTED
) > SE_READER
) {
630 pp
->p_selock
= old
- SE_READER
;
632 panic("page_unlock: page %p is not locked", (void *)pp
);
635 if (pp
->p_selock
== 0) {
637 * If the T_CAPTURING bit is set, that means that we should
638 * not try and capture the page again as we could recurse
639 * which could lead to a stack overflow panic or spending a
640 * relatively long time in the kernel making no progress.
642 if ((pp
->p_toxic
& PR_CAPTURE
) &&
643 !(curthread
->t_flag
& T_CAPTURING
) &&
645 THREAD_KPRI_REQUEST();
646 pp
->p_selock
= SE_WRITER
;
648 page_unlock_capture(pp
);
658 * Try to upgrade the lock on the page from a "shared" to an
659 * "exclusive" lock. Since this upgrade operation is done while
660 * holding the mutex protecting this page, no one else can acquire this page's
661 * lock and change the page. Thus, it is safe to drop the "shared"
662 * lock and attempt to acquire the "exclusive" lock.
664 * Returns 1 on success, 0 on failure.
667 page_tryupgrade(page_t
*pp
)
669 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
672 if (!(pp
->p_selock
& SE_EWANTED
)) {
673 /* no threads want exclusive access, try upgrade */
674 if (pp
->p_selock
== SE_READER
) {
675 THREAD_KPRI_REQUEST();
676 /* convert to exclusive lock */
677 pp
->p_selock
= SE_WRITER
;
687 * Downgrade the "exclusive" lock on the page to a "shared" lock
688 * while holding the mutex protecting this page's p_selock field.
691 page_downgrade(page_t
*pp
)
693 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
696 ASSERT((pp
->p_selock
& ~SE_EWANTED
) != SE_DELETED
);
697 ASSERT(PAGE_EXCL(pp
));
700 excl_waiting
= pp
->p_selock
& SE_EWANTED
;
701 THREAD_KPRI_RELEASE();
702 pp
->p_selock
= SE_READER
| excl_waiting
;
703 if (CV_HAS_WAITERS(&pp
->p_cv
))
704 cv_broadcast(&pp
->p_cv
);
709 page_lock_delete(page_t
*pp
)
711 kmutex_t
*pse
= PAGE_SE_MUTEX(pp
);
713 ASSERT(PAGE_EXCL(pp
));
714 VERIFY(pp
->p_object
== NULL
);
715 ASSERT(pp
->p_vnode
== NULL
);
716 ASSERT(pp
->p_offset
== (uoff_t
)-1);
717 ASSERT(!PP_ISFREE(pp
));
720 THREAD_KPRI_RELEASE();
721 pp
->p_selock
= SE_DELETED
;
722 if (CV_HAS_WAITERS(&pp
->p_cv
))
723 cv_broadcast(&pp
->p_cv
);
728 page_deleted(page_t
*pp
)
730 return (pp
->p_selock
== SE_DELETED
);
734 * Implement the io lock for pages
737 page_iolock_init(page_t
*pp
)
739 pp
->p_iolock_state
= 0;
740 cv_init(&pp
->p_io_cv
, NULL
, CV_DEFAULT
, NULL
);
744 * Acquire the i/o lock on a page.
747 page_io_lock(page_t
*pp
)
751 pio
= PAGE_IO_MUTEX(pp
);
753 while (pp
->p_iolock_state
& PAGE_IO_INUSE
) {
754 cv_wait(&(pp
->p_io_cv
), pio
);
756 pp
->p_iolock_state
|= PAGE_IO_INUSE
;
761 * Release the i/o lock on a page.
764 page_io_unlock(page_t
*pp
)
768 pio
= PAGE_IO_MUTEX(pp
);
770 cv_broadcast(&pp
->p_io_cv
);
771 pp
->p_iolock_state
&= ~PAGE_IO_INUSE
;
776 * Try to acquire the i/o lock on a page without blocking.
777 * Returns 1 on success, 0 on failure.
780 page_io_trylock(page_t
*pp
)
784 if (pp
->p_iolock_state
& PAGE_IO_INUSE
)
787 pio
= PAGE_IO_MUTEX(pp
);
790 if (pp
->p_iolock_state
& PAGE_IO_INUSE
) {
794 pp
->p_iolock_state
|= PAGE_IO_INUSE
;
801 * Wait until the i/o lock is not held.
804 page_io_wait(page_t
*pp
)
808 pio
= PAGE_IO_MUTEX(pp
);
810 while (pp
->p_iolock_state
& PAGE_IO_INUSE
) {
811 cv_wait(&(pp
->p_io_cv
), pio
);
817 * Returns 1 on success, 0 on failure.
820 page_io_locked(page_t
*pp
)
822 return (pp
->p_iolock_state
& PAGE_IO_INUSE
);
826 * Assert that the i/o lock on a page is held.
827 * Returns 1 on success, 0 on failure.
830 page_iolock_assert(page_t
*pp
)
832 return (page_io_locked(pp
));
836 page_se_mutex(page_t
*pp
)
838 return (PAGE_SE_MUTEX(pp
));
842 uint_t pszclck_stat
[4];
845 * Find, take and return a mutex held by hat_page_demote().
846 * Called by page_demote_vp_pages() before hat_page_demote() call and by
847 * routines that want to block hat_page_demote() but can't do it
848 * via locking all constituent pages.
850 * Return NULL if p_szc is 0.
852 * It should only be used for pages that can be demoted by hat_page_demote()
853 * i.e. non swapfs file system pages. The logic here is lifted from
854 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
855 * since the page is locked and not free.
857 * Hash of the root page is used to find the lock.
858 * To find the root in the presence of hat_page_demote() changing the location
859 * of the root this routine relies on the fact that hat_page_demote() changes
862 * If NULL is returned pp's p_szc is guaranteed to be 0. If non NULL is
863 * returned pp's p_szc may be any value.
866 page_szc_lock(page_t
*pp
)
872 uint_t pszc
= pp
->p_szc
;
875 ASSERT(PAGE_LOCKED(pp
));
876 ASSERT(!PP_ISFREE(pp
));
877 VERIFY(pp
->p_object
!= NULL
);
878 ASSERT(pp
->p_vnode
!= NULL
);
879 ASSERT(!IS_SWAPFSVP(pp
->p_vnode
));
880 ASSERT(!PP_ISKAS(pp
));
884 VM_STAT_ADD(pszclck_stat
[0]);
888 /* The lock lives in the root page */
890 rootpp
= PP_GROUPLEADER(pp
, pszc
);
891 mtx
= PAGE_SZC_MUTEX(rootpp
);
895 * since p_szc can only decrease if pp == rootpp
896 * rootpp will be always the same i.e we have the right root
897 * regardless of rootpp->p_szc.
898 * If location of pp's root didn't change after we took
899 * the lock we have the right root. return mutex hashed off it.
901 if (pp
== rootpp
|| (rszc
= rootpp
->p_szc
) == pszc
) {
902 VM_STAT_ADD(pszclck_stat
[1]);
907 * root location changed because page got demoted.
908 * locate the new root.
915 VM_STAT_ADD(pszclck_stat
[2]);
919 VM_STAT_ADD(pszclck_stat
[3]);
921 * current hat_page_demote not done yet.
922 * wait for it to finish.
925 rootpp
= PP_GROUPLEADER(rootpp
, rszc
);
926 mtx
= PAGE_SZC_MUTEX(rootpp
);
929 ASSERT(rootpp
->p_szc
< rszc
);
934 page_szc_lock_assert(page_t
*pp
)
936 page_t
*rootpp
= PP_PAGEROOT(pp
);
937 kmutex_t
*mtx
= PAGE_SZC_MUTEX(rootpp
);
939 return (MUTEX_HELD(mtx
));
945 static krwlock_t memsegslock
;
948 * memlist (phys_install, phys_avail) locking.
950 static krwlock_t memlists_lock
;
953 memsegs_trylock(int writer
)
955 return (rw_tryenter(&memsegslock
, writer
? RW_WRITER
: RW_READER
));
959 memsegs_lock(int writer
)
961 rw_enter(&memsegslock
, writer
? RW_WRITER
: RW_READER
);
966 memsegs_unlock(int writer
)
968 rw_exit(&memsegslock
);
972 memsegs_lock_held(void)
974 return (RW_LOCK_HELD(&memsegslock
));
978 memlist_read_lock(void)
980 rw_enter(&memlists_lock
, RW_READER
);
984 memlist_read_unlock(void)
986 rw_exit(&memlists_lock
);
990 memlist_write_lock(void)
992 rw_enter(&memlists_lock
, RW_WRITER
);
996 memlist_write_unlock(void)
998 rw_exit(&memlists_lock
);