/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - page locking primitives
 */

#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/sysmacros.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *	p_lckcnt
 *	p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PIO_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 *
 * XX64	We should be using physmem size to calculate PIO_SHIFT.
 *
 *	These might break in 64 bit world.
 */
#define	PIO_SHIFT	7	/* log2(sizeof(page_t)) */
#define	PIO_TABLE_SIZE	128	/* number of io mutexes to have */

pad_mutex_t	ph_mutex[PH_TABLE_SIZE];
kmutex_t	pio_mutex[PIO_TABLE_SIZE];

#define	PAGE_IO_MUTEX(pp) \
	    &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]

/*
 * The pse_mutex[] array is allocated in the platform startup code
 * based on the size of the machine at startup.
 */
extern pad_mutex_t *pse_mutex;		/* Locks protecting pp->p_selock */
extern size_t pse_table_size;		/* Number of mutexes in pse_mutex[] */
extern int pse_shift;			/* log2(pse_table_size) */

#define	PAGE_SE_MUTEX(pp)	&pse_mutex[				\
	((((uintptr_t)(pp) >> pse_shift) ^ ((uintptr_t)(pp))) >> 7) &	\
	(pse_table_size - 1)].pad_mutex
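
/*
 * Illustrative sketch, not part of this file's interfaces: a stand-alone
 * model of the PAGE_SE_MUTEX() hash above, e.g. for eyeballing how page_t
 * addresses spread across the table.  The shift and table-size parameters
 * are hypothetical stand-ins for the values chosen at platform startup.
 */
#if 0
static size_t
pse_mutex_index(uintptr_t pp, int shift, size_t table_size)
{
	/* fold the high address bits into the low ones, as the macro does */
	return ((((pp >> shift) ^ pp) >> 7) & (table_size - 1));
}
#endif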

#define	PSZC_MTX_TABLE_SIZE	128
#define	PSZC_MTX_TABLE_SHIFT	7

static pad_mutex_t	pszc_mutex[PSZC_MTX_TABLE_SIZE];

#define	PAGE_SZC_MUTEX(_pp) \
	    &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
		((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
		((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
		(PSZC_MTX_TABLE_SIZE - 1))].pad_mutex

/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 *
 * XX64	VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
 *	Need to review again.
 */
#if defined(_LP64)
#define	VPH_TABLE_SIZE	(1 << (VP_SHIFT + 3))
#else	/* 32 bits */
#define	VPH_TABLE_SIZE	(2 << VP_SHIFT)
#endif

#define	VP_HASH_FUNC(vp) \
	((((uintptr_t)(vp) >> 6) + \
	    ((uintptr_t)(vp) >> 8) + \
	    ((uintptr_t)(vp) >> 10) + \
	    ((uintptr_t)(vp) >> 12)) \
	    & (VPH_TABLE_SIZE - 1))

extern struct vnode kvp;

/*
 * Two slots after VPH_TABLE_SIZE are reserved in vph_mutex for kernel vnodes.
 * The lock for kvp is VPH_TABLE_SIZE + 0, and the lock for zvp is
 * VPH_TABLE_SIZE + 1.
 */
kmutex_t	vph_mutex[VPH_TABLE_SIZE + 2];

/*
 * Initialize the locks used by the Virtual Memory Management system.
 */
void
page_lock_init()
{
}

/*
 * Return a value for pse_shift based on npg (the number of physical pages)
 * and ncpu (the maximum number of CPUs).  This is called by platform startup
 * code.
 *
 * Lockstat data from TPC-H runs showed that contention on the pse_mutex[]
 * locks grew approximately as the square of the number of threads executing.
 * So the primary scaling factor used is NCPU^2.  The size of the machine in
 * megabytes is used as an upper bound, particularly for sun4v machines which
 * all claim to have 256 CPUs maximum, and the old value of PSE_TABLE_SIZE
 * (128) is used as a minimum.  Since the size of the table has to be a power
 * of two, the calculated size is rounded up to the next power of two.
 */
int
size_pse_array(pgcnt_t npg, int ncpu)
{
	size_t size;
	pgcnt_t pp_per_mb = (1024 * 1024) / PAGESIZE;

	size = MAX(128, MIN(npg / pp_per_mb, 2 * ncpu * ncpu));
	size += (1 << (highbit(size) - 1)) - 1;
	return (highbit(size) - 1);
}
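
/*
 * Worked example (hypothetical machine, assuming 8K pages): with
 * ncpu == 64 and 32 GB of memory, npg / pp_per_mb == 32768 and
 * 2 * ncpu * ncpu == 8192, so size starts as
 * MAX(128, MIN(32768, 8192)) == 8192.  Adding
 * (1 << (highbit(8192) - 1)) - 1 == 8191 gives 16383, and
 * highbit(16383) - 1 == 13, so pse_shift is 13 and the table
 * gets 1 << 13 == 8192 entries.
 */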

/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)

/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function.  The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)
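
/*
 * Summary sketch of the p_selock encoding implied by the definitions
 * above (illustrative, not an exhaustive list of states):
 *
 *	0		unlocked
 *	> 0		held shared; a count of readers in units of
 *			SE_READER
 *	< 0		held exclusively; either a scrambled owner hint
 *			(SE_WRITER) or the deleted marker (SE_DELETED)
 *	SE_EWANTED	may be or'd into the value when a writer is
 *			waiting
 */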

#ifdef	VM_STATS
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
#endif	/* VM_STATS */

#ifdef	VM_STATS
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_retired;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

uint_t	page_trylock_locked;
uint_t	page_trylock_failed;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif	/* VM_STATS */

/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 * 0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.  This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */
int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
	return (page_lock_es(pp, se, lock, reclaim, 0));
}

/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set.  Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted	es flags	p_selock/SE_EWANTED	Action
 * -----------	--------------	-------------------	---------
 * SE_EXCL	any [1][2]	unlocked/any		grant lock, clear SE_EWANTED
 * SE_EXCL	SE_EWANTED	any lock/any		deny, set SE_EWANTED
 * SE_EXCL	none		any lock/any		deny
 * SE_SHARED	n/a [2]		shared/0		grant
 * SE_SHARED	n/a [2]		unlocked/0		grant
 * SE_SHARED	n/a		shared/1		deny
 * SE_SHARED	n/a		unlocked/1		deny
 * SE_SHARED	n/a		excl/any		deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *     SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *     bit's value.  This was deemed acceptable as we are not concerned about
 *     exclusive-lock starvation.  If this ever becomes an issue, a priority or
 *     fifo mechanism should also be implemented.  Meantime, the thread that
 *     set SE_EWANTED should be prepared to catch this condition and reset it.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *     disposition of se, unless the es parameter has SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 *   es & 1: page_lookup_create will attempt page relocation
 *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (e.g. delete
 *       memory thread); this prevents reader-starvation of waiting
 *       writer thread(s) by giving priority to writers over readers.
 *   es & SE_RETIRED: caller wants to lock pages even if they are
 *       retired.  Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
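
/*
 * Illustrative sketch of the common blocking pattern (a hypothetical
 * caller, not code from this file): pair page_lock() with page_unlock()
 * and treat failure as "identity may have changed, start over".
 */
#if 0
	if (page_lock(pp, SE_SHARED, NULL, P_NO_RECLAIM)) {
		/* page identity [vp, off] is now stable; use the page */
		page_unlock(pp);
	}
#endif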
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			if ((pp->p_selock & SE_EWANTED) == 0) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}

		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE: page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}

/*
 * Clear the SE_EWANTED bit from p_selock.  This function allows
 * callers of page_lock_es and page_try_reclaim_lock to clear
 * their setting of this bit if they decide they no longer wish
 * to gain exclusive access to the page.  Currently only
 * delete_memory_thread uses this when the delete memory
 * operation is cancelled.
 */
void
page_lock_clr_exclwanted(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	pp->p_selock &= ~SE_EWANTED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (e.g. memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time.  Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit.  (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
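
/*
 * Illustrative sketch of the retry protocol described above (the loop,
 * the cancellation flag and the delay are hypothetical, not code from
 * this file):
 */
#if 0
	for (;;) {
		if (page_try_reclaim_lock(pp, SE_EXCL, SE_EXCL_WANTED))
			break;	/* got SE_EXCL; SE_EWANTED was cleared */
		if (operation_cancelled) {
			/* giving up, so the bit must be cleared by hand */
			page_lock_clr_exclwanted(pp);
			return;
		}
		delay(1);
	}
#endif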
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/*
				 * Readers are not allowed when excl wanted
				 */
				if ((old & SE_EWANTED) == 0) {
					pp->p_selock = old + SE_READER;
					mutex_exit(pse);
					return (1);
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock.  We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	if (es & SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(page_t *pp, se_t se)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
	    (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
		/*
		 * Fail if a thread wants exclusive access and page is
		 * retired, if the page is slated for retirement, or a
		 * share lock is requested.
		 */
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_EXCL) {
		if (pp->p_selock == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	} else {
		if (pp->p_selock >= 0) {
			pp->p_selock += SE_READER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}
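
/*
 * Illustrative sketch (hypothetical caller): page_trylock() is the
 * non-blocking attempt, so scanners typically just skip pages they
 * cannot lock.
 */
#if 0
	if (page_trylock(pp, SE_SHARED)) {
		/* examine the page */
		page_unlock(pp);
	} else {
		/* skip it; someone holds it exclusively or it is retired */
	}
#endif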

/*
 * Variant of page_unlock() specifically for the page freelist
 * code.  The mere existence of this code is a vile hack that
 * exists because of the backwards locking order of the page
 * freelist manager; please don't call it.
 */
void
page_unlock_nocapture(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock_nocapture: page %p is deleted", (void *)pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock_nocapture: page %p is not locked",
		    (void *)pp);
	}

	mutex_exit(pse);
}

/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock: page %p is deleted", (void *)pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock: page %p is not locked", (void *)pp);
	}

	if (pp->p_selock == 0) {
		/*
		 * If the T_CAPTURING bit is set, that means that we should
		 * not try and capture the page again as we could recurse
		 * which could lead to a stack overflow panic or spending a
		 * relatively long time in the kernel making no progress.
		 */
		if ((pp->p_toxic & PR_CAPTURE) &&
		    !(curthread->t_flag & T_CAPTURING) &&
		    !PP_RETIRED(pp)) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			page_unlock_capture(pp);
		} else {
			mutex_exit(pse);
		}
	} else {
		mutex_exit(pse);
	}
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire this page's
 * lock and change the page.  Thus, it is safe to drop the "shared"
 * lock and attempt to acquire the "exclusive" lock.
 *
 * Returns 1 on success, 0 on failure.
 */
int
page_tryupgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (!(pp->p_selock & SE_EWANTED)) {
		/* no threads want exclusive access, try upgrade */
		if (pp->p_selock == SE_READER) {
			THREAD_KPRI_REQUEST();
			/* convert to exclusive lock */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}
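
/*
 * Illustrative sketch (hypothetical caller): the usual pattern when a
 * shared holder discovers it needs exclusive access.
 */
#if 0
	if (!page_tryupgrade(pp)) {
		/*
		 * Other readers are present; drop the shared lock and
		 * reacquire exclusively.  The page identity must be
		 * revalidated afterwards since the lock was dropped.
		 */
		page_unlock(pp);
		if (!page_lock(pp, SE_EXCL, NULL, P_NO_RECLAIM))
			return;
	}
#endif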

/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	int excl_waiting;

	ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
	ASSERT(PAGE_EXCL(pp));

	mutex_enter(pse);
	excl_waiting = pp->p_selock & SE_EWANTED;
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_READER | excl_waiting;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}
void
page_lock_delete(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_vnode == NULL);
	ASSERT(pp->p_offset == (u_offset_t)-1);
	ASSERT(!PP_ISFREE(pp));

	mutex_enter(pse);
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_DELETED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}
int
page_deleted(page_t *pp)
{
	return (pp->p_selock == SE_DELETED);
}

/*
 * Implement the io lock for pages
 */
void
page_iolock_init(page_t *pp)
{
	pp->p_iolock_state = 0;
	cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
}

/*
 * Acquire the i/o lock on a page.
 */
void
page_io_lock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	while (pp->p_iolock_state & PAGE_IO_INUSE) {
		cv_wait(&(pp->p_io_cv), pio);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);
}
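
/*
 * Illustrative sketch (hypothetical caller): the i/o lock brackets the
 * physical transfer while p_selock continues to protect the page's
 * identity, so the two are typically held together.
 */
#if 0
	page_io_lock(pp);
	/* issue and wait for the physical i/o on pp */
	page_io_unlock(pp);
#endif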

/*
 * Release the i/o lock on a page.
 */
void
page_io_unlock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	cv_broadcast(&pp->p_io_cv);
	pp->p_iolock_state &= ~PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Try to acquire the i/o lock on a page without blocking.
 * Returns 1 on success, 0 on failure.
 */
int
page_io_trylock(page_t *pp)
{
	kmutex_t *pio;

	if (pp->p_iolock_state & PAGE_IO_INUSE)
		return (0);

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);

	if (pp->p_iolock_state & PAGE_IO_INUSE) {
		mutex_exit(pio);
		return (0);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);

	return (1);
}

/*
 * Wait until the i/o lock is not held.
 */
void
page_io_wait(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	while (pp->p_iolock_state & PAGE_IO_INUSE) {
		cv_wait(&(pp->p_io_cv), pio);
	}
	mutex_exit(pio);
}

/*
 * Returns non-zero if the i/o lock on the page is held, 0 otherwise.
 */
int
page_io_locked(page_t *pp)
{
	return (pp->p_iolock_state & PAGE_IO_INUSE);
}

/*
 * Assert that the i/o lock on a page is held.
 * Returns 1 on success, 0 on failure.
 */
int
page_iolock_assert(page_t *pp)
{
	return (page_io_locked(pp));
}

/*
 * Wrapper exported to kernel routines that are built
 * platform-independent (the macro is platform-dependent;
 * the size of vph_mutex[] is based on NCPU).
 *
 * Note that you can do stress testing on this by setting the
 * variable page_vnode_mutex_stress to something other than
 * zero in a DEBUG kernel in a debugger after loading the kernel.
 * Setting it after the kernel is running may not work correctly.
 */
#ifdef	DEBUG
static int page_vnode_mutex_stress = 0;
#endif

kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
	if (vp == &kvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 0]);

	if (vp == &zvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 1]);
#ifdef	DEBUG
	if (page_vnode_mutex_stress != 0)
		return (&vph_mutex[0]);
#endif

	return (&vph_mutex[VP_HASH_FUNC(vp)]);
}
kmutex_t *
page_se_mutex(page_t *pp)
{
	return (PAGE_SE_MUTEX(pp));
}

#ifdef	VM_STATS
uint_t	pszclck_stat[4];
#endif

/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote()
 * i.e. non swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root this routine relies on the fact that hat_page_demote() changes
 * root last.
 *
 * If NULL is returned pp's p_szc is guaranteed to be 0. If non NULL is
 * returned pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
	kmutex_t	*mtx;
	page_t		*rootpp;
	uint_t		szc;
	uint_t		rszc;
	uint_t		pszc = pp->p_szc;

	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(!PP_ISKAS(pp));

again:
	if (pszc == 0) {
		VM_STAT_ADD(pszclck_stat[0]);
		return (NULL);
	}

	/* The lock lives in the root page */

	rootpp = PP_GROUPLEADER(pp, pszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);

	/*
	 * since p_szc can only decrease if pp == rootpp
	 * rootpp will be always the same i.e we have the right root
	 * regardless of rootpp->p_szc.
	 * If location of pp's root didn't change after we took
	 * the lock we have the right root. return mutex hashed off it.
	 */
	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
		VM_STAT_ADD(pszclck_stat[1]);
		return (mtx);
	}

	/*
	 * root location changed because page got demoted.
	 * locate the new root.
	 */
	if (rszc < pszc) {
		szc = pp->p_szc;
		ASSERT(szc < pszc);
		VM_STAT_ADD(pszclck_stat[2]);
		pszc = szc;
		mutex_exit(mtx);
		goto again;
	}

	VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * current hat_page_demote not done yet.
	 * wait for it to finish.
	 */
	mutex_exit(mtx);
	rootpp = PP_GROUPLEADER(rootpp, rszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);
	mutex_exit(mtx);
	ASSERT(rootpp->p_szc < rszc);
	goto again;
}
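
/*
 * Illustrative sketch (hypothetical caller): blocking hat_page_demote()
 * for the duration of an operation on a large page.
 */
#if 0
	kmutex_t *mtx = page_szc_lock(pp);

	if (mtx != NULL) {
		/* hat_page_demote() is blocked until mtx is dropped */
		mutex_exit(mtx);
	}
	/* else pp->p_szc was 0 and there was nothing to hold */
#endif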
int
page_szc_lock_assert(page_t *pp)
{
	page_t *rootpp = PP_PAGEROOT(pp);
	kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);

	return (MUTEX_HELD(mtx));
}

/*
 * memsegs locking.
 */
static krwlock_t memsegslock;

/*
 * memlist (phys_install, phys_avail) locking.
 */
static krwlock_t memlists_lock;

void
memsegs_lock(int writer)
{
	rw_enter(&memsegslock, writer ? RW_WRITER : RW_READER);
}

/*ARGSUSED*/
void
memsegs_unlock(int writer)
{
	rw_exit(&memsegslock);
}

int
memsegs_lock_held(void)
{
	return (RW_LOCK_HELD(&memsegslock));
}

void
memlist_read_lock(void)
{
	rw_enter(&memlists_lock, RW_READER);
}

void
memlist_read_unlock(void)
{
	rw_exit(&memlists_lock);
}

void
memlist_write_lock(void)
{
	rw_enter(&memlists_lock, RW_WRITER);
}

void
memlist_write_unlock(void)
{
	rw_exit(&memlists_lock);
}
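
/*
 * Illustrative sketch (hypothetical caller; the memlist field names are
 * assumed from sys/memlist.h of this vintage): walking phys_install
 * under the read lock.
 */
#if 0
	struct memlist *ml;

	memlist_read_lock();
	for (ml = phys_install; ml != NULL; ml = ml->next) {
		/* examine ml->address and ml->size */
	}
	memlist_read_unlock();
#endif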