usr/src/uts/common/vm/page_lock.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - page locking primitives
 */
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/sysmacros.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *	p_lckcnt
 *	p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PIO_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 *
 * XX64	We should be using physmem size to calculate PIO_SHIFT.
 *
 *	These might break in 64 bit world.
 */
#define	PIO_SHIFT	7	/* log2(sizeof(page_t)) */
#define	PIO_TABLE_SIZE	128	/* number of io mutexes to have */

pad_mutex_t	ph_mutex[PH_TABLE_SIZE];
kmutex_t	pio_mutex[PIO_TABLE_SIZE];

#define	PAGE_IO_MUTEX(pp) \
	    &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]

/*
 * The pse_mutex[] array is allocated in the platform startup code
 * based on the size of the machine at startup.
 */
extern pad_mutex_t *pse_mutex;		/* Locks protecting pp->p_selock */
extern size_t pse_table_size;		/* Number of mutexes in pse_mutex[] */
extern int pse_shift;			/* log2(pse_table_size) */
#define	PAGE_SE_MUTEX(pp)	&pse_mutex[				\
	((((uintptr_t)(pp) >> pse_shift) ^ ((uintptr_t)(pp))) >> 7) &	\
	(pse_table_size - 1)].pad_mutex

#define	PSZC_MTX_TABLE_SIZE	128
#define	PSZC_MTX_TABLE_SHIFT	7

static pad_mutex_t	pszc_mutex[PSZC_MTX_TABLE_SIZE];

#define	PAGE_SZC_MUTEX(_pp) \
	    &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
		((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
		((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
		(PSZC_MTX_TABLE_SIZE - 1))].pad_mutex

/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 *
 * XX64	VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
 *	Need to review again.
 */
#if defined(_LP64)
#define	VPH_TABLE_SIZE	(1 << (VP_SHIFT + 3))
#else	/* 32 bits */
#define	VPH_TABLE_SIZE	(2 << VP_SHIFT)
#endif

#define	VP_HASH_FUNC(vp) \
	((((uintptr_t)(vp) >> 6) + \
	    ((uintptr_t)(vp) >> 8) + \
	    ((uintptr_t)(vp) >> 10) + \
	    ((uintptr_t)(vp) >> 12)) \
	    & (VPH_TABLE_SIZE - 1))

extern	struct vnode kvp;

/*
 * Two slots after VPH_TABLE_SIZE are reserved in vph_mutex for kernel vnodes.
 * The lock for kvp is VPH_TABLE_SIZE + 0, and the lock for zvp is
 * VPH_TABLE_SIZE + 1.
 */

kmutex_t	vph_mutex[VPH_TABLE_SIZE + 2];

/*
 * Initialize the locks used by the Virtual Memory Management system.
 */
void
page_lock_init()
{
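	/*
	 * Nothing to do here: the locks in this file live in statically
	 * declared, zero-filled storage (pse_mutex[] is allocated and
	 * sized by the platform startup code), so there is no runtime
	 * setup to perform.
	 */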
}

/*
 * Return a value for pse_shift based on npg (the number of physical pages)
 * and ncpu (the maximum number of CPUs).  This is called by platform startup
 * code.
 *
 * Lockstat data from TPC-H runs showed that contention on the pse_mutex[]
 * locks grew approximately as the square of the number of threads executing.
 * So the primary scaling factor used is NCPU^2.  The size of the machine in
 * megabytes is used as an upper bound, particularly for sun4v machines which
 * all claim to have 256 CPUs maximum, and the old value of PSE_TABLE_SIZE
 * (128) is used as a minimum.  Since the size of the table has to be a power
 * of two, the calculated size is rounded up to the next power of two.
 */
/*ARGSUSED*/
int
size_pse_array(pgcnt_t npg, int ncpu)
{
	size_t size;
	pgcnt_t pp_per_mb = (1024 * 1024) / PAGESIZE;
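
	/*
	 * Scale as NCPU^2, clamped between 128 and the machine size in
	 * megabytes, then round up to the next power of two: adding
	 * 2^(highbit - 1) - 1 bumps highbit() for any value that is not
	 * already a power of two, so highbit() - 1 of the sum is the
	 * log2 value that pse_shift wants.
	 */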
	size = MAX(128, MIN(npg / pp_per_mb, 2 * ncpu * ncpu));
	size += (1 << (highbit(size) - 1)) - 1;
	return (highbit(size) - 1);
}

/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define	SE_READER	1
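
/*
 * p_selock thus encodes: 0 for unlocked, a positive count of SE_READER
 * units for that many shared holders, and a negative value (SE_WRITER
 * or SE_DELETED, both of which have INT_MIN set) for an exclusive
 * holder.  The SE_EWANTED bit may be OR-ed into any of these states,
 * so it is masked off before most of the comparisons below.
 */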

/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function. The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)

#ifdef VM_STATS
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;
#endif /* VM_STATS */

#ifdef VM_STATS
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_retired;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

uint_t	page_trylock_locked;
uint_t	page_trylock_failed;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif /* VM_STATS */

/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 * 0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.  This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */
int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
	return (page_lock_es(pp, se, lock, reclaim, 0));
}

/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted   es flags      p_selock/SE_EWANTED  Action
 * ----------- -------------- -------------------   ---------
 * SE_EXCL        any [1][2]      unlocked/any      grant lock, clear SE_EWANTED
 * SE_EXCL        SE_EWANTED      any lock/any      deny, set SE_EWANTED
 * SE_EXCL        none            any lock/any      deny
 * SE_SHARED      n/a [2]           shared/0        grant
 * SE_SHARED      n/a [2]         unlocked/0        grant
 * SE_SHARED      n/a               shared/1        deny
 * SE_SHARED      n/a             unlocked/1        deny
 * SE_SHARED      n/a                 excl/any      deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *   SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *   bit's value. This was deemed acceptable as we are not concerned about
 *   exclusive-lock starvation. If this ever becomes an issue, a priority or
 *   fifo mechanism should also be implemented. Meantime, the thread that
 *   set SE_EWANTED should be prepared to catch this condition and reset it.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *   disposition of se, unless the es parameter has SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 * es & 1: page_lookup_create will attempt page relocation
 * es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
 * memory thread); this prevents reader-starvation of waiting
 * writer thread(s) by giving priority to writers over readers.
 * es & SE_RETIRED: caller wants to lock pages even if they are
 * retired.  Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}
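
	/*
	 * es == 1 means only the relocation flag is set (see "es & 1"
	 * in the block comment above): page_lookup_create may attempt
	 * page relocation, so take a free, unlocked page exclusively
	 * rather than shared.
	 */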
	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			if ((pp->p_selock & SE_EWANTED) == 0) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE:page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}

/*
 * Clear the SE_EWANTED bit from p_selock.  This function allows
 * callers of page_lock_es and page_try_reclaim_lock to clear
 * their setting of this bit if they decide they no longer wish
 * to gain exclusive access to the page.  Currently only
 * delete_memory_thread uses this when the delete memory
 * operation is cancelled.
 */
void
page_lock_clr_exclwanted(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	pp->p_selock &= ~SE_EWANTED;
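	/* wake any readers that were being held off only by SE_EWANTED */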
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (eg. memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time.  Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit.  (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/*
				 * Readers are not allowed when excl wanted
				 */
				if ((old & SE_EWANTED) == 0) {
					pp->p_selock = old + SE_READER;
					mutex_exit(pse);
					return (1);
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock.  We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}

	if (es & SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(page_t *pp, se_t se)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
	    (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
		/*
		 * Fail if another thread wants exclusive access
		 * (SE_EWANTED), if the page is retired, or if a shared
		 * lock is requested on a page slated for retirement.
		 */
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_EXCL) {
		if (pp->p_selock == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	} else {
		if (pp->p_selock >= 0) {
			pp->p_selock += SE_READER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Variant of page_unlock() specifically for the page freelist
 * code.  The mere existence of this code is a vile hack that
 * results from the backwards locking order of the page
 * freelist manager; please don't call it.
 */
void
page_unlock_nocapture(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
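
	/*
	 * Dispatch on the encoding of old (SE_EWANTED is masked off
	 * first, since it can ride along with any state): exactly one
	 * reader, the SE_DELETED marker, a writer (negative), several
	 * readers, or not locked at all, which is a panic.
	 */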
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock_nocapture: page %p is deleted", (void *)pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock_nocapture: page %p is not locked",
		    (void *)pp);
	}

	mutex_exit(pse);
}

/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock: page %p is deleted", (void *)pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock: page %p is not locked", (void *)pp);
	}

	if (pp->p_selock == 0) {
		/*
		 * If the T_CAPTURING bit is set, that means that we should
		 * not try and capture the page again as we could recurse
		 * which could lead to a stack overflow panic or spending a
		 * relatively long time in the kernel making no progress.
		 */
		if ((pp->p_toxic & PR_CAPTURE) &&
		    !(curthread->t_flag & T_CAPTURING) &&
		    !PP_RETIRED(pp)) {
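			/*
			 * Retake the lock exclusively so that
			 * page_unlock_capture() is handed a locked page.
			 */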
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			page_unlock_capture(pp);
		} else {
			mutex_exit(pse);
		}
	} else {
		mutex_exit(pse);
	}
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire this page's
 * lock and change the page. Thus, it is safe to drop the "shared"
 * lock and attempt to acquire the "exclusive" lock.
 *
 * Returns 1 on success, 0 on failure.
 */
int
page_tryupgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (!(pp->p_selock & SE_EWANTED)) {
		/* no threads want exclusive access, try upgrade */
		if (pp->p_selock == SE_READER) {
			THREAD_KPRI_REQUEST();
			/* convert to exclusive lock */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	int excl_waiting;

	ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
	ASSERT(PAGE_EXCL(pp));

	mutex_enter(pse);
	excl_waiting = pp->p_selock & SE_EWANTED;
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_READER | excl_waiting;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

void
page_lock_delete(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_vnode == NULL);
	ASSERT(pp->p_offset == (u_offset_t)-1);
	ASSERT(!PP_ISFREE(pp));

	mutex_enter(pse);
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_DELETED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

int
page_deleted(page_t *pp)
{
	return (pp->p_selock == SE_DELETED);
}

/*
 * Implement the io lock for pages
 */
void
page_iolock_init(page_t *pp)
{
	pp->p_iolock_state = 0;
	cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
}

/*
 * Acquire the i/o lock on a page.
 */
void
page_io_lock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	while (pp->p_iolock_state & PAGE_IO_INUSE) {
		cv_wait(&(pp->p_io_cv), pio);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Release the i/o lock on a page.
 */
void
page_io_unlock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	cv_broadcast(&pp->p_io_cv);
	pp->p_iolock_state &= ~PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Try to acquire the i/o lock on a page without blocking.
 * Returns 1 on success, 0 on failure.
 */
int
page_io_trylock(page_t *pp)
{
	kmutex_t *pio;
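
	/*
	 * Cheap unlocked peek first, so a clearly busy lock fails
	 * without touching the mutex; the state is then re-checked
	 * under pio, since it may change before the mutex is taken.
	 */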
	if (pp->p_iolock_state & PAGE_IO_INUSE)
		return (0);

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);

	if (pp->p_iolock_state & PAGE_IO_INUSE) {
		mutex_exit(pio);
		return (0);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);

	return (1);
}

/*
 * Wait until the i/o lock is not held.
 */
void
page_io_wait(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	while (pp->p_iolock_state & PAGE_IO_INUSE) {
		cv_wait(&(pp->p_io_cv), pio);
	}
	mutex_exit(pio);
}

/*
 * Returns non-zero if the i/o lock on the page is held.
 */
int
page_io_locked(page_t *pp)
{
	return (pp->p_iolock_state & PAGE_IO_INUSE);
}

/*
 * For use in ASSERT()s that the i/o lock on a page is held:
 * returns non-zero if it is, 0 if it is not.
 */
int
page_iolock_assert(page_t *pp)
{
	return (page_io_locked(pp));
}

/*
 * Wrapper exported to kernel routines that are built
 * platform-independent (the macro is platform-dependent;
 * the size of vph_mutex[] is based on NCPU).
 *
 * Note that you can do stress testing on this by setting the
 * variable page_vnode_mutex_stress to something other than
 * zero in a DEBUG kernel from a debugger when the kernel is loaded;
 * setting it once the kernel is up and running may not work correctly.
 */
#ifdef	DEBUG
static int page_vnode_mutex_stress = 0;
#endif

kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
	if (vp == &kvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 0]);

	if (vp == &zvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 1]);
#ifdef	DEBUG
	if (page_vnode_mutex_stress != 0)
		return (&vph_mutex[0]);
#endif

	return (&vph_mutex[VP_HASH_FUNC(vp)]);
}

kmutex_t *
page_se_mutex(page_t *pp)
{
	return (PAGE_SE_MUTEX(pp));
}

#ifdef VM_STATS
uint_t pszclck_stat[4];
#endif

/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote()
 * i.e. non swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root this routine relies on the fact that hat_page_demote() changes
 * root last.
 *
 * If NULL is returned pp's p_szc is guaranteed to be 0. If non NULL is
 * returned pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
	kmutex_t	*mtx;
	page_t		*rootpp;
	uint_t		szc;
	uint_t		rszc;
	uint_t		pszc = pp->p_szc;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(!PP_ISKAS(pp));

again:
	if (pszc == 0) {
		VM_STAT_ADD(pszclck_stat[0]);
		return (NULL);
	}

	/* The lock lives in the root page */

	rootpp = PP_GROUPLEADER(pp, pszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);

	/*
	 * Since p_szc can only decrease, if pp == rootpp then rootpp
	 * will always be the right root, regardless of rootpp->p_szc.
	 * Otherwise, if the location of pp's root didn't change after
	 * we took the lock, we have the right root; return the mutex
	 * hashed off of it.
	 */
	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
		VM_STAT_ADD(pszclck_stat[1]);
		return (mtx);
	}

	/*
	 * root location changed because page got demoted.
	 * locate the new root.
	 */
	if (rszc < pszc) {
		szc = pp->p_szc;
		ASSERT(szc < pszc);
		mutex_exit(mtx);
		pszc = szc;
		VM_STAT_ADD(pszclck_stat[2]);
		goto again;
	}

	VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * current hat_page_demote not done yet.
	 * wait for it to finish.
	 */
	mutex_exit(mtx);
	rootpp = PP_GROUPLEADER(rootpp, rszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
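	/*
	 * Entering and immediately exiting the root's mutex simply
	 * blocks until the demoting thread, which holds it, drops it.
	 */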
	mutex_enter(mtx);
	mutex_exit(mtx);
	ASSERT(rootpp->p_szc < rszc);
	goto again;
}

int
page_szc_lock_assert(page_t *pp)
{
	page_t *rootpp = PP_PAGEROOT(pp);
	kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);

	return (MUTEX_HELD(mtx));
}

/*
 * memseg locking
 */
static krwlock_t memsegslock;

/*
 * memlist (phys_install, phys_avail) locking.
 */
static krwlock_t memlists_lock;

void
memsegs_lock(int writer)
{
	rw_enter(&memsegslock, writer ? RW_WRITER : RW_READER);
}

/*ARGSUSED*/
void
memsegs_unlock(int writer)
{
	rw_exit(&memsegslock);
}

int
memsegs_lock_held(void)
{
	return (RW_LOCK_HELD(&memsegslock));
}

void
memlist_read_lock(void)
{
	rw_enter(&memlists_lock, RW_READER);
}

void
memlist_read_unlock(void)
{
	rw_exit(&memlists_lock);
}

void
memlist_write_lock(void)
{
	rw_enter(&memlists_lock, RW_WRITER);
}

void
memlist_write_unlock(void)
{
	rw_exit(&memlists_lock);
}