Merge illumos-gate
[unleashed.git] / kernel / vm / vm_anon.c
blob1444517ff5bbf227779c777e62846a1741fd3a92
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
40 * VM - anonymous pages.
42 * This layer sits immediately above the vm_swap layer. It manages
43 * physical pages that have no permanent identity in the file system
44 * name space, using the services of the vm_swap layer to allocate
45 * backing storage for these pages. Since these pages have no external
46 * identity, they are discarded when the last reference is removed.
48 * An important function of this layer is to manage low-level sharing
49 * of pages that are logically distinct but that happen to be
50 * physically identical (e.g., the corresponding pages of the processes
51 * resulting from a fork before one process or the other changes their
52 * contents). This pseudo-sharing is present only as an optimization
53 * and is not to be confused with true sharing in which multiple
54 * address spaces deliberately contain references to the same object;
55 * such sharing is managed at a higher level.
57 * The key data structure here is the anon struct, which contains a
58 * reference count for its associated physical page and a hint about
59 * the identity of that page. Anon structs typically live in arrays,
60 * with an instance's position in its array determining where the
61 * corresponding backing storage is allocated; however, the swap_xlate()
62 * routine abstracts away this representation information so that the
63 * rest of the anon layer need not know it. (See the swap layer for
64 * more details on anon struct layout.)
66 * In future versions of the system, the association between an
67 * anon struct and its position on backing store will change so that
68 * we don't require backing store for all anonymous pages in the system.
69 * This is an important consideration for large memory systems.
70 * We can also use this technique to delay binding physical locations
71 * to anonymous pages until pageout time, where we can make smarter
72 * allocation decisions to improve anonymous klustering.
74 * Many of the routines defined here take a (struct anon **) argument,
75 * which allows the code at this level to manage anon pages directly,
76 * so that callers can regard anon structs as opaque objects and not be
77 * concerned with assigning or inspecting their contents.
79 * Clients of this layer refer to anon pages indirectly. That is, they
80 * maintain arrays of pointers to anon structs rather than maintaining
81 * anon structs themselves. The (struct anon **) arguments mentioned
82 * above are pointers to entries in these arrays. It is these arrays
83 * that capture the mapping between offsets within a given segment and
84 * the corresponding anonymous backing storage address.
87 #ifdef DEBUG
88 #define ANON_DEBUG
89 #endif
91 #include <sys/types.h>
92 #include <sys/t_lock.h>
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/mman.h>
96 #include <sys/cred.h>
97 #include <sys/thread.h>
98 #include <sys/vnode.h>
99 #include <sys/cpuvar.h>
100 #include <sys/swap.h>
101 #include <sys/cmn_err.h>
102 #include <sys/vtrace.h>
103 #include <sys/kmem.h>
104 #include <sys/sysmacros.h>
105 #include <sys/bitmap.h>
106 #include <sys/vmsystm.h>
107 #include <sys/tuneable.h>
108 #include <sys/debug.h>
109 #include <sys/fs/swapnode.h>
110 #include <sys/tnf_probe.h>
111 #include <sys/lgrp.h>
112 #include <sys/policy.h>
113 #include <sys/condvar_impl.h>
114 #include <sys/mutex_impl.h>
115 #include <sys/rctl.h>
117 #include <vm/as.h>
118 #include <vm/hat.h>
119 #include <vm/anon.h>
120 #include <vm/page.h>
121 #include <vm/vpage.h>
122 #include <vm/seg.h>
123 #include <vm/rm.h>
125 #include <sys/fs_subr.h>
127 struct vnode *anon_vp;
129 int anon_debug;
131 kmutex_t anoninfo_lock;
132 struct k_anoninfo k_anoninfo;
133 ani_free_t *ani_free_pool;
134 pad_mutex_t anon_array_lock[ANON_LOCKSIZE];
135 kcondvar_t anon_array_cv[ANON_LOCKSIZE];
138 * Global hash table for (vp, off) -> anon slot
140 extern int swap_maxcontig;
141 size_t anon_hash_size;
142 unsigned int anon_hash_shift;
143 struct anon **anon_hash;
145 static struct kmem_cache *anon_cache;
146 static struct kmem_cache *anonmap_cache;
148 pad_mutex_t *anonhash_lock;
151 * Used to make the increment of all refcnts of all anon slots of a large
152 * page appear to be atomic. The lock is grabbed for the first anon slot of
153 * a large page.
155 pad_mutex_t *anonpages_hash_lock;
157 #define APH_MUTEX(vp, off) \
158 (&anonpages_hash_lock[(ANON_HASH((vp), (off)) & \
159 (AH_LOCK_SIZE - 1))].pad_mutex)
161 #ifdef VM_STATS
162 static struct anonvmstats_str {
163 ulong_t getpages[30];
164 ulong_t privatepages[10];
165 ulong_t demotepages[9];
166 ulong_t decrefpages[9];
167 ulong_t dupfillholes[4];
168 ulong_t freepages[1];
169 } anonvmstats;
170 #endif /* VM_STATS */
172 /*ARGSUSED*/
173 static int
174 anonmap_cache_constructor(void *buf, void *cdrarg, int kmflags)
176 struct anon_map *amp = buf;
178 rw_init(&amp->a_rwlock, NULL, RW_DEFAULT, NULL);
179 cv_init(&amp->a_purgecv, NULL, CV_DEFAULT, NULL);
180 mutex_init(&amp->a_pmtx, NULL, MUTEX_DEFAULT, NULL);
181 mutex_init(&amp->a_purgemtx, NULL, MUTEX_DEFAULT, NULL);
182 return (0);
185 /*ARGSUSED1*/
186 static void
187 anonmap_cache_destructor(void *buf, void *cdrarg)
189 struct anon_map *amp = buf;
191 rw_destroy(&amp->a_rwlock);
192 cv_destroy(&amp->a_purgecv);
193 mutex_destroy(&amp->a_pmtx);
194 mutex_destroy(&amp->a_purgemtx);
197 void
198 anon_init(void)
200 int i;
201 pad_mutex_t *tmp;
203 /* These both need to be powers of 2 so round up to the next power */
204 anon_hash_shift = highbit((physmem / ANON_HASHAVELEN) - 1);
205 anon_hash_size = 1L << anon_hash_shift;
208 * We need to align the anonhash_lock and anonpages_hash_lock arrays
209 * to a 64B boundary to avoid false sharing. We add 63B to our
210 * allocation so that we can get a 64B aligned address to use.
211 * We allocate both of these together to avoid wasting an additional
212 * 63B.
214 tmp = kmem_zalloc((2 * AH_LOCK_SIZE * sizeof (pad_mutex_t)) + 63,
215 KM_SLEEP);
216 anonhash_lock = (pad_mutex_t *)P2ROUNDUP((uintptr_t)tmp, 64);
217 anonpages_hash_lock = anonhash_lock + AH_LOCK_SIZE;
219 for (i = 0; i < AH_LOCK_SIZE; i++) {
220 mutex_init(&anonhash_lock[i].pad_mutex, NULL, MUTEX_DEFAULT,
221 NULL);
222 mutex_init(&anonpages_hash_lock[i].pad_mutex, NULL,
223 MUTEX_DEFAULT, NULL);
226 for (i = 0; i < ANON_LOCKSIZE; i++) {
227 mutex_init(&anon_array_lock[i].pad_mutex, NULL,
228 MUTEX_DEFAULT, NULL);
229 cv_init(&anon_array_cv[i], NULL, CV_DEFAULT, NULL);
232 anon_hash = (struct anon **)
233 kmem_zalloc(sizeof (struct anon *) * anon_hash_size, KM_SLEEP);
234 anon_cache = kmem_cache_create("anon_cache", sizeof (struct anon),
235 AN_CACHE_ALIGN, NULL, NULL, NULL, NULL, NULL, KMC_PREFILL);
236 anonmap_cache = kmem_cache_create("anonmap_cache",
237 sizeof (struct anon_map), 0,
238 anonmap_cache_constructor, anonmap_cache_destructor, NULL,
239 NULL, NULL, 0);
240 swap_maxcontig = (1024 * 1024) >> PAGESHIFT; /* 1MB of pages */
242 tmp = kmem_zalloc((ANI_MAX_POOL * sizeof (ani_free_t)) + 63, KM_SLEEP);
243 /* Round ani_free_pool to cacheline boundary to avoid false sharing. */
244 ani_free_pool = (ani_free_t *)P2ROUNDUP((uintptr_t)tmp, 64);
246 anon_vp = vn_alloc(KM_SLEEP);
247 vn_setops(anon_vp, &swap_vnodeops);
248 anon_vp->v_type = VREG;
249 anon_vp->v_flag |= (VISSWAP|VISSWAPFS);
253 * Global anon slot hash table manipulation.
256 static void
257 anon_addhash(struct anon *ap)
259 int index;
261 ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));
262 index = ANON_HASH(ap->an_vp, ap->an_off);
263 ap->an_hash = anon_hash[index];
264 anon_hash[index] = ap;
267 static void
268 anon_rmhash(struct anon *ap)
270 struct anon **app;
272 ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));
274 for (app = &anon_hash[ANON_HASH(ap->an_vp, ap->an_off)];
275 *app; app = &((*app)->an_hash)) {
276 if (*app == ap) {
277 *app = ap->an_hash;
278 break;
284 * The anon array interfaces. Functions allocating,
285 * freeing array of pointers, and returning/setting
286 * entries in the array of pointers for a given offset.
288 * Create the list of pointers
290 struct anon_hdr *
291 anon_create(pgcnt_t npages, int flags)
293 struct anon_hdr *ahp;
294 ulong_t nchunks;
295 int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
297 if ((ahp = kmem_zalloc(sizeof (struct anon_hdr), kmemflags)) == NULL) {
298 return (NULL);
301 mutex_init(&ahp->serial_lock, NULL, MUTEX_DEFAULT, NULL);
303 * Single level case.
305 ahp->size = npages;
306 if (npages <= ANON_CHUNK_SIZE || (flags & ANON_ALLOC_FORCE)) {
308 if (flags & ANON_ALLOC_FORCE)
309 ahp->flags |= ANON_ALLOC_FORCE;
311 ahp->array_chunk = kmem_zalloc(
312 ahp->size * sizeof (struct anon *), kmemflags);
314 if (ahp->array_chunk == NULL) {
315 kmem_free(ahp, sizeof (struct anon_hdr));
316 return (NULL);
318 } else {
320 * 2 Level case.
321 * anon hdr size needs to be rounded off to be a multiple
322 * of ANON_CHUNK_SIZE. This is important as various anon
323 * related functions depend on this.
324 * NOTE -
325 * anon_grow() makes anon hdr size a multiple of
326 * ANON_CHUNK_SIZE.
327 * amp size is <= anon hdr size.
328 * anon_index + seg_pgs <= anon hdr size.
330 ahp->size = P2ROUNDUP(npages, ANON_CHUNK_SIZE);
331 nchunks = ahp->size >> ANON_CHUNK_SHIFT;
333 ahp->array_chunk = kmem_zalloc(nchunks * sizeof (ulong_t *),
334 kmemflags);
336 if (ahp->array_chunk == NULL) {
337 kmem_free(ahp, sizeof (struct anon_hdr));
338 return (NULL);
341 return (ahp);
345 * Free the array of pointers
347 void
348 anon_release(struct anon_hdr *ahp, pgcnt_t npages)
350 ulong_t i;
351 void **ppp;
352 ulong_t nchunks;
354 ASSERT(npages <= ahp->size);
357 * Single level case.
359 if (npages <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
360 kmem_free(ahp->array_chunk, ahp->size * sizeof (struct anon *));
361 } else {
363 * 2 level case.
365 nchunks = ahp->size >> ANON_CHUNK_SHIFT;
366 for (i = 0; i < nchunks; i++) {
367 ppp = &ahp->array_chunk[i];
368 if (*ppp != NULL)
369 kmem_free(*ppp, PAGESIZE);
371 kmem_free(ahp->array_chunk, nchunks * sizeof (ulong_t *));
373 mutex_destroy(&ahp->serial_lock);
374 kmem_free(ahp, sizeof (struct anon_hdr));
378 * Return the pointer from the list for a
379 * specified anon index.
381 struct anon *
382 anon_get_ptr(struct anon_hdr *ahp, ulong_t an_idx)
384 struct anon **app;
386 ASSERT(an_idx < ahp->size);
389 * Single level case.
391 if ((ahp->size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
392 return ((struct anon *)
393 ((uintptr_t)ahp->array_chunk[an_idx] & ANON_PTRMASK));
394 } else {
397 * 2 level case.
399 app = ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
400 if (app) {
401 return ((struct anon *)
402 ((uintptr_t)app[an_idx & ANON_CHUNK_OFF] &
403 ANON_PTRMASK));
404 } else {
405 return (NULL);
411 * Return the anon pointer for the first valid entry in the anon list,
412 * starting from the given index.
414 struct anon *
415 anon_get_next_ptr(struct anon_hdr *ahp, ulong_t *index)
417 struct anon *ap;
418 struct anon **app;
419 ulong_t chunkoff;
420 ulong_t i;
421 ulong_t j;
422 pgcnt_t size;
424 i = *index;
425 size = ahp->size;
427 ASSERT(i < size);
429 if ((size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
431 * 1 level case
433 while (i < size) {
434 ap = (struct anon *)
435 ((uintptr_t)ahp->array_chunk[i] & ANON_PTRMASK);
436 if (ap) {
437 *index = i;
438 return (ap);
440 i++;
442 } else {
444 * 2 level case
446 chunkoff = i & ANON_CHUNK_OFF;
447 while (i < size) {
448 app = ahp->array_chunk[i >> ANON_CHUNK_SHIFT];
449 if (app)
450 for (j = chunkoff; j < ANON_CHUNK_SIZE; j++) {
451 ap = (struct anon *)
452 ((uintptr_t)app[j] & ANON_PTRMASK);
453 if (ap) {
454 *index = i + (j - chunkoff);
455 return (ap);
458 chunkoff = 0;
459 i = (i + ANON_CHUNK_SIZE) & ~ANON_CHUNK_OFF;
462 *index = size;
463 return (NULL);
467 * Set list entry with a given pointer for a specified offset
470 anon_set_ptr(struct anon_hdr *ahp, ulong_t an_idx, struct anon *ap, int flags)
472 void **ppp;
473 struct anon **app;
474 int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
475 uintptr_t *ap_addr;
477 ASSERT(an_idx < ahp->size);
480 * Single level case.
482 if (ahp->size <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
483 ap_addr = (uintptr_t *)&ahp->array_chunk[an_idx];
484 } else {
487 * 2 level case.
489 ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
491 ASSERT(ppp != NULL);
492 if (*ppp == NULL) {
493 mutex_enter(&ahp->serial_lock);
494 ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
495 if (*ppp == NULL) {
496 *ppp = kmem_zalloc(PAGESIZE, kmemflags);
497 if (*ppp == NULL) {
498 mutex_exit(&ahp->serial_lock);
499 return (ENOMEM);
502 mutex_exit(&ahp->serial_lock);
504 app = *ppp;
505 ap_addr = (uintptr_t *)&app[an_idx & ANON_CHUNK_OFF];
507 *ap_addr = (*ap_addr & ~ANON_PTRMASK) | (uintptr_t)ap;
508 return (0);
512 * Copy anon array into a given new anon array
515 anon_copy_ptr(struct anon_hdr *sahp, ulong_t s_idx,
516 struct anon_hdr *dahp, ulong_t d_idx,
517 pgcnt_t npages, int flags)
519 void **sapp, **dapp;
520 void *ap;
521 int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
523 ASSERT((s_idx < sahp->size) && (d_idx < dahp->size));
524 ASSERT((npages <= sahp->size) && (npages <= dahp->size));
527 * Both arrays are 1 level.
529 if (((sahp->size <= ANON_CHUNK_SIZE) &&
530 (dahp->size <= ANON_CHUNK_SIZE)) ||
531 ((sahp->flags & ANON_ALLOC_FORCE) &&
532 (dahp->flags & ANON_ALLOC_FORCE))) {
534 bcopy(&sahp->array_chunk[s_idx], &dahp->array_chunk[d_idx],
535 npages * sizeof (struct anon *));
536 return (0);
540 * Both arrays are 2 levels.
542 if (sahp->size > ANON_CHUNK_SIZE &&
543 dahp->size > ANON_CHUNK_SIZE &&
544 ((sahp->flags & ANON_ALLOC_FORCE) == 0) &&
545 ((dahp->flags & ANON_ALLOC_FORCE) == 0)) {
547 ulong_t sapidx, dapidx;
548 ulong_t *sap, *dap;
549 ulong_t chknp;
551 while (npages != 0) {
553 sapidx = s_idx & ANON_CHUNK_OFF;
554 dapidx = d_idx & ANON_CHUNK_OFF;
555 chknp = ANON_CHUNK_SIZE - MAX(sapidx, dapidx);
556 if (chknp > npages)
557 chknp = npages;
559 sapp = &sahp->array_chunk[s_idx >> ANON_CHUNK_SHIFT];
560 if ((sap = *sapp) != NULL) {
561 dapp = &dahp->array_chunk[d_idx
562 >> ANON_CHUNK_SHIFT];
563 if ((dap = *dapp) == NULL) {
564 *dapp = kmem_zalloc(PAGESIZE,
565 kmemflags);
566 if ((dap = *dapp) == NULL)
567 return (ENOMEM);
569 bcopy((sap + sapidx), (dap + dapidx),
570 chknp << ANON_PTRSHIFT);
572 s_idx += chknp;
573 d_idx += chknp;
574 npages -= chknp;
576 return (0);
580 * At least one of the arrays is 2 level.
582 while (npages--) {
583 if ((ap = anon_get_ptr(sahp, s_idx)) != NULL) {
584 ASSERT(!ANON_ISBUSY(anon_get_slot(sahp, s_idx)));
585 if (anon_set_ptr(dahp, d_idx, ap, flags) == ENOMEM)
586 return (ENOMEM);
588 s_idx++;
589 d_idx++;
591 return (0);
/*
 * ANON_INITBUF is a convenience macro for anon_grow() below. It
 * takes a buffer dst, which is at least as large as buffer src. It
 * does a bcopy from src into dst, and then bzeros the extra bytes
 * of dst. If tail is set, the data in src is tail aligned within
 * dst instead of head aligned.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.
 * Arguments are evaluated more than once, so they must not have side
 * effects.
 */
#define	ANON_INITBUF(src, srclen, dst, dstsize, tail)			      \
	do {								      \
		if (tail) {						      \
			bzero((dst), (dstsize) - (srclen));		      \
			bcopy((src),					      \
			    (char *)(dst) + (dstsize) - (srclen), (srclen));  \
		} else {						      \
			bcopy((src), (dst), (srclen));			      \
			bzero((char *)(dst) + (srclen),			      \
			    (dstsize) - (srclen));			      \
		}							      \
	} while (0)

/* Growth increments (in pages) used by anon_grow() to round array sizes. */
#define	ANON_1_LEVEL_INC	(ANON_CHUNK_SIZE / 8)
#define	ANON_2_LEVEL_INC	(ANON_1_LEVEL_INC * ANON_CHUNK_SIZE)
616 * anon_grow() is used to efficiently extend an existing anon array.
617 * startidx_p points to the index into the anon array of the first page
618 * that is in use. oldseg_pgs is the number of pages in use, starting at
619 * *startidx_p. newpages is the number of additional pages desired.
621 * If startidx_p == NULL, startidx is taken to be 0 and cannot be changed.
623 * The growth is done by creating a new top level of the anon array,
624 * and (if the array is 2-level) reusing the existing second level arrays.
626 * flags can be used to specify ANON_NOSLEEP and ANON_GROWDOWN.
628 * Returns the new number of pages in the anon array.
630 pgcnt_t
631 anon_grow(struct anon_hdr *ahp, ulong_t *startidx_p, pgcnt_t oldseg_pgs,
632 pgcnt_t newseg_pgs, int flags)
634 ulong_t startidx = startidx_p ? *startidx_p : 0;
635 pgcnt_t oldamp_pgs = ahp->size, newamp_pgs;
636 pgcnt_t oelems, nelems, totpages;
637 void **level1;
638 int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
639 int growdown = (flags & ANON_GROWDOWN);
640 size_t newarrsz, oldarrsz;
641 void *level2;
643 ASSERT(!(startidx_p == NULL && growdown));
644 ASSERT(startidx + oldseg_pgs <= ahp->size);
647 * Determine the total number of pages needed in the new
648 * anon array. If growing down, totpages is all pages from
649 * startidx through the end of the array, plus <newseg_pgs>
650 * pages. If growing up, keep all pages from page 0 through
651 * the last page currently in use, plus <newseg_pgs> pages.
653 if (growdown)
654 totpages = oldamp_pgs - startidx + newseg_pgs;
655 else
656 totpages = startidx + oldseg_pgs + newseg_pgs;
658 /* If the array is already large enough, just return. */
660 if (oldamp_pgs >= totpages) {
661 if (growdown)
662 *startidx_p = oldamp_pgs - totpages;
663 return (oldamp_pgs);
667 * oldamp_pgs/newamp_pgs are the total numbers of pages represented
668 * by the corresponding arrays.
669 * oelems/nelems are the number of pointers in the top level arrays
670 * which may be either level 1 or level 2.
671 * Will the new anon array be one level or two levels?
673 if (totpages <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
674 newamp_pgs = P2ROUNDUP(totpages, ANON_1_LEVEL_INC);
675 oelems = oldamp_pgs;
676 nelems = newamp_pgs;
677 } else {
678 newamp_pgs = P2ROUNDUP(totpages, ANON_2_LEVEL_INC);
679 oelems = (oldamp_pgs + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
680 nelems = newamp_pgs >> ANON_CHUNK_SHIFT;
683 newarrsz = nelems * sizeof (void *);
684 level1 = kmem_alloc(newarrsz, kmemflags);
685 if (level1 == NULL)
686 return (0);
688 /* Are we converting from a one level to a two level anon array? */
690 if (newamp_pgs > ANON_CHUNK_SIZE && oldamp_pgs <= ANON_CHUNK_SIZE &&
691 !(ahp->flags & ANON_ALLOC_FORCE)) {
694 * Yes, we're converting to a two level. Reuse old level 1
695 * as new level 2 if it is exactly PAGESIZE. Otherwise
696 * alloc a new level 2 and copy the old level 1 data into it.
698 if (oldamp_pgs == ANON_CHUNK_SIZE) {
699 level2 = (void *)ahp->array_chunk;
700 } else {
701 level2 = kmem_alloc(PAGESIZE, kmemflags);
702 if (level2 == NULL) {
703 kmem_free(level1, newarrsz);
704 return (0);
706 oldarrsz = oldamp_pgs * sizeof (void *);
708 ANON_INITBUF(ahp->array_chunk, oldarrsz,
709 level2, PAGESIZE, growdown);
710 kmem_free(ahp->array_chunk, oldarrsz);
712 bzero(level1, newarrsz);
713 if (growdown)
714 level1[nelems - 1] = level2;
715 else
716 level1[0] = level2;
717 } else {
718 oldarrsz = oelems * sizeof (void *);
720 ANON_INITBUF(ahp->array_chunk, oldarrsz,
721 level1, newarrsz, growdown);
722 kmem_free(ahp->array_chunk, oldarrsz);
725 ahp->array_chunk = level1;
726 ahp->size = newamp_pgs;
727 if (growdown)
728 *startidx_p = newamp_pgs - totpages;
730 return (newamp_pgs);
735 * Called to sync ani_free value.
738 void
739 set_anoninfo(void)
741 processorid_t ix, max_seqid;
742 pgcnt_t total = 0;
743 static clock_t last_time;
744 clock_t new_time;
746 if (ani_free_pool == NULL)
747 return;
750 * Recompute ani_free at most once per tick. Use max_cpu_seqid_ever to
751 * identify the maximum number of CPUs were ever online.
753 new_time = ddi_get_lbolt();
754 if (new_time > last_time) {
756 max_seqid = max_cpu_seqid_ever;
757 ASSERT(ANI_MAX_POOL > max_seqid);
758 for (ix = 0; ix <= max_seqid; ix++)
759 total += ani_free_pool[ix].ani_count;
761 last_time = new_time;
762 k_anoninfo.ani_free = total;
767 * Reserve anon space.
769 * It's no longer simply a matter of incrementing ani_resv to
770 * reserve swap space, we need to check memory-based as well
771 * as disk-backed (physical) swap. The following algorithm
772 * is used:
773 * Check the space on physical swap
774 * i.e. amount needed < ani_max - ani_phys_resv
775 * If we are swapping on swapfs check
776 * amount needed < (availrmem - swapfs_minfree)
777 * Since the algorithm to check for the quantity of swap space is
778 * almost the same as that for reserving it, we'll just use anon_resvmem
779 * with a flag to decrement availrmem.
781 * Return non-zero on success.
784 anon_resvmem(size_t size, boolean_t takemem, zone_t *zone, int tryhard)
786 pgcnt_t npages = btopr(size);
787 pgcnt_t mswap_pages = 0;
788 pgcnt_t pswap_pages = 0;
789 proc_t *p = curproc;
791 if (zone != NULL) {
792 /* test zone.max-swap resource control */
793 mutex_enter(&p->p_lock);
794 if (rctl_incr_swap(p, zone, ptob(npages)) != 0) {
795 mutex_exit(&p->p_lock);
797 if (takemem)
798 atomic_add_64(&zone->zone_anon_alloc_fail, 1);
800 return (0);
803 if (!takemem)
804 rctl_decr_swap(zone, ptob(npages));
806 mutex_exit(&p->p_lock);
808 mutex_enter(&anoninfo_lock);
811 * pswap_pages is the number of pages we can take from
812 * physical (i.e. disk-backed) swap.
814 ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
815 pswap_pages = k_anoninfo.ani_max - k_anoninfo.ani_phys_resv;
817 ANON_PRINT(A_RESV,
818 ("anon_resvmem: npages %lu takemem %u pswap %lu caller %p\n",
819 npages, takemem, pswap_pages, (void *)caller()));
821 if (npages <= pswap_pages) {
823 * we have enough space on a physical swap
825 if (takemem)
826 k_anoninfo.ani_phys_resv += npages;
827 mutex_exit(&anoninfo_lock);
828 return (1);
829 } else if (pswap_pages != 0) {
831 * we have some space on a physical swap
833 if (takemem) {
835 * use up remainder of phys swap
837 k_anoninfo.ani_phys_resv += pswap_pages;
838 ASSERT(k_anoninfo.ani_phys_resv == k_anoninfo.ani_max);
842 * since (npages > pswap_pages) we need mem swap
843 * mswap_pages is the number of pages needed from availrmem
845 ASSERT(npages > pswap_pages);
846 mswap_pages = npages - pswap_pages;
848 ANON_PRINT(A_RESV, ("anon_resvmem: need %ld pages from memory\n",
849 mswap_pages));
852 * priv processes can reserve memory as swap as long as availrmem
853 * remains greater than swapfs_minfree; in the case of non-priv
854 * processes, memory can be reserved as swap only if availrmem
855 * doesn't fall below (swapfs_minfree + swapfs_reserve). Thus,
856 * swapfs_reserve amount of memswap is not available to non-priv
857 * processes. This protects daemons such as automounter dying
858 * as a result of application processes eating away almost entire
859 * membased swap. This safeguard becomes useless if apps are run
860 * with root access.
862 * swapfs_reserve is minimum of 4Mb or 1/16 of physmem.
865 if (tryhard) {
866 pgcnt_t floor_pages;
868 if (secpolicy_resource_anon_mem(CRED())) {
869 floor_pages = swapfs_minfree;
870 } else {
871 floor_pages = swapfs_minfree + swapfs_reserve;
874 mutex_exit(&anoninfo_lock);
875 (void) page_reclaim_mem(mswap_pages, floor_pages, 0);
876 mutex_enter(&anoninfo_lock);
879 mutex_enter(&freemem_lock);
880 if (availrmem > (swapfs_minfree + swapfs_reserve + mswap_pages) ||
881 (availrmem > (swapfs_minfree + mswap_pages) &&
882 secpolicy_resource(CRED()) == 0)) {
884 if (takemem) {
886 * Take the memory from the rest of the system.
888 availrmem -= mswap_pages;
889 mutex_exit(&freemem_lock);
890 k_anoninfo.ani_mem_resv += mswap_pages;
891 ANI_ADD(mswap_pages);
892 ANON_PRINT((A_RESV | A_MRESV),
893 ("anon_resvmem: took %ld pages of availrmem\n",
894 mswap_pages));
895 } else {
896 mutex_exit(&freemem_lock);
899 ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
900 mutex_exit(&anoninfo_lock);
901 return (1);
902 } else {
904 * Fail if not enough memory
906 if (takemem) {
907 k_anoninfo.ani_phys_resv -= pswap_pages;
910 mutex_exit(&freemem_lock);
911 mutex_exit(&anoninfo_lock);
912 ANON_PRINT(A_RESV,
913 ("anon_resvmem: not enough space from swapfs\n"));
914 if (zone != NULL && takemem)
915 rctl_decr_swap(zone, ptob(npages));
916 return (0);
921 * Give back an anon reservation.
923 void
924 anon_unresvmem(size_t size, zone_t *zone)
926 pgcnt_t npages = btopr(size);
927 spgcnt_t mem_free_pages = 0;
928 pgcnt_t phys_free_slots;
929 #ifdef ANON_DEBUG
930 pgcnt_t mem_resv;
931 #endif
932 if (zone != NULL)
933 rctl_decr_swap(zone, ptob(npages));
935 mutex_enter(&anoninfo_lock);
937 ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
940 * If some of this reservation belonged to swapfs
941 * give it back to availrmem.
942 * ani_mem_resv is the amount of availrmem swapfs has reserved.
943 * but some of that memory could be locked by segspt so we can only
944 * return non locked ani_mem_resv back to availrmem
946 if (k_anoninfo.ani_mem_resv > k_anoninfo.ani_locked_swap) {
947 ANON_PRINT((A_RESV | A_MRESV),
948 ("anon_unresv: growing availrmem by %ld pages\n",
949 MIN(k_anoninfo.ani_mem_resv, npages)));
951 mem_free_pages = MIN((spgcnt_t)(k_anoninfo.ani_mem_resv -
952 k_anoninfo.ani_locked_swap), npages);
953 mutex_enter(&freemem_lock);
954 availrmem += mem_free_pages;
955 mutex_exit(&freemem_lock);
956 k_anoninfo.ani_mem_resv -= mem_free_pages;
958 ANI_ADD(-mem_free_pages);
961 * The remainder of the pages is returned to phys swap
963 ASSERT(npages >= mem_free_pages);
964 phys_free_slots = npages - mem_free_pages;
966 if (phys_free_slots) {
967 k_anoninfo.ani_phys_resv -= phys_free_slots;
970 #ifdef ANON_DEBUG
971 mem_resv = k_anoninfo.ani_mem_resv;
972 #endif
974 ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
975 ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
977 mutex_exit(&anoninfo_lock);
979 ANON_PRINT(A_RESV, ("anon_unresv: %lu, tot %lu, caller %p\n",
980 npages, mem_resv, (void *)caller()));
984 * Allocate an anon slot and return it with the lock held.
986 struct anon *
987 anon_alloc(struct vnode *vp, anoff_t off)
989 struct anon *ap;
990 kmutex_t *ahm;
992 ap = kmem_cache_alloc(anon_cache, KM_SLEEP);
993 if (vp == NULL) {
994 swap_alloc(ap);
995 } else {
996 ap->an_vp = vp;
997 ap->an_off = off;
999 ap->an_refcnt = 1;
1000 ap->an_pvp = NULL;
1001 ap->an_poff = 0;
1002 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1003 mutex_enter(ahm);
1004 anon_addhash(ap);
1005 mutex_exit(ahm);
1006 ANI_ADD(-1);
1007 ANON_PRINT(A_ANON, ("anon_alloc: returning ap %p, vp %p\n",
1008 (void *)ap, (ap ? (void *)ap->an_vp : NULL)));
1009 return (ap);
1013 * Called for pages locked in memory via softlock/pagelock/mlock to make sure
1014 * such pages don't consume any physical swap resources needed for swapping
1015 * unlocked pages.
1017 void
1018 anon_swap_free(struct anon *ap, page_t *pp)
1020 kmutex_t *ahm;
1022 ASSERT(ap != NULL);
1023 ASSERT(pp != NULL);
1024 ASSERT(PAGE_LOCKED(pp));
1025 VERIFY(pp->p_object != NULL);
1026 ASSERT(pp->p_vnode != NULL);
1027 ASSERT(IS_SWAPFSVP(pp->p_vnode));
1028 ASSERT(ap->an_refcnt != 0);
1029 VERIFY(pp->p_object == &ap->an_vp->v_object);
1030 ASSERT(pp->p_vnode == ap->an_vp);
1031 ASSERT(pp->p_offset == ap->an_off);
1033 if (ap->an_pvp == NULL)
1034 return;
1036 page_io_lock(pp);
1037 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1038 mutex_enter(ahm);
1040 ASSERT(ap->an_refcnt != 0);
1041 VERIFY(pp->p_object == &ap->an_vp->v_object);
1042 ASSERT(pp->p_vnode == ap->an_vp);
1043 ASSERT(pp->p_offset == ap->an_off);
1045 if (ap->an_pvp != NULL) {
1046 swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
1047 ap->an_pvp = NULL;
1048 ap->an_poff = 0;
1049 mutex_exit(ahm);
1050 hat_setmod(pp);
1051 } else {
1052 mutex_exit(ahm);
1054 page_io_unlock(pp);
1058 * Decrement the reference count of an anon page.
1059 * If reference count goes to zero, free it and
1060 * its associated page (if any).
1062 void
1063 anon_decref(struct anon *ap)
1065 page_t *pp;
1066 struct vnode *vp;
1067 anoff_t off;
1068 kmutex_t *ahm;
1070 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1071 mutex_enter(ahm);
1072 ASSERT(ap->an_refcnt != 0);
1073 if (ap->an_refcnt == 0)
1074 panic("anon_decref: slot count 0");
1075 if (--ap->an_refcnt == 0) {
1076 swap_xlate(ap, &vp, &off);
1077 anon_rmhash(ap);
1078 if (ap->an_pvp != NULL)
1079 swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
1080 mutex_exit(ahm);
1083 * If there is a page for this anon slot we will need to
1084 * call VN_DISPOSE to get rid of the vp association and
1085 * put the page back on the free list as really free.
1086 * Acquire the "exclusive" lock to ensure that any
1087 * pending i/o always completes before the swap slot
1088 * is freed.
1090 pp = page_lookup(&vp->v_object, (uoff_t)off, SE_EXCL);
1091 if (pp != NULL) {
1092 VN_DISPOSE(pp, B_INVAL, 0, kcred);
1094 ANON_PRINT(A_ANON, ("anon_decref: free ap %p, vp %p\n",
1095 (void *)ap, (void *)ap->an_vp));
1097 kmem_cache_free(anon_cache, ap);
1099 ANI_ADD(1);
1100 } else {
1101 mutex_exit(ahm);
1107 * check an_refcnt of the root anon slot (anon_index argument is aligned at
1108 * seg->s_szc level) to determine whether COW processing is required.
1109 * anonpages_hash_lock[] held on the root ap ensures that if root's
1110 * refcnt is 1 all other refcnt's are 1 as well (and they can't increase
1111 * later since this process can't fork while its AS lock is held).
1113 * returns 1 if the root anon slot has a refcnt > 1 otherwise returns 0.
1116 anon_szcshare(struct anon_hdr *ahp, ulong_t anon_index)
1118 struct anon *ap;
1119 kmutex_t *ahmpages = NULL;
1121 ap = anon_get_ptr(ahp, anon_index);
1122 if (ap == NULL)
1123 return (0);
1125 ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
1126 mutex_enter(ahmpages);
1127 ASSERT(ap->an_refcnt >= 1);
1128 if (ap->an_refcnt == 1) {
1129 mutex_exit(ahmpages);
1130 return (0);
1132 mutex_exit(ahmpages);
1133 return (1);
1136 * Check 'nslots' anon slots for refcnt > 1.
1138 * returns 1 if any of the 'nslots' anon slots has a refcnt > 1 otherwise
1139 * returns 0.
1141 static int
1142 anon_share(struct anon_hdr *ahp, ulong_t anon_index, pgcnt_t nslots)
1144 struct anon *ap;
1146 while (nslots-- > 0) {
1147 if ((ap = anon_get_ptr(ahp, anon_index)) != NULL &&
1148 ap->an_refcnt > 1)
1149 return (1);
1150 anon_index++;
1153 return (0);
/*
 * anon_decref_pages: drop one reference from every anon slot of the large
 * page region of 'ahp' that starts at 'an_idx'.
 *
 * The region normally spans page_get_pagecnt(szc) slots; it is clipped to
 * ahp->size - an_idx when the array ends earlier.
 *
 * Two modes, chosen from the root slot's an_refcnt while its APH_MUTEX is
 * held:
 *  - refcnt == 1 (or no root slot): the lock is released and each slot is
 *    torn down completely: cleared from the array, refcnt dropped to 0,
 *    unhashed, swap space freed, the page disposed/destroyed and the anon
 *    structure returned to anon_cache.
 *  - refcnt > 1: the APH_MUTEX stays held for the whole walk; each slot is
 *    cleared from the array and its refcnt decremented under AH_MUTEX.
 */
1156 static void
1157 anon_decref_pages(
1158 struct anon_hdr *ahp,
1159 ulong_t an_idx,
1160 uint_t szc)
1162 struct anon *ap = anon_get_ptr(ahp, an_idx);
1163 kmutex_t *ahmpages = NULL;
1164 page_t *pp;
1165 pgcnt_t pgcnt = page_get_pagecnt(szc);
1166 pgcnt_t i;
1167 struct vnode *vp;
1168 anoff_t off;
1169 kmutex_t *ahm;
/* refcnt exists only in DEBUG builds; it is referenced solely from ASSERTs. */
1170 #ifdef DEBUG
1171 int refcnt = 1;
1172 #endif
1174 ASSERT(szc != 0);
1175 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
1176 ASSERT(IS_P2ALIGNED(an_idx, pgcnt));
1177 ASSERT(an_idx < ahp->size);
1179 if (ahp->size - an_idx < pgcnt) {
1181 * In case of shared mappings total anon map size may not be
1182 * the largest page size aligned.
1184 pgcnt = ahp->size - an_idx;
1187 VM_STAT_ADD(anonvmstats.decrefpages[0]);
/*
 * Sample the root slot's refcnt under its APH_MUTEX. The lock is dropped
 * again (ahmpages reset to NULL) only when the root is not shared.
 */
1189 if (ap != NULL) {
1190 ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
1191 mutex_enter(ahmpages);
1192 ASSERT((refcnt = ap->an_refcnt) != 0);
1193 VM_STAT_ADD(anonvmstats.decrefpages[1]);
1194 if (ap->an_refcnt == 1) {
1195 VM_STAT_ADD(anonvmstats.decrefpages[2]);
1196 ASSERT(!anon_share(ahp, an_idx, pgcnt));
1197 mutex_exit(ahmpages);
1198 ahmpages = NULL;
1202 i = 0;
1203 while (i < pgcnt) {
1204 if ((ap = anon_get_ptr(ahp, an_idx + i)) == NULL) {
1205 ASSERT(refcnt == 1 && ahmpages == NULL);
1206 i++;
1207 continue;
1209 ASSERT(ap->an_refcnt == refcnt);
1210 ASSERT(ahmpages != NULL || ap->an_refcnt == 1);
1211 ASSERT(ahmpages == NULL || ap->an_refcnt > 1);
/* ahmpages == NULL: the region is unshared, so slots and pages are freed. */
1213 if (ahmpages == NULL) {
1214 swap_xlate(ap, &vp, &off);
1215 pp = page_lookup(&vp->v_object, (uoff_t)off, SE_EXCL);
/* No page, or a PAGESIZE page: tear down just this one slot. */
1216 if (pp == NULL || pp->p_szc == 0) {
1217 VM_STAT_ADD(anonvmstats.decrefpages[3]);
1218 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1219 (void) anon_set_ptr(ahp, an_idx + i, NULL,
1220 ANON_SLEEP);
1221 mutex_enter(ahm);
1222 ap->an_refcnt--;
1223 ASSERT(ap->an_refcnt == 0);
1224 anon_rmhash(ap);
1225 if (ap->an_pvp)
1226 swap_phys_free(ap->an_pvp, ap->an_poff,
1227 PAGESIZE);
1228 mutex_exit(ahm);
/* Look the page up a second time now that the slot is unhashed. */
1229 if (pp == NULL) {
1230 pp = page_lookup(&vp->v_object,
1231 (uoff_t)off, SE_EXCL);
1232 ASSERT(pp == NULL || pp->p_szc == 0);
1234 if (pp != NULL) {
1235 VM_STAT_ADD(anonvmstats.decrefpages[4]);
1236 VN_DISPOSE(pp, B_INVAL, 0, kcred);
1238 kmem_cache_free(anon_cache, ap);
1239 ANI_ADD(1);
1240 i++;
/*
 * Large page: gather all curpgcnt constituent pages SE_EXCL into ppa[],
 * free the corresponding anon slots, then release the pages as a group.
 */
1241 } else {
1242 pgcnt_t j;
1243 pgcnt_t curpgcnt =
1244 page_get_pagecnt(pp->p_szc);
1245 size_t ppasize = curpgcnt * sizeof (page_t *);
1246 page_t **ppa = kmem_alloc(ppasize, KM_SLEEP);
1247 int dispose = 0;
1249 VM_STAT_ADD(anonvmstats.decrefpages[5]);
1251 ASSERT(pp->p_szc <= szc);
1252 ASSERT(IS_P2ALIGNED(curpgcnt, curpgcnt));
1253 ASSERT(IS_P2ALIGNED(i, curpgcnt));
1254 ASSERT(i + curpgcnt <= pgcnt);
1255 ASSERT(!(page_pptonum(pp) & (curpgcnt - 1)));
1256 ppa[0] = pp;
1257 for (j = i + 1; j < i + curpgcnt; j++) {
1258 ap = anon_get_ptr(ahp, an_idx + j);
1259 ASSERT(ap != NULL &&
1260 ap->an_refcnt == 1);
1261 swap_xlate(ap, &vp, &off);
1262 pp = page_lookup(&vp->v_object,
1263 (uoff_t)off, SE_EXCL);
1264 if (pp == NULL)
1265 panic("anon_decref_pages: "
1266 "no page");
1268 (void) hat_pageunload(pp,
1269 HAT_FORCE_PGUNLOAD);
1270 ASSERT(pp->p_szc == ppa[0]->p_szc);
1271 ASSERT(page_pptonum(pp) - 1 ==
1272 page_pptonum(ppa[j - i - 1]));
1273 ppa[j - i] = pp;
/*
 * dispose is set when a slot's physical swap vnode has a vop_dispose
 * routine other than fs_dispose (or NULL); the pages are then handed to
 * VN_DISPOSE one by one instead of page_destroy_pages().
 */
1274 if (ap->an_pvp != NULL &&
1275 (ap->an_pvp->v_op->vop_dispose != fs_dispose &&
1276 ap->an_pvp->v_op->vop_dispose != NULL))
1277 dispose = 1;
1279 for (j = i; j < i + curpgcnt; j++) {
1280 ap = anon_get_ptr(ahp, an_idx + j);
1281 ASSERT(ap != NULL &&
1282 ap->an_refcnt == 1);
1283 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1284 (void) anon_set_ptr(ahp, an_idx + j,
1285 NULL, ANON_SLEEP);
1286 mutex_enter(ahm);
1287 ap->an_refcnt--;
1288 ASSERT(ap->an_refcnt == 0);
1289 anon_rmhash(ap);
1290 if (ap->an_pvp)
1291 swap_phys_free(ap->an_pvp,
1292 ap->an_poff, PAGESIZE);
1293 mutex_exit(ahm);
1294 kmem_cache_free(anon_cache, ap);
1295 ANI_ADD(1);
1297 if (!dispose) {
1298 VM_STAT_ADD(anonvmstats.decrefpages[6]);
1299 page_destroy_pages(ppa[0]);
1300 } else {
1301 VM_STAT_ADD(anonvmstats.decrefpages[7]);
/* Demote every constituent to szc 0 before disposing them individually. */
1302 for (j = 0; j < curpgcnt; j++) {
1303 ASSERT(PAGE_EXCL(ppa[j]));
1304 ppa[j]->p_szc = 0;
1306 for (j = 0; j < curpgcnt; j++) {
1307 ASSERT(!hat_page_is_mapped(
1308 ppa[j]));
1309 VN_DISPOSE(ppa[j], B_INVAL, 0,
1310 kcred);
1313 kmem_free(ppa, ppasize);
1314 i += curpgcnt;
/* Shared region: only clear the slot and drop one reference. */
1316 } else {
1317 VM_STAT_ADD(anonvmstats.decrefpages[8]);
1318 (void) anon_set_ptr(ahp, an_idx + i, NULL, ANON_SLEEP);
1319 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1320 mutex_enter(ahm);
1321 ap->an_refcnt--;
1322 mutex_exit(ahm);
1323 i++;
1327 if (ahmpages != NULL) {
1328 mutex_exit(ahmpages);
1333 * Duplicate references to size bytes worth of anon pages.
1334 * Used when duplicating a segment that contains private anon pages.
1335 * This code assumes that procedure calling this one has already used
1336 * hat_chgprot() to disable write access to the range of addresses that
1337 * that *old actually refers to.
1339 void
1340 anon_dup(struct anon_hdr *old, ulong_t old_idx, struct anon_hdr *new,
1341 ulong_t new_idx, size_t size)
1343 spgcnt_t npages;
1344 kmutex_t *ahm;
1345 struct anon *ap;
1346 ulong_t off;
1347 ulong_t index;
1349 npages = btopr(size);
1350 while (npages > 0) {
1351 index = old_idx;
1352 if ((ap = anon_get_next_ptr(old, &index)) == NULL)
1353 break;
1355 ASSERT(!ANON_ISBUSY(anon_get_slot(old, index)));
1356 off = index - old_idx;
1357 npages -= off;
1358 if (npages <= 0)
1359 break;
1361 (void) anon_set_ptr(new, new_idx + off, ap, ANON_SLEEP);
1362 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1364 mutex_enter(ahm);
1365 ap->an_refcnt++;
1366 mutex_exit(ahm);
1368 off++;
1369 new_idx += off;
1370 old_idx += off;
1371 npages--;
1376 * Just like anon_dup but also guarantees there are no holes (unallocated anon
1377 * slots) within any large page region. That means if a large page region is
1378 * empty in the old array it will skip it. If there are 1 or more valid slots
1379 * in the large page region of the old array it will make sure to fill in any
1380 * unallocated ones and also copy them to the new array. If noalloc is 1 large
1381 * page region should either have no valid anon slots or all slots should be
1382 * valid.
1384 void
1385 anon_dup_fill_holes(
1386 struct anon_hdr *old,
1387 ulong_t old_idx,
1388 struct anon_hdr *new,
1389 ulong_t new_idx,
1390 size_t size,
1391 uint_t szc,
1392 int noalloc)
1394 struct anon *ap;
1395 spgcnt_t npages;
1396 kmutex_t *ahm, *ahmpages = NULL;
1397 pgcnt_t pgcnt, i;
1398 ulong_t index, off;
1399 #ifdef DEBUG
1400 int refcnt;
1401 #endif
1403 ASSERT(szc != 0);
1404 pgcnt = page_get_pagecnt(szc);
1405 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
1406 npages = btopr(size);
1407 ASSERT(IS_P2ALIGNED(npages, pgcnt));
1408 ASSERT(IS_P2ALIGNED(old_idx, pgcnt));
1410 VM_STAT_ADD(anonvmstats.dupfillholes[0]);
1412 while (npages > 0) {
1413 index = old_idx;
1416 * Find the next valid slot.
1418 if (anon_get_next_ptr(old, &index) == NULL)
1419 break;
1421 ASSERT(!ANON_ISBUSY(anon_get_slot(old, index)));
1423 * Now backup index to the beginning of the
1424 * current large page region of the old array.
1426 index = P2ALIGN(index, pgcnt);
1427 off = index - old_idx;
1428 ASSERT(IS_P2ALIGNED(off, pgcnt));
1429 npages -= off;
1430 if (npages <= 0)
1431 break;
1434 * Fill and copy a large page regions worth
1435 * of anon slots.
1437 for (i = 0; i < pgcnt; i++) {
1438 if ((ap = anon_get_ptr(old, index + i)) == NULL) {
1439 if (noalloc) {
1440 panic("anon_dup_fill_holes: "
1441 "empty anon slot\n");
1443 VM_STAT_ADD(anonvmstats.dupfillholes[1]);
1444 ap = anon_alloc(NULL, 0);
1445 (void) anon_set_ptr(old, index + i, ap,
1446 ANON_SLEEP);
1447 } else if (i == 0) {
1449 * make the increment of all refcnts of all
1450 * anon slots of a large page appear atomic by
1451 * getting an anonpages_hash_lock for the
1452 * first anon slot of a large page.
1454 VM_STAT_ADD(anonvmstats.dupfillholes[2]);
1456 ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
1457 mutex_enter(ahmpages);
1459 ASSERT(refcnt = ap->an_refcnt);
1461 VM_STAT_COND_ADD(ap->an_refcnt > 1,
1462 anonvmstats.dupfillholes[3]);
1464 (void) anon_set_ptr(new, new_idx + off + i, ap,
1465 ANON_SLEEP);
1466 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1467 mutex_enter(ahm);
1468 ASSERT(ahmpages != NULL || ap->an_refcnt == 1);
1469 ASSERT(i == 0 || ahmpages == NULL ||
1470 refcnt == ap->an_refcnt);
1471 ap->an_refcnt++;
1472 mutex_exit(ahm);
1474 if (ahmpages != NULL) {
1475 mutex_exit(ahmpages);
1476 ahmpages = NULL;
1478 off += pgcnt;
1479 new_idx += off;
1480 old_idx += off;
1481 npages -= pgcnt;
1486 * Used when a segment with a vnode changes szc. similarly to
1487 * anon_dup_fill_holes() makes sure each large page region either has no anon
1488 * slots or all of them. but new slots are created by COWing the file
1489 * pages. on entrance no anon slots should be shared.
1492 anon_fill_cow_holes(
1493 struct seg *seg,
1494 caddr_t addr,
1495 struct anon_hdr *ahp,
1496 ulong_t an_idx,
1497 struct vnode *vp,
1498 uoff_t vp_off,
1499 size_t size,
1500 uint_t szc,
1501 uint_t prot,
1502 struct vpage vpage[],
1503 struct cred *cred)
1505 struct anon *ap;
1506 spgcnt_t npages;
1507 pgcnt_t pgcnt, i;
1508 ulong_t index, off;
1509 int err = 0;
1510 int pageflags = 0;
1512 ASSERT(szc != 0);
1513 pgcnt = page_get_pagecnt(szc);
1514 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
1515 npages = btopr(size);
1516 ASSERT(IS_P2ALIGNED(npages, pgcnt));
1517 ASSERT(IS_P2ALIGNED(an_idx, pgcnt));
1519 while (npages > 0) {
1520 index = an_idx;
1523 * Find the next valid slot.
1525 if (anon_get_next_ptr(ahp, &index) == NULL) {
1526 break;
1529 ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index)));
1531 * Now backup index to the beginning of the
1532 * current large page region of the anon array.
1534 index = P2ALIGN(index, pgcnt);
1535 off = index - an_idx;
1536 ASSERT(IS_P2ALIGNED(off, pgcnt));
1537 npages -= off;
1538 if (npages <= 0)
1539 break;
1540 an_idx += off;
1541 vp_off += ptob(off);
1542 addr += ptob(off);
1543 if (vpage != NULL) {
1544 vpage += off;
1547 for (i = 0; i < pgcnt; i++, an_idx++, vp_off += PAGESIZE) {
1548 if ((ap = anon_get_ptr(ahp, an_idx)) == NULL) {
1549 page_t *pl[1 + 1];
1550 page_t *pp;
1552 err = fop_getpage(vp, vp_off, PAGESIZE, NULL,
1553 pl, PAGESIZE, seg, addr, S_READ, cred,
1554 NULL);
1555 if (err) {
1556 break;
1558 if (vpage != NULL) {
1559 prot = VPP_PROT(vpage);
1560 pageflags = VPP_ISPPLOCK(vpage) ?
1561 LOCK_PAGE : 0;
1563 pp = anon_private(&ap, seg, addr, prot, pl[0],
1564 pageflags, cred);
1565 if (pp == NULL) {
1566 err = ENOMEM;
1567 break;
1569 (void) anon_set_ptr(ahp, an_idx, ap,
1570 ANON_SLEEP);
1571 page_unlock(pp);
1573 ASSERT(ap->an_refcnt == 1);
1574 addr += PAGESIZE;
1575 if (vpage != NULL) {
1576 vpage++;
1579 npages -= pgcnt;
1582 return (err);
1586 * Free a group of "size" anon pages, size in bytes,
1587 * and clear out the pointers to the anon entries.
1589 void
1590 anon_free(struct anon_hdr *ahp, ulong_t index, size_t size)
1592 spgcnt_t npages;
1593 struct anon *ap;
1594 ulong_t old;
1596 npages = btopr(size);
1598 while (npages > 0) {
1599 old = index;
1600 if ((ap = anon_get_next_ptr(ahp, &index)) == NULL)
1601 break;
1603 ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index)));
1604 npages -= index - old;
1605 if (npages <= 0)
1606 break;
1608 (void) anon_set_ptr(ahp, index, NULL, ANON_SLEEP);
1609 anon_decref(ap);
1611 * Bump index and decrement page count
1613 index++;
1614 npages--;
1618 void
1619 anon_free_pages(
1620 struct anon_hdr *ahp,
1621 ulong_t an_idx,
1622 size_t size,
1623 uint_t szc)
1625 spgcnt_t npages;
1626 pgcnt_t pgcnt;
1627 ulong_t index, off;
1629 ASSERT(szc != 0);
1630 pgcnt = page_get_pagecnt(szc);
1631 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
1632 npages = btopr(size);
1633 ASSERT(IS_P2ALIGNED(npages, pgcnt));
1634 ASSERT(IS_P2ALIGNED(an_idx, pgcnt));
1635 ASSERT(an_idx < ahp->size);
1637 VM_STAT_ADD(anonvmstats.freepages[0]);
1639 while (npages > 0) {
1640 index = an_idx;
1643 * Find the next valid slot.
1645 if (anon_get_next_ptr(ahp, &index) == NULL)
1646 break;
1648 ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index)));
1650 * Now backup index to the beginning of the
1651 * current large page region of the old array.
1653 index = P2ALIGN(index, pgcnt);
1654 off = index - an_idx;
1655 ASSERT(IS_P2ALIGNED(off, pgcnt));
1656 npages -= off;
1657 if (npages <= 0)
1658 break;
1660 anon_decref_pages(ahp, index, szc);
1662 off += pgcnt;
1663 an_idx += off;
1664 npages -= pgcnt;
/*
 * anon_disclaim: walk btopr(size) anon slots of amp->ahp starting at
 * 'index' and try to free the page behind each valid slot.
 *
 * behav must be MADV_FREE or MADV_PURGE (VERIFY'd below); the caller is
 * expected to hold amp->a_rwlock as reader (ASSERT'd below).
 *
 * A page is skipped (counted in segadvstat.MADV_FREE_miss) when it cannot
 * be locked SE_EXCL without waiting, when it is locked (p_lckcnt/p_cowcnt
 * non-zero), or when the slot's an_refcnt is > 1. Otherwise the slot's swap
 * space is released and the page is unloaded and disposed/destroyed.
 * For MADV_PURGE on a base page the anon slot itself is also removed from
 * the array and freed; the number of such slots is stored in *purged when
 * purged is not NULL.
 *
 * Returns 0, or EBUSY if a locked page was met or a large page that had to
 * be demoted could not be.
 */
1669 * Make anonymous pages discardable
1672 anon_disclaim(struct anon_map *amp, ulong_t index, size_t size,
1673 uint_t behav, pgcnt_t *purged)
1675 spgcnt_t npages = btopr(size);
1676 struct anon *ap;
1677 struct vnode *vp;
1678 anoff_t off;
1679 page_t *pp, *root_pp;
1680 kmutex_t *ahm;
1681 pgcnt_t pgcnt, npurged = 0;
1682 ulong_t old_idx, idx, i;
1683 struct anon_hdr *ahp = amp->ahp;
1684 anon_sync_obj_t cookie;
1685 int err = 0;
1687 VERIFY(behav == MADV_FREE || behav == MADV_PURGE);
1688 ASSERT(RW_READ_HELD(&amp->a_rwlock));
1689 pgcnt = 1;
/*
 * The loop step advances by one slot for base pages and to the next
 * pgcnt-aligned index for large pages; pgcnt is updated inside the body.
 */
1690 for (; npages > 0; index = (pgcnt == 1) ? index + 1 :
1691 P2ROUNDUP(index + 1, pgcnt), npages -= pgcnt) {
1694 * get anon pointer and index for the first valid entry
1695 * in the anon list, starting from "index"
1697 old_idx = index;
1698 if ((ap = anon_get_next_ptr(ahp, &index)) == NULL)
1699 break;
1702 * decrement npages by number of NULL anon slots we skipped
1704 npages -= index - old_idx;
1705 if (npages <= 0)
1706 break;
/* Re-read the slot after entering the anon array for this index. */
1708 anon_array_enter(amp, index, &cookie);
1709 ap = anon_get_ptr(ahp, index);
1710 ASSERT(ap != NULL);
1713 * Get anonymous page and try to lock it SE_EXCL;
1714 * if we couldn't grab the lock we skip to next page.
1716 swap_xlate(ap, &vp, &off);
1717 pp = page_lookup_nowait(&vp->v_object, (uoff_t)off, SE_EXCL);
1718 if (pp == NULL) {
1719 segadvstat.MADV_FREE_miss.value.ul++;
1720 pgcnt = 1;
1721 anon_array_exit(&cookie);
1722 continue;
1724 pgcnt = page_get_pagecnt(pp->p_szc);
1727 * we cannot free a page which is permanently locked.
1728 * The page_struct_lock need not be acquired to examine
1729 * these fields since the page has an "exclusive" lock.
1731 if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
1732 page_unlock(pp);
1733 segadvstat.MADV_FREE_miss.value.ul++;
1734 anon_array_exit(&cookie);
1735 err = EBUSY;
1736 continue;
1739 ahm = AH_MUTEX(vp, off);
1740 mutex_enter(ahm);
1741 ASSERT(ap->an_refcnt != 0);
1743 * skip this one if copy-on-write is not yet broken.
1745 if (ap->an_refcnt > 1) {
1746 mutex_exit(ahm);
1747 page_unlock(pp);
1748 segadvstat.MADV_FREE_miss.value.ul++;
1749 anon_array_exit(&cookie);
1750 continue;
1753 if (behav == MADV_PURGE && pp->p_szc != 0) {
1755 * If we're purging and we have a large page, simplify
1756 * things a bit by demoting ourselves into the base
1757 * page case.
1759 (void) page_try_demote_pages(pp);
/* Base page case (also reached after a successful demotion above). */
1762 if (pp->p_szc == 0) {
1763 pgcnt = 1;
1766 * free swap slot;
1768 if (ap->an_pvp) {
1769 swap_phys_free(ap->an_pvp, ap->an_poff,
1770 PAGESIZE);
1771 ap->an_pvp = NULL;
1772 ap->an_poff = 0;
1775 if (behav == MADV_PURGE) {
1777 * If we're purging (instead of merely freeing),
1778 * rip out this anon structure entirely to
1779 * assure that any subsequent fault pulls from
1780 * the backing vnode (if any).
1782 if (--ap->an_refcnt == 0)
1783 anon_rmhash(ap);
1785 mutex_exit(ahm);
1786 (void) anon_set_ptr(ahp, index,
1787 NULL, ANON_SLEEP);
1788 npurged++;
1789 ANI_ADD(1);
1790 kmem_cache_free(anon_cache, ap);
1791 } else {
1792 mutex_exit(ahm);
1795 segadvstat.MADV_FREE_hit.value.ul++;
1798 * while we are at it, unload all the translations
1799 * and attempt to free the page.
1801 (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
1803 VN_DISPOSE(pp,
1804 behav == MADV_FREE ? B_FREE : B_INVAL, 0, kcred);
1806 anon_array_exit(&cookie);
1807 continue;
/*
 * Large page case. If the request does not cover the whole, aligned large
 * page, try to demote it and free only this base page.
 */
1810 pgcnt = page_get_pagecnt(pp->p_szc);
1811 if (!IS_P2ALIGNED(index, pgcnt) || npages < pgcnt) {
1812 if (!page_try_demote_pages(pp)) {
1813 mutex_exit(ahm);
1814 page_unlock(pp);
1815 segadvstat.MADV_FREE_miss.value.ul++;
1816 anon_array_exit(&cookie);
1817 err = EBUSY;
1818 continue;
1819 } else {
1820 pgcnt = 1;
1821 if (ap->an_pvp) {
1822 swap_phys_free(ap->an_pvp,
1823 ap->an_poff, PAGESIZE);
1824 ap->an_pvp = NULL;
1825 ap->an_poff = 0;
1827 mutex_exit(ahm);
1828 (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
1830 VN_DISPOSE(pp, B_FREE, 0, kcred);
1831 segadvstat.MADV_FREE_hit.value.ul++;
1832 anon_array_exit(&cookie);
1833 continue;
1836 mutex_exit(ahm);
1837 root_pp = pp;
1840 * try to lock remaining pages
/* idx ends up as the number of constituent pages locked, root included. */
1842 for (idx = 1; idx < pgcnt; idx++) {
1843 pp++;
1844 if (!page_trylock(pp, SE_EXCL))
1845 break;
1846 if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
1847 page_unlock(pp);
1848 break;
/* All constituents locked: release swap for each slot, then destroy. */
1852 if (idx == pgcnt) {
1853 for (i = 0; i < pgcnt; i++) {
1854 ap = anon_get_ptr(ahp, index + i);
1855 if (ap == NULL)
1856 break;
1857 swap_xlate(ap, &vp, &off);
1858 ahm = AH_MUTEX(vp, off);
1859 mutex_enter(ahm);
1860 ASSERT(ap->an_refcnt != 0);
1863 * skip this one if copy-on-write
1864 * is not yet broken.
1866 if (ap->an_refcnt > 1) {
1867 mutex_exit(ahm);
1868 goto skiplp;
1870 if (ap->an_pvp) {
1871 swap_phys_free(ap->an_pvp,
1872 ap->an_poff, PAGESIZE);
1873 ap->an_pvp = NULL;
1874 ap->an_poff = 0;
1876 mutex_exit(ahm);
1878 page_destroy_pages(root_pp);
1879 segadvstat.MADV_FREE_hit.value.ul += pgcnt;
1880 anon_array_exit(&cookie);
1881 continue;
/* skiplp: give up on this large page and unlock the idx pages locked so far. */
1883 skiplp:
1884 segadvstat.MADV_FREE_miss.value.ul += pgcnt;
1885 for (i = 0, pp = root_pp; i < idx; pp++, i++)
1886 page_unlock(pp);
1887 anon_array_exit(&cookie);
1890 if (purged != NULL)
1891 *purged = npurged;
1893 return (err);
1897 * Return the kept page(s) and protections back to the segment driver.
1900 anon_getpage(
1901 struct anon **app,
1902 uint_t *protp,
1903 page_t *pl[],
1904 size_t plsz,
1905 struct seg *seg,
1906 caddr_t addr,
1907 enum seg_rw rw,
1908 struct cred *cred)
1910 page_t *pp;
1911 struct anon *ap = *app;
1912 struct vnode *vp;
1913 anoff_t off;
1914 int err;
1915 kmutex_t *ahm;
1917 swap_xlate(ap, &vp, &off);
1920 * Lookup the page. If page is being paged in,
1921 * wait for it to finish as we must return a list of
1922 * pages since this routine acts like the fop_getpage
1923 * routine does.
1925 if (pl != NULL && (pp = page_lookup(&vp->v_object, (uoff_t)off, SE_SHARED))) {
1926 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1927 mutex_enter(ahm);
1928 if (ap->an_refcnt == 1)
1929 *protp = PROT_ALL;
1930 else
1931 *protp = PROT_ALL & ~PROT_WRITE;
1932 mutex_exit(ahm);
1933 pl[0] = pp;
1934 pl[1] = NULL;
1935 return (0);
1939 * Simply treat it as a vnode fault on the anon vp.
1942 err = fop_getpage(vp, (uoff_t)off, PAGESIZE, protp, pl, plsz,
1943 seg, addr, rw, cred, NULL);
1945 if (err == 0 && pl != NULL) {
1946 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
1947 mutex_enter(ahm);
1948 if (ap->an_refcnt != 1)
1949 *protp &= ~PROT_WRITE; /* make read-only */
1950 mutex_exit(ahm);
1952 return (err);
/*
 * anon_map_getpages: fill ppa[] with the locked page(s) backing the anon
 * slots of amp->ahp starting at start_idx, for page size code szc, and
 * report the allowed protections through *protp.
 *
 * Return values visible in this function: 0 on success, a positive error
 * from anon_getpage()/swap_getconpage(), ENOMEM when anon_zero() fails,
 * -1 when a large page could not be allocated/relocated, and -2 (with
 * *ppa_szc set) when a page larger than szc already exists and szc is
 * smaller than seg->s_szc. For a COW fault (brkcow set and write not
 * permitted) the result of anon_map_privatepages() is returned instead.
 */
1956 * Creates or returns kept pages to the segment driver. returns -1 if a large
1957 * page cannot be allocated. returns -2 if some other process has allocated a
1958 * larger page.
1960 * For cowfault it will allocate any size pages to fill the requested area to
1961 * avoid partially overwriting anon slots (i.e. sharing only some of the anon
1962 * slots within a large page with other processes). This policy greatly
1963 * simplifies large page freeing (which is only freed when all anon slot
1964 * refcnts are 0).
1967 anon_map_getpages(
1968 struct anon_map *amp,
1969 ulong_t start_idx,
1970 uint_t szc,
1971 struct seg *seg,
1972 caddr_t addr,
1973 uint_t prot,
1974 uint_t *protp,
1975 page_t *ppa[],
1976 uint_t *ppa_szc,
1977 struct vpage vpage[],
1978 enum seg_rw rw,
1979 int brkcow,
1980 int anypgsz,
1981 int pgflags,
1982 struct cred *cred)
1984 pgcnt_t pgcnt;
1985 struct anon *ap;
1986 struct vnode *vp;
1987 anoff_t off;
1988 page_t *pp, *pl[2], *conpp = NULL;
1989 caddr_t vaddr;
1990 ulong_t pg_idx, an_idx, i;
1991 spgcnt_t nreloc = 0;
1992 int prealloc = 1;
1993 int err, slotcreate;
1994 uint_t vpprot;
/* upsize: the request is for a page size smaller than the segment's. */
1995 int upsize = (szc < seg->s_szc);
1997 #if !defined(__i386) && !defined(__amd64)
1998 ASSERT(seg->s_szc != 0);
1999 #endif
2000 ASSERT(szc <= seg->s_szc);
2001 ASSERT(ppa_szc != NULL);
2002 ASSERT(rw != S_CREATE);
2004 *protp = PROT_ALL;
2006 VM_STAT_ADD(anonvmstats.getpages[0]);
/* szc == 0: single PAGESIZE page, handled entirely in this branch. */
2008 if (szc == 0) {
2009 VM_STAT_ADD(anonvmstats.getpages[1]);
2010 if ((ap = anon_get_ptr(amp->ahp, start_idx)) != NULL) {
2011 err = anon_getpage(&ap, protp, pl, PAGESIZE, seg,
2012 addr, rw, cred);
2013 if (err)
2014 return (err);
2015 ppa[0] = pl[0];
2016 if (brkcow == 0 || (*protp & PROT_WRITE)) {
2017 VM_STAT_ADD(anonvmstats.getpages[2]);
2018 if (ppa[0]->p_szc != 0 && upsize) {
2019 VM_STAT_ADD(anonvmstats.getpages[3]);
2020 *ppa_szc = MIN(ppa[0]->p_szc,
2021 seg->s_szc);
2022 page_unlock(ppa[0]);
2023 return (-2);
2025 return (0);
2027 panic("anon_map_getpages: cowfault for szc 0");
2028 } else {
2029 VM_STAT_ADD(anonvmstats.getpages[4]);
2030 ppa[0] = anon_zero(seg, addr, &ap, cred);
2031 if (ppa[0] == NULL)
2032 return (ENOMEM);
2033 (void) anon_set_ptr(amp->ahp, start_idx, ap,
2034 ANON_SLEEP);
2035 return (0);
2039 pgcnt = page_get_pagecnt(szc);
2040 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
2041 ASSERT(IS_P2ALIGNED(start_idx, pgcnt));
2044 * First we check for the case that the requtested large
2045 * page or larger page already exists in the system.
2046 * Actually we only check if the first constituent page
2047 * exists and only preallocate if it's not found.
2049 ap = anon_get_ptr(amp->ahp, start_idx);
2050 if (ap) {
2051 uint_t pszc;
2052 swap_xlate(ap, &vp, &off);
2053 if (page_exists_forreal(&vp->v_object, (uoff_t)off, &pszc)) {
2054 if (pszc > szc && upsize) {
2055 *ppa_szc = MIN(pszc, seg->s_szc);
2056 return (-2);
2058 if (pszc >= szc) {
2059 prealloc = 0;
2064 VM_STAT_COND_ADD(prealloc == 0, anonvmstats.getpages[5]);
2065 VM_STAT_COND_ADD(prealloc != 0, anonvmstats.getpages[6]);
/* top: re-entered with prealloc = 1 when the page found was smaller than szc. */
2067 top:
2069 * If a smaller page or no page at all was found,
2070 * grab a large page off the freelist.
2072 if (prealloc) {
2073 ASSERT(conpp == NULL);
2074 if (page_alloc_pages(&anon_vp->v_object, seg, addr, NULL, ppa,
2075 szc, 0, pgflags) != 0) {
2076 VM_STAT_ADD(anonvmstats.getpages[7]);
2077 if (brkcow == 0 || szc < seg->s_szc ||
2078 !anon_szcshare(amp->ahp, start_idx)) {
2080 * If the refcnt's of all anon slots are <= 1
2081 * they can't increase since we are holding
2082 * the address space's lock. So segvn can
2083 * safely decrease szc without risking to
2084 * generate a cow fault for the region smaller
2085 * than the segment's largest page size.
2087 VM_STAT_ADD(anonvmstats.getpages[8]);
2088 return (-1);
/* docow: also reached from io_err when relocation failed on a shared region. */
2090 docow:
2092 * This is a cow fault. Copy away the entire 1 large
2093 * page region of this segment.
2095 if (szc != seg->s_szc)
2096 panic("anon_map_getpages: cowfault for szc %d",
2097 szc);
2098 vaddr = addr;
2099 for (pg_idx = 0, an_idx = start_idx; pg_idx < pgcnt;
2100 pg_idx++, an_idx++, vaddr += PAGESIZE) {
2101 if ((ap = anon_get_ptr(amp->ahp, an_idx)) !=
2102 NULL) {
2103 err = anon_getpage(&ap, &vpprot, pl,
2104 PAGESIZE, seg, vaddr, rw, cred);
2105 if (err) {
2106 for (i = 0; i < pg_idx; i++) {
2107 if ((pp = ppa[i]) !=
2108 NULL)
2109 page_unlock(pp);
2111 return (err);
2113 ppa[pg_idx] = pl[0];
2114 } else {
2116 * Since this is a cowfault we know
2117 * that this address space has a
2118 * parent or children which means
2119 * anon_dup_fill_holes() has initialized
2120 * all anon slots within a large page
2121 * region that had at least one anon
2122 * slot at the time of fork().
2124 panic("anon_map_getpages: "
2125 "cowfault but anon slot is empty");
2128 VM_STAT_ADD(anonvmstats.getpages[9]);
2129 *protp = PROT_ALL;
2130 return (anon_map_privatepages(amp, start_idx, szc, seg,
2131 addr, prot, ppa, vpage, anypgsz, pgflags, cred));
2135 VM_STAT_ADD(anonvmstats.getpages[10]);
/* Main loop: obtain each constituent page of the large page in order. */
2137 an_idx = start_idx;
2138 pg_idx = 0;
2139 vaddr = addr;
2140 while (pg_idx < pgcnt) {
2141 slotcreate = 0;
2142 if ((ap = anon_get_ptr(amp->ahp, an_idx)) == NULL) {
2143 VM_STAT_ADD(anonvmstats.getpages[11]);
2145 * For us to have decided not to preallocate
2146 * would have meant that a large page
2147 * was found. Which also means that all of the
2148 * anon slots for that page would have been
2149 * already created for us.
2151 if (prealloc == 0)
2152 panic("anon_map_getpages: prealloc = 0");
2154 slotcreate = 1;
2155 ap = anon_alloc(NULL, 0);
2157 swap_xlate(ap, &vp, &off);
2160 * Now setup our preallocated page to pass down
2161 * to swap_getpage().
2163 if (prealloc) {
2164 ASSERT(ppa[pg_idx]->p_szc == szc);
2165 conpp = ppa[pg_idx];
2167 ASSERT(prealloc || conpp == NULL);
2170 * If we just created this anon slot then call
2171 * with S_CREATE to prevent doing IO on the page.
2172 * Similar to the anon_zero case.
2174 err = swap_getconpage(vp, (uoff_t)off, PAGESIZE,
2175 NULL, pl, PAGESIZE, conpp, ppa_szc, &nreloc, seg, vaddr,
2176 slotcreate == 1 ? S_CREATE : rw, cred);
2178 if (err) {
2179 ASSERT(err != -2 || upsize);
2180 VM_STAT_ADD(anonvmstats.getpages[12]);
2181 ASSERT(slotcreate == 0);
2182 goto io_err;
2185 pp = pl[0];
/* Page size mismatch: report a larger page (-2) or retry with preallocation. */
2187 if (pp->p_szc < szc || (pp->p_szc > szc && upsize)) {
2188 VM_STAT_ADD(anonvmstats.getpages[13]);
2189 ASSERT(slotcreate == 0);
2190 ASSERT(prealloc == 0);
2191 ASSERT(pg_idx == 0);
2192 if (pp->p_szc > szc) {
2193 ASSERT(upsize);
2194 *ppa_szc = MIN(pp->p_szc, seg->s_szc);
2195 page_unlock(pp);
2196 VM_STAT_ADD(anonvmstats.getpages[14]);
2197 return (-2);
2199 page_unlock(pp);
2200 prealloc = 1;
2201 goto top;
2205 * If we decided to preallocate but fop_getpage
2206 * found a page in the system that satisfies our
2207 * request then free up our preallocated large page
2208 * and continue looping accross the existing large
2209 * page via fop_getpage.
2211 if (prealloc && pp != ppa[pg_idx]) {
2212 VM_STAT_ADD(anonvmstats.getpages[15]);
2213 ASSERT(slotcreate == 0);
2214 ASSERT(pg_idx == 0);
2215 conpp = NULL;
2216 prealloc = 0;
2217 page_free_pages(ppa[0]);
2220 if (prealloc && nreloc > 1) {
2222 * we have relocated out of a smaller large page.
2223 * skip npgs - 1 iterations and continue which will
2224 * increment by one the loop indices.
2226 spgcnt_t npgs = nreloc;
2228 VM_STAT_ADD(anonvmstats.getpages[16]);
2230 ASSERT(pp == ppa[pg_idx]);
2231 ASSERT(slotcreate == 0);
2232 ASSERT(pg_idx + npgs <= pgcnt);
2233 if ((*protp & PROT_WRITE) &&
2234 anon_share(amp->ahp, an_idx, npgs)) {
2235 *protp &= ~PROT_WRITE;
2237 pg_idx += npgs;
2238 an_idx += npgs;
2239 vaddr += PAGESIZE * npgs;
2240 continue;
2243 VM_STAT_ADD(anonvmstats.getpages[17]);
2246 * Anon_zero case.
2248 if (slotcreate) {
2249 ASSERT(prealloc);
2250 pagezero(pp, 0, PAGESIZE);
2251 CPU_STATS_ADD_K(vm, zfod, 1);
2252 hat_setrefmod(pp);
2255 ASSERT(prealloc == 0 || ppa[pg_idx] == pp);
2256 ASSERT(prealloc != 0 || PAGE_SHARED(pp));
2257 ASSERT(prealloc == 0 || PAGE_EXCL(pp));
/* Sanity check: constituent pages must be physically contiguous and aligned. */
2259 if (pg_idx > 0 &&
2260 ((page_pptonum(pp) != page_pptonum(ppa[pg_idx - 1]) + 1) ||
2261 (pp->p_szc != ppa[pg_idx - 1]->p_szc))) {
2262 panic("anon_map_getpages: unexpected page");
2263 } else if (pg_idx == 0 && (page_pptonum(pp) & (pgcnt - 1))) {
2264 panic("anon_map_getpages: unaligned page");
2267 if (prealloc == 0) {
2268 ppa[pg_idx] = pp;
2271 if (ap->an_refcnt > 1) {
2272 VM_STAT_ADD(anonvmstats.getpages[18]);
2273 *protp &= ~PROT_WRITE;
2277 * If this is a new anon slot then initialize
2278 * the anon array entry.
2280 if (slotcreate) {
2281 (void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP);
2283 pg_idx++;
2284 an_idx++;
2285 vaddr += PAGESIZE;
2289 * Since preallocated pages come off the freelist
2290 * they are locked SE_EXCL. Simply downgrade and return.
2292 if (prealloc) {
2293 VM_STAT_ADD(anonvmstats.getpages[19]);
2294 conpp = NULL;
2295 for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) {
2296 page_downgrade(ppa[pg_idx]);
2299 ASSERT(conpp == NULL);
2301 if (brkcow == 0 || (*protp & PROT_WRITE)) {
2302 VM_STAT_ADD(anonvmstats.getpages[20]);
2303 return (0);
/* COW fault on a shared large page: hand off to anon_map_privatepages(). */
2306 if (szc < seg->s_szc)
2307 panic("anon_map_getpages: cowfault for szc %d", szc);
2309 VM_STAT_ADD(anonvmstats.getpages[21]);
2311 *protp = PROT_ALL;
2312 return (anon_map_privatepages(amp, start_idx, szc, seg, addr, prot,
2313 ppa, vpage, anypgsz, pgflags, cred));
2314 io_err:
2316 * We got an IO error somewhere in our large page.
2317 * If we were using a preallocated page then just demote
2318 * all the constituent pages that we've succeeded with sofar
2319 * to PAGESIZE pages and leave them in the system
2320 * unlocked.
2323 ASSERT(err != -2 || ((pg_idx == 0) && upsize));
2325 VM_STAT_COND_ADD(err > 0, anonvmstats.getpages[22]);
2326 VM_STAT_COND_ADD(err == -1, anonvmstats.getpages[23]);
2327 VM_STAT_COND_ADD(err == -2, anonvmstats.getpages[24]);
2329 if (prealloc) {
2330 conpp = NULL;
2331 if (pg_idx > 0) {
2332 VM_STAT_ADD(anonvmstats.getpages[25]);
2333 for (i = 0; i < pgcnt; i++) {
2334 pp = ppa[i];
2335 ASSERT(PAGE_EXCL(pp));
2336 ASSERT(pp->p_szc == szc);
2337 pp->p_szc = 0;
2339 for (i = 0; i < pg_idx; i++) {
2340 ASSERT(!hat_page_is_mapped(ppa[i]));
2341 page_unlock(ppa[i]);
2344 * Now free up the remaining unused constituent
2345 * pages.
2347 while (pg_idx < pgcnt) {
2348 ASSERT(!hat_page_is_mapped(ppa[pg_idx]));
2349 page_free(ppa[pg_idx], 0);
2350 pg_idx++;
2352 } else {
2353 VM_STAT_ADD(anonvmstats.getpages[26]);
2354 page_free_pages(ppa[0]);
2356 } else {
2357 VM_STAT_ADD(anonvmstats.getpages[27]);
2358 ASSERT(err > 0);
2359 for (i = 0; i < pg_idx; i++)
2360 page_unlock(ppa[i]);
2362 ASSERT(conpp == NULL);
2363 if (err != -1)
2364 return (err);
2366 * we are here because we failed to relocate.
2368 ASSERT(prealloc);
2369 if (brkcow == 0 || szc < seg->s_szc ||
2370 !anon_szcshare(amp->ahp, start_idx)) {
2371 VM_STAT_ADD(anonvmstats.getpages[28]);
2372 return (-1);
2374 VM_STAT_ADD(anonvmstats.getpages[29]);
2375 goto docow;
2380 * Turn a reference to an object or shared anon page
2381 * into a private page with a copy of the data from the
2382 * original page which is always locked by the caller.
2383 * This routine unloads the translation and unlocks the
2384 * original page, if it isn't being stolen, before returning
2385 * to the caller.
2387 * NOTE: The original anon slot is not freed by this routine
2388 * It must be freed by the caller while holding the
2389 * "anon_map" lock to prevent races which can occur if
2390 * a process has multiple lwps in its address space.
2392 page_t *
2393 anon_private(
2394 struct anon **app,
2395 struct seg *seg,
2396 caddr_t addr,
2397 uint_t prot,
2398 page_t *opp,
2399 int oppflags,
2400 struct cred *cred)
2402 struct anon *old = *app;
2403 struct anon *new;
2404 page_t *pp = NULL;
2405 struct vnode *vp;
2406 anoff_t off;
2407 page_t *anon_pl[1 + 1];
2408 int err;
2410 if (oppflags & STEAL_PAGE)
2411 ASSERT(PAGE_EXCL(opp));
2412 else
2413 ASSERT(PAGE_LOCKED(opp));
2415 CPU_STATS_ADD_K(vm, cow_fault, 1);
2417 *app = new = anon_alloc(NULL, 0);
2418 swap_xlate(new, &vp, &off);
2420 if (oppflags & STEAL_PAGE) {
2421 page_rename(opp, &vp->v_object, (uoff_t)off);
2422 pp = opp;
2423 hat_setmod(pp);
2425 /* bug 4026339 */
2426 page_downgrade(pp);
2427 return (pp);
2431 * Call the fop_getpage routine to create the page, thereby
2432 * enabling the vnode driver to allocate any filesystem
2433 * space (e.g., disk block allocation for UFS). This also
2434 * prevents more than one page from being added to the
2435 * vnode at the same time.
2437 err = fop_getpage(vp, (uoff_t)off, PAGESIZE, NULL,
2438 anon_pl, PAGESIZE, seg, addr, S_CREATE, cred, NULL);
2439 if (err)
2440 goto out;
2442 pp = anon_pl[0];
2445 * If the original page was locked, we need to move the lock
2446 * to the new page by transfering 'cowcnt/lckcnt' of the original
2447 * page to 'cowcnt/lckcnt' of the new page.
2449 * See Statement at the beginning of segvn_lockop() and
2450 * comments in page_pp_useclaim() regarding the way
2451 * cowcnts/lckcnts are handled.
2453 * Also availrmem must be decremented up front for read only mapping
2454 * before calling page_pp_useclaim. page_pp_useclaim will bump it back
2455 * if availrmem did not need to be decremented after all.
2457 if (oppflags & LOCK_PAGE) {
2458 if ((prot & PROT_WRITE) == 0) {
2459 mutex_enter(&freemem_lock);
2460 if (availrmem > pages_pp_maximum) {
2461 availrmem--;
2462 pages_useclaim++;
2463 } else {
2464 mutex_exit(&freemem_lock);
2465 goto out;
2467 mutex_exit(&freemem_lock);
2469 page_pp_useclaim(opp, pp, prot & PROT_WRITE);
2473 * Now copy the contents from the original page,
2474 * which is locked and loaded in the MMU by
2475 * the caller to prevent yet another page fault.
2477 /* XXX - should set mod bit in here */
2478 if (ppcopy(opp, pp) == 0) {
2480 * Before ppcopy could handle UE or other faults, we
2481 * would have panicked here, and still have no option
2482 * but to do so now.
2484 panic("anon_private, ppcopy failed, opp = 0x%p, pp = 0x%p",
2485 (void *)opp, (void *)pp);
2488 hat_setrefmod(pp); /* mark as modified */
2491 * Unload the old translation.
2493 hat_unload(seg->s_as->a_hat, addr, PAGESIZE, HAT_UNLOAD);
2496 * Free unmapped, unmodified original page.
2497 * or release the lock on the original page,
2498 * otherwise the process will sleep forever in
2499 * anon_decref() waiting for the "exclusive" lock
2500 * on the page.
2502 (void) page_release(opp, 1);
2505 * we are done with page creation so downgrade the new
2506 * page's selock to shared, this helps when multiple
2507 * as_fault(...SOFTLOCK...) are done to the same
2508 * page(aio)
2510 page_downgrade(pp);
2513 * NOTE: The original anon slot must be freed by the
2514 * caller while holding the "anon_map" lock, if we
2515 * copied away from an anonymous page.
2517 return (pp);
2519 out:
2520 *app = old;
2521 if (pp)
2522 page_unlock(pp);
2523 anon_decref(new);
2524 page_unlock(opp);
2525 return (NULL);
2529 anon_map_privatepages(
2530 struct anon_map *amp,
2531 ulong_t start_idx,
2532 uint_t szc,
2533 struct seg *seg,
2534 caddr_t addr,
2535 uint_t prot,
2536 page_t *ppa[],
2537 struct vpage vpage[],
2538 int anypgsz,
2539 int pgflags,
2540 struct cred *cred)
2542 pgcnt_t pgcnt;
2543 struct vnode *vp;
2544 anoff_t off;
2545 page_t *pl[2], *conpp = NULL;
2546 int err;
2547 int prealloc = 1;
2548 struct anon *ap, *oldap;
2549 caddr_t vaddr;
2550 page_t *pplist, *pp;
2551 ulong_t pg_idx, an_idx;
2552 spgcnt_t nreloc = 0;
2553 int pagelock = 0;
2554 kmutex_t *ahmpages = NULL;
2555 #ifdef DEBUG
2556 int refcnt;
2557 #endif
2559 ASSERT(szc != 0);
2560 ASSERT(szc == seg->s_szc);
2562 VM_STAT_ADD(anonvmstats.privatepages[0]);
2564 pgcnt = page_get_pagecnt(szc);
2565 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
2566 ASSERT(IS_P2ALIGNED(start_idx, pgcnt));
2568 ASSERT(amp != NULL);
2569 ap = anon_get_ptr(amp->ahp, start_idx);
2570 ASSERT(ap == NULL || ap->an_refcnt >= 1);
2572 VM_STAT_COND_ADD(ap == NULL, anonvmstats.privatepages[1]);
2575 * Now try and allocate the large page. If we fail then just
2576 * let fop_getpage give us PAGESIZE pages. Normally we let
2577 * the caller make this decision but to avoid added complexity
2578 * it's simplier to handle that case here.
2580 if (anypgsz == -1) {
2581 VM_STAT_ADD(anonvmstats.privatepages[2]);
2582 prealloc = 0;
2583 } else if (page_alloc_pages(&anon_vp->v_object, seg, addr, &pplist,
2584 NULL, szc, anypgsz, pgflags) != 0) {
2585 VM_STAT_ADD(anonvmstats.privatepages[3]);
2586 prealloc = 0;
2590 * make the decrement of all refcnts of all
2591 * anon slots of a large page appear atomic by
2592 * getting an anonpages_hash_lock for the
2593 * first anon slot of a large page.
2595 if (ap != NULL) {
2596 ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
2597 mutex_enter(ahmpages);
2598 if (ap->an_refcnt == 1) {
2599 VM_STAT_ADD(anonvmstats.privatepages[4]);
2600 ASSERT(!anon_share(amp->ahp, start_idx, pgcnt));
2601 mutex_exit(ahmpages);
2603 if (prealloc) {
2604 page_free_replacement_page(pplist);
2605 page_create_putback(pgcnt);
2607 ASSERT(ppa[0]->p_szc <= szc);
2608 if (ppa[0]->p_szc == szc) {
2609 VM_STAT_ADD(anonvmstats.privatepages[5]);
2610 return (0);
2612 for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) {
2613 ASSERT(ppa[pg_idx] != NULL);
2614 page_unlock(ppa[pg_idx]);
2616 return (-1);
2621 * If we are passed in the vpage array and this is
2622 * not PROT_WRITE then we need to decrement availrmem
2623 * up front before we try anything. If we need to and
2624 * can't decrement availrmem then its better to fail now
2625 * than in the middle of processing the new large page.
2626 * page_pp_usclaim() on behalf of each constituent page
2627 * below will adjust availrmem back for the cases not needed.
2629 if (vpage != NULL && (prot & PROT_WRITE) == 0) {
2630 for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) {
2631 if (VPP_ISPPLOCK(&vpage[pg_idx])) {
2632 pagelock = 1;
2633 break;
2636 if (pagelock) {
2637 VM_STAT_ADD(anonvmstats.privatepages[6]);
2638 mutex_enter(&freemem_lock);
2639 if (availrmem >= pages_pp_maximum + pgcnt) {
2640 availrmem -= pgcnt;
2641 pages_useclaim += pgcnt;
2642 } else {
2643 VM_STAT_ADD(anonvmstats.privatepages[7]);
2644 mutex_exit(&freemem_lock);
2645 if (ahmpages != NULL) {
2646 mutex_exit(ahmpages);
2648 if (prealloc) {
2649 page_free_replacement_page(pplist);
2650 page_create_putback(pgcnt);
2652 for (pg_idx = 0; pg_idx < pgcnt; pg_idx++)
2653 if (ppa[pg_idx] != NULL)
2654 page_unlock(ppa[pg_idx]);
2655 return (ENOMEM);
2657 mutex_exit(&freemem_lock);
2661 CPU_STATS_ADD_K(vm, cow_fault, pgcnt);
2663 VM_STAT_ADD(anonvmstats.privatepages[8]);
2665 an_idx = start_idx;
2666 pg_idx = 0;
2667 vaddr = addr;
2668 for (; pg_idx < pgcnt; pg_idx++, an_idx++, vaddr += PAGESIZE) {
2669 ASSERT(ppa[pg_idx] != NULL);
2670 oldap = anon_get_ptr(amp->ahp, an_idx);
2671 ASSERT(ahmpages != NULL || oldap == NULL);
2672 ASSERT(ahmpages == NULL || oldap != NULL);
2673 ASSERT(ahmpages == NULL || oldap->an_refcnt > 1);
2674 ASSERT(ahmpages == NULL || pg_idx != 0 ||
2675 (refcnt = oldap->an_refcnt));
2676 ASSERT(ahmpages == NULL || pg_idx == 0 ||
2677 refcnt == oldap->an_refcnt);
2679 ap = anon_alloc(NULL, 0);
2681 swap_xlate(ap, &vp, &off);
2684 * Now setup our preallocated page to pass down to
2685 * swap_getpage().
2687 if (prealloc) {
2688 pp = pplist;
2689 page_sub(&pplist, pp);
2690 conpp = pp;
2693 err = swap_getconpage(vp, (uoff_t)off, PAGESIZE, NULL, pl,
2694 PAGESIZE, conpp, NULL, &nreloc, seg, vaddr,
2695 S_CREATE, cred);
2698 * Impossible to fail this is S_CREATE.
2700 if (err)
2701 panic("anon_map_privatepages: fop_getpage failed");
2703 ASSERT(prealloc ? pp == pl[0] : pl[0]->p_szc == 0);
2704 ASSERT(prealloc == 0 || nreloc == 1);
2706 pp = pl[0];
2709 * If the original page was locked, we need to move
2710 * the lock to the new page by transfering
2711 * 'cowcnt/lckcnt' of the original page to 'cowcnt/lckcnt'
2712 * of the new page. pg_idx can be used to index
2713 * into the vpage array since the caller will guarentee
2714 * that vpage struct passed in corresponds to addr
2715 * and forward.
2717 if (vpage != NULL && VPP_ISPPLOCK(&vpage[pg_idx])) {
2718 page_pp_useclaim(ppa[pg_idx], pp, prot & PROT_WRITE);
2719 } else if (pagelock) {
2720 mutex_enter(&freemem_lock);
2721 availrmem++;
2722 pages_useclaim--;
2723 mutex_exit(&freemem_lock);
2727 * Now copy the contents from the original page.
2729 if (ppcopy(ppa[pg_idx], pp) == 0) {
2731 * Before ppcopy could hanlde UE or other faults, we
2732 * would have panicked here, and still have no option
2733 * but to do so now.
2735 panic("anon_map_privatepages, ppcopy failed");
2738 hat_setrefmod(pp); /* mark as modified */
2741 * Release the lock on the original page,
2742 * derement the old slot, and down grade the lock
2743 * on the new copy.
2745 page_unlock(ppa[pg_idx]);
2747 if (!prealloc)
2748 page_downgrade(pp);
2750 ppa[pg_idx] = pp;
2753 * Now reflect the copy in the new anon array.
2755 ASSERT(ahmpages == NULL || oldap->an_refcnt > 1);
2756 if (oldap != NULL)
2757 anon_decref(oldap);
2758 (void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP);
2762 * Unload the old large page translation.
2764 hat_unload(seg->s_as->a_hat, addr, pgcnt << PAGESHIFT, HAT_UNLOAD);
2766 if (ahmpages != NULL) {
2767 mutex_exit(ahmpages);
2769 ASSERT(prealloc == 0 || pplist == NULL);
2770 if (prealloc) {
2771 VM_STAT_ADD(anonvmstats.privatepages[9]);
2772 for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) {
2773 page_downgrade(ppa[pg_idx]);
2777 return (0);
2781 * Allocate a private zero-filled anon page.
2783 page_t *
2784 anon_zero(struct seg *seg, caddr_t addr, struct anon **app, struct cred *cred)
2786 struct anon *ap;
2787 page_t *pp;
2788 struct vnode *vp;
2789 anoff_t off;
2790 page_t *anon_pl[1 + 1];
2791 int err;
2793 *app = ap = anon_alloc(NULL, 0);
2794 swap_xlate(ap, &vp, &off);
2797 * Call the fop_getpage routine to create the page, thereby
2798 * enabling the vnode driver to allocate any filesystem
2799 * dependent structures (e.g., disk block allocation for UFS).
2800 * This also prevents more than on page from being added to
2801 * the vnode at the same time since it is locked.
2803 err = fop_getpage(vp, off, PAGESIZE, NULL,
2804 anon_pl, PAGESIZE, seg, addr, S_CREATE, cred, NULL);
2805 if (err) {
2806 *app = NULL;
2807 anon_decref(ap);
2808 return (NULL);
2810 pp = anon_pl[0];
2812 pagezero(pp, 0, PAGESIZE); /* XXX - should set mod bit */
2813 page_downgrade(pp);
2814 CPU_STATS_ADD_K(vm, zfod, 1);
2815 hat_setrefmod(pp); /* mark as modified so pageout writes back */
2816 return (pp);
2821 * Allocate array of private zero-filled anon pages for empty slots
2822 * and kept pages for non empty slots within given range.
2824 * NOTE: This rontine will try and use large pages
2825 * if available and supported by underlying platform.
2828 anon_map_createpages(
2829 struct anon_map *amp,
2830 ulong_t start_index,
2831 size_t len,
2832 page_t *ppa[],
2833 struct seg *seg,
2834 caddr_t addr,
2835 enum seg_rw rw,
2836 struct cred *cred)
2839 struct anon *ap;
2840 struct vnode *ap_vp;
2841 page_t *pp, *pplist, *anon_pl[1 + 1], *conpp = NULL;
2842 int err = 0;
2843 ulong_t p_index, index;
2844 pgcnt_t npgs, pg_cnt;
2845 spgcnt_t nreloc = 0;
2846 uint_t l_szc, szc, prot;
2847 anoff_t ap_off;
2848 size_t pgsz;
2849 lgrp_t *lgrp;
2850 kmutex_t *ahm;
2853 * XXX For now only handle S_CREATE.
2855 ASSERT(rw == S_CREATE);
2857 index = start_index;
2858 p_index = 0;
2859 npgs = btopr(len);
2862 * If this platform supports multiple page sizes
2863 * then try and allocate directly from the free
2864 * list for pages larger than PAGESIZE.
2866 * NOTE:When we have page_create_ru we can stop
2867 * directly allocating from the freelist.
2869 l_szc = seg->s_szc;
2870 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
2871 while (npgs) {
2874 * if anon slot already exists
2875 * (means page has been created)
2876 * so 1) look up the page
2877 * 2) if the page is still in memory, get it.
2878 * 3) if not, create a page and
2879 * page in from physical swap device.
2880 * These are done in anon_getpage().
2882 ap = anon_get_ptr(amp->ahp, index);
2883 if (ap) {
2884 err = anon_getpage(&ap, &prot, anon_pl, PAGESIZE,
2885 seg, addr, S_READ, cred);
2886 if (err) {
2887 ANON_LOCK_EXIT(&amp->a_rwlock);
2888 panic("anon_map_createpages: anon_getpage");
2890 pp = anon_pl[0];
2891 ppa[p_index++] = pp;
2894 * an_pvp can become non-NULL after SysV's page was
2895 * paged out before ISM was attached to this SysV
2896 * shared memory segment. So free swap slot if needed.
2898 if (ap->an_pvp != NULL) {
2899 page_io_lock(pp);
2900 ahm = AH_MUTEX(ap->an_vp, ap->an_off);
2901 mutex_enter(ahm);
2902 if (ap->an_pvp != NULL) {
2903 swap_phys_free(ap->an_pvp,
2904 ap->an_poff, PAGESIZE);
2905 ap->an_pvp = NULL;
2906 ap->an_poff = 0;
2907 mutex_exit(ahm);
2908 hat_setmod(pp);
2909 } else {
2910 mutex_exit(ahm);
2912 page_io_unlock(pp);
2915 addr += PAGESIZE;
2916 index++;
2917 npgs--;
2918 continue;
2921 * Now try and allocate the largest page possible
2922 * for the current address and range.
2923 * Keep dropping down in page size until:
2925 * 1) Properly aligned
2926 * 2) Does not overlap existing anon pages
2927 * 3) Fits in remaining range.
2928 * 4) able to allocate one.
2930 * NOTE: XXX When page_create_ru is completed this code
2931 * will change.
2933 szc = l_szc;
2934 pplist = NULL;
2935 pg_cnt = 0;
2936 while (szc) {
2937 pgsz = page_get_pagesize(szc);
2938 pg_cnt = pgsz >> PAGESHIFT;
2939 if (IS_P2ALIGNED(addr, pgsz) && pg_cnt <= npgs &&
2940 anon_pages(amp->ahp, index, pg_cnt) == 0) {
2942 * XXX
2943 * Since we are faking page_create()
2944 * we also need to do the freemem and
2945 * pcf accounting.
2947 (void) page_create_wait(pg_cnt, PG_WAIT);
2950 * Get lgroup to allocate next page of shared
2951 * memory from and use it to specify where to
2952 * allocate the physical memory
2954 lgrp = lgrp_mem_choose(seg, addr, pgsz);
2956 pplist = page_get_freelist(
2957 &anon_vp->v_object, 0, seg,
2958 addr, pgsz, 0, lgrp);
2960 if (pplist == NULL) {
2961 page_create_putback(pg_cnt);
2965 * If a request for a page of size
2966 * larger than PAGESIZE failed
2967 * then don't try that size anymore.
2969 if (pplist == NULL) {
2970 l_szc = szc - 1;
2971 } else {
2972 break;
2975 szc--;
2979 * If just using PAGESIZE pages then don't
2980 * directly allocate from the free list.
2982 if (pplist == NULL) {
2983 ASSERT(szc == 0);
2984 pp = anon_zero(seg, addr, &ap, cred);
2985 if (pp == NULL) {
2986 ANON_LOCK_EXIT(&amp->a_rwlock);
2987 panic("anon_map_createpages: anon_zero");
2989 ppa[p_index++] = pp;
2991 ASSERT(anon_get_ptr(amp->ahp, index) == NULL);
2992 (void) anon_set_ptr(amp->ahp, index, ap, ANON_SLEEP);
2994 addr += PAGESIZE;
2995 index++;
2996 npgs--;
2997 continue;
3001 * pplist is a list of pg_cnt PAGESIZE pages.
3002 * These pages are locked SE_EXCL since they
3003 * came directly off the free list.
3005 ASSERT(IS_P2ALIGNED(pg_cnt, pg_cnt));
3006 ASSERT(IS_P2ALIGNED(index, pg_cnt));
3007 ASSERT(conpp == NULL);
3008 while (pg_cnt--) {
3010 ap = anon_alloc(NULL, 0);
3011 swap_xlate(ap, &ap_vp, &ap_off);
3013 ASSERT(pplist != NULL);
3014 pp = pplist;
3015 page_sub(&pplist, pp);
3016 PP_CLRFREE(pp);
3017 PP_CLRAGED(pp);
3018 conpp = pp;
3020 err = swap_getconpage(ap_vp, ap_off, PAGESIZE,
3021 (uint_t *)NULL, anon_pl, PAGESIZE, conpp, NULL,
3022 &nreloc, seg, addr, S_CREATE, cred);
3024 if (err) {
3025 ANON_LOCK_EXIT(&amp->a_rwlock);
3026 panic("anon_map_createpages: S_CREATE");
3029 ASSERT(anon_pl[0] == pp);
3030 ASSERT(nreloc == 1);
3031 pagezero(pp, 0, PAGESIZE);
3032 CPU_STATS_ADD_K(vm, zfod, 1);
3033 hat_setrefmod(pp);
3035 ASSERT(anon_get_ptr(amp->ahp, index) == NULL);
3036 (void) anon_set_ptr(amp->ahp, index, ap, ANON_SLEEP);
3038 ppa[p_index++] = pp;
3040 addr += PAGESIZE;
3041 index++;
3042 npgs--;
3044 conpp = NULL;
3045 pg_cnt = pgsz >> PAGESHIFT;
3046 p_index = p_index - pg_cnt;
3047 while (pg_cnt--) {
3048 page_downgrade(ppa[p_index++]);
3051 ANON_LOCK_EXIT(&amp->a_rwlock);
3052 return (0);
3055 static int
3056 anon_try_demote_pages(
3057 struct anon_hdr *ahp,
3058 ulong_t sidx,
3059 uint_t szc,
3060 page_t **ppa,
3061 int private)
3063 struct anon *ap;
3064 pgcnt_t pgcnt = page_get_pagecnt(szc);
3065 page_t *pp;
3066 pgcnt_t i;
3067 kmutex_t *ahmpages = NULL;
3068 int root = 0;
3069 pgcnt_t npgs;
3070 pgcnt_t curnpgs = 0;
3071 size_t ppasize = 0;
3073 ASSERT(szc != 0);
3074 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
3075 ASSERT(IS_P2ALIGNED(sidx, pgcnt));
3076 ASSERT(sidx < ahp->size);
3078 if (ppa == NULL) {
3079 ppasize = pgcnt * sizeof (page_t *);
3080 ppa = kmem_alloc(ppasize, KM_SLEEP);
3083 ap = anon_get_ptr(ahp, sidx);
3084 if (ap != NULL && private) {
3085 VM_STAT_ADD(anonvmstats.demotepages[1]);
3086 ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
3087 mutex_enter(ahmpages);
3090 if (ap != NULL && ap->an_refcnt > 1) {
3091 if (ahmpages != NULL) {
3092 VM_STAT_ADD(anonvmstats.demotepages[2]);
3093 mutex_exit(ahmpages);
3095 if (ppasize != 0) {
3096 kmem_free(ppa, ppasize);
3098 return (0);
3100 if (ahmpages != NULL) {
3101 mutex_exit(ahmpages);
3103 if (ahp->size - sidx < pgcnt) {
3104 ASSERT(private == 0);
3105 pgcnt = ahp->size - sidx;
3107 for (i = 0; i < pgcnt; i++, sidx++) {
3108 ap = anon_get_ptr(ahp, sidx);
3109 if (ap != NULL) {
3110 if (ap->an_refcnt != 1) {
3111 panic("anon_try_demote_pages: an_refcnt != 1");
3113 pp = ppa[i] = page_lookup(&ap->an_vp->v_object,
3114 ap->an_off, SE_EXCL);
3115 if (pp != NULL) {
3116 (void) hat_pageunload(pp,
3117 HAT_FORCE_PGUNLOAD);
3119 } else {
3120 ppa[i] = NULL;
3123 for (i = 0; i < pgcnt; i++) {
3124 if ((pp = ppa[i]) != NULL && pp->p_szc != 0) {
3125 ASSERT(pp->p_szc <= szc);
3126 if (!root) {
3127 VM_STAT_ADD(anonvmstats.demotepages[3]);
3128 if (curnpgs != 0)
3129 panic("anon_try_demote_pages: "
3130 "bad large page");
3132 root = 1;
3133 curnpgs = npgs =
3134 page_get_pagecnt(pp->p_szc);
3136 ASSERT(npgs <= pgcnt);
3137 ASSERT(IS_P2ALIGNED(npgs, npgs));
3138 ASSERT(!(page_pptonum(pp) & (npgs - 1)));
3139 } else {
3140 ASSERT(i > 0);
3141 ASSERT(page_pptonum(pp) - 1 ==
3142 page_pptonum(ppa[i - 1]));
3143 if ((page_pptonum(pp) & (npgs - 1)) ==
3144 npgs - 1)
3145 root = 0;
3147 ASSERT(PAGE_EXCL(pp));
3148 pp->p_szc = 0;
3149 ASSERT(curnpgs > 0);
3150 curnpgs--;
3153 if (root != 0 || curnpgs != 0)
3154 panic("anon_try_demote_pages: bad large page");
3156 for (i = 0; i < pgcnt; i++) {
3157 if ((pp = ppa[i]) != NULL) {
3158 ASSERT(!hat_page_is_mapped(pp));
3159 ASSERT(pp->p_szc == 0);
3160 page_unlock(pp);
3163 if (ppasize != 0) {
3164 kmem_free(ppa, ppasize);
3166 return (1);
3170 * anon_map_demotepages() can only be called by MAP_PRIVATE segments.
3173 anon_map_demotepages(
3174 struct anon_map *amp,
3175 ulong_t start_idx,
3176 struct seg *seg,
3177 caddr_t addr,
3178 uint_t prot,
3179 struct vpage vpage[],
3180 struct cred *cred)
3182 struct anon *ap;
3183 uint_t szc = seg->s_szc;
3184 pgcnt_t pgcnt = page_get_pagecnt(szc);
3185 size_t ppasize = pgcnt * sizeof (page_t *);
3186 page_t **ppa = kmem_alloc(ppasize, KM_SLEEP);
3187 page_t *pp;
3188 page_t *pl[2];
3189 pgcnt_t i, pg_idx;
3190 ulong_t an_idx;
3191 caddr_t vaddr;
3192 int err;
3193 int retry = 0;
3194 uint_t vpprot;
3196 ASSERT(RW_WRITE_HELD(&amp->a_rwlock));
3197 ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
3198 ASSERT(IS_P2ALIGNED(start_idx, pgcnt));
3199 ASSERT(ppa != NULL);
3200 ASSERT(szc != 0);
3201 ASSERT(szc == amp->a_szc);
3203 VM_STAT_ADD(anonvmstats.demotepages[0]);
3205 top:
3206 if (anon_try_demote_pages(amp->ahp, start_idx, szc, ppa, 1)) {
3207 kmem_free(ppa, ppasize);
3208 return (0);
3211 VM_STAT_ADD(anonvmstats.demotepages[4]);
3213 ASSERT(retry == 0); /* we can be here only once */
3215 vaddr = addr;
3216 for (pg_idx = 0, an_idx = start_idx; pg_idx < pgcnt;
3217 pg_idx++, an_idx++, vaddr += PAGESIZE) {
3218 ap = anon_get_ptr(amp->ahp, an_idx);
3219 if (ap == NULL)
3220 panic("anon_map_demotepages: no anon slot");
3221 err = anon_getpage(&ap, &vpprot, pl, PAGESIZE, seg, vaddr,
3222 S_READ, cred);
3223 if (err) {
3224 for (i = 0; i < pg_idx; i++) {
3225 if ((pp = ppa[i]) != NULL)
3226 page_unlock(pp);
3228 kmem_free(ppa, ppasize);
3229 return (err);
3231 ppa[pg_idx] = pl[0];
3234 err = anon_map_privatepages(amp, start_idx, szc, seg, addr, prot, ppa,
3235 vpage, -1, 0, cred);
3236 if (err > 0) {
3237 VM_STAT_ADD(anonvmstats.demotepages[5]);
3238 kmem_free(ppa, ppasize);
3239 return (err);
3241 ASSERT(err == 0 || err == -1);
3242 if (err == -1) {
3243 VM_STAT_ADD(anonvmstats.demotepages[6]);
3244 retry = 1;
3245 goto top;
3247 for (i = 0; i < pgcnt; i++) {
3248 ASSERT(ppa[i] != NULL);
3249 if (ppa[i]->p_szc != 0)
3250 retry = 1;
3251 page_unlock(ppa[i]);
3253 if (retry) {
3254 VM_STAT_ADD(anonvmstats.demotepages[7]);
3255 goto top;
3258 VM_STAT_ADD(anonvmstats.demotepages[8]);
3260 kmem_free(ppa, ppasize);
3262 return (0);
3266 * Free pages of shared anon map. It's assumed that anon maps don't share anon
3267 * structures with private anon maps. Therefore all anon structures should
3268 * have at most one reference at this point. This means underlying pages can
3269 * be exclusively locked and demoted or freed. If not freeing the entire
3270 * large pages demote the ends of the region we free to be able to free
3271 * subpages. Page roots correspond to aligned index positions in anon map.
3273 void
3274 anon_shmap_free_pages(struct anon_map *amp, ulong_t sidx, size_t len)
3276 ulong_t eidx = sidx + btopr(len);
3277 pgcnt_t pages = page_get_pagecnt(amp->a_szc);
3278 struct anon_hdr *ahp = amp->ahp;
3279 ulong_t tidx;
3280 size_t size;
3281 ulong_t sidx_aligned;
3282 ulong_t eidx_aligned;
3284 ASSERT(ANON_WRITE_HELD(&amp->a_rwlock));
3285 ASSERT(amp->refcnt <= 1);
3286 ASSERT(amp->a_szc > 0);
3287 ASSERT(eidx <= ahp->size);
3288 ASSERT(!anon_share(ahp, sidx, btopr(len)));
3290 if (len == 0) { /* XXX */
3291 return;
3294 sidx_aligned = P2ALIGN(sidx, pages);
3295 if (sidx_aligned != sidx ||
3296 (eidx < sidx_aligned + pages && eidx < ahp->size)) {
3297 if (!anon_try_demote_pages(ahp, sidx_aligned,
3298 amp->a_szc, NULL, 0)) {
3299 panic("anon_shmap_free_pages: demote failed");
3301 size = (eidx <= sidx_aligned + pages) ? (eidx - sidx) :
3302 P2NPHASE(sidx, pages);
3303 size <<= PAGESHIFT;
3304 anon_free(ahp, sidx, size);
3305 sidx = sidx_aligned + pages;
3306 if (eidx <= sidx) {
3307 return;
3310 eidx_aligned = P2ALIGN(eidx, pages);
3311 if (sidx < eidx_aligned) {
3312 anon_free_pages(ahp, sidx,
3313 (eidx_aligned - sidx) << PAGESHIFT,
3314 amp->a_szc);
3315 sidx = eidx_aligned;
3317 ASSERT(sidx == eidx_aligned);
3318 if (eidx == eidx_aligned) {
3319 return;
3321 tidx = eidx;
3322 if (eidx != ahp->size && anon_get_next_ptr(ahp, &tidx) != NULL &&
3323 tidx - sidx < pages) {
3324 if (!anon_try_demote_pages(ahp, sidx, amp->a_szc, NULL, 0)) {
3325 panic("anon_shmap_free_pages: demote failed");
3327 size = (eidx - sidx) << PAGESHIFT;
3328 anon_free(ahp, sidx, size);
3329 } else {
3330 anon_free_pages(ahp, sidx, pages << PAGESHIFT, amp->a_szc);
3335 * This routine should be called with amp's writer lock when there're no other
3336 * users of amp. All pcache entries of this amp must have been already
3337 * inactivated. We must not drop a_rwlock here to prevent new users from
3338 * attaching to this amp.
3340 void
3341 anonmap_purge(struct anon_map *amp)
3343 ASSERT(ANON_WRITE_HELD(&amp->a_rwlock));
3344 ASSERT(amp->refcnt <= 1);
3346 if (amp->a_softlockcnt != 0) {
3347 seg_ppurge(NULL, amp, 0);
3351 * Since all pcache entries were already inactive before this routine
3352 * was called seg_ppurge() couldn't return while there're still
3353 * entries that can be found via the list anchored at a_phead. So we
3354 * can assert this list is empty now. a_softlockcnt may be still non 0
3355 * if asynchronous thread that manages pcache already removed pcache
3356 * entries but hasn't unlocked the pages yet. If a_softlockcnt is non
3357 * 0 we just wait on a_purgecv for shamp_reclaim() to finish. Even if
3358 * a_softlockcnt is 0 we grab a_purgemtx to avoid freeing anon map
3359 * before shamp_reclaim() is done with it. a_purgemtx also taken by
3360 * shamp_reclaim() while a_softlockcnt was still not 0 acts as a
3361 * barrier that prevents anonmap_purge() to complete while
3362 * shamp_reclaim() may still be referencing this amp.
3364 ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
3365 ASSERT(amp->a_phead.p_lprev == &amp->a_phead);
3367 mutex_enter(&amp->a_purgemtx);
3368 while (amp->a_softlockcnt != 0) {
3369 ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
3370 ASSERT(amp->a_phead.p_lprev == &amp->a_phead);
3371 amp->a_purgewait = 1;
3372 cv_wait(&amp->a_purgecv, &amp->a_purgemtx);
3374 mutex_exit(&amp->a_purgemtx);
3376 ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
3377 ASSERT(amp->a_phead.p_lprev == &amp->a_phead);
3378 ASSERT(amp->a_softlockcnt == 0);
3382 * Allocate and initialize an anon_map structure for seg
3383 * associating the given swap reservation with the new anon_map.
3385 struct anon_map *
3386 anonmap_alloc(size_t size, size_t swresv, int flags)
3388 struct anon_map *amp;
3389 int kmflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
3391 amp = kmem_cache_alloc(anonmap_cache, kmflags);
3392 if (amp == NULL) {
3393 ASSERT(kmflags == KM_NOSLEEP);
3394 return (NULL);
3397 amp->ahp = anon_create(btopr(size), flags);
3398 if (amp->ahp == NULL) {
3399 ASSERT(flags == ANON_NOSLEEP);
3400 kmem_cache_free(anonmap_cache, amp);
3401 return (NULL);
3403 amp->refcnt = 1;
3404 amp->size = size;
3405 amp->swresv = swresv;
3406 amp->locality = 0;
3407 amp->a_szc = 0;
3408 amp->a_sp = NULL;
3409 amp->a_softlockcnt = 0;
3410 amp->a_purgewait = 0;
3411 amp->a_phead.p_lnext = &amp->a_phead;
3412 amp->a_phead.p_lprev = &amp->a_phead;
3414 return (amp);
3417 void
3418 anonmap_free(struct anon_map *amp)
3420 ASSERT(amp->ahp != NULL);
3421 ASSERT(amp->refcnt == 0);
3422 ASSERT(amp->a_softlockcnt == 0);
3423 ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
3424 ASSERT(amp->a_phead.p_lprev == &amp->a_phead);
3426 lgrp_shm_policy_fini(amp, NULL);
3427 anon_release(amp->ahp, btopr(amp->size));
3428 kmem_cache_free(anonmap_cache, amp);
3432 * Returns true if the app array has some empty slots.
3433 * The offp and lenp parameters are in/out parameters. On entry
3434 * these values represent the starting offset and length of the
3435 * mapping. When true is returned, these values may be modified
3436 * to be the largest range which includes empty slots.
3439 non_anon(struct anon_hdr *ahp, ulong_t anon_idx, uoff_t *offp,
3440 size_t *lenp)
3442 ulong_t i, el;
3443 ssize_t low, high;
3444 struct anon *ap;
3446 low = -1;
3447 for (i = 0, el = *lenp; i < el; i += PAGESIZE, anon_idx++) {
3448 ap = anon_get_ptr(ahp, anon_idx);
3449 if (ap == NULL) {
3450 if (low == -1)
3451 low = i;
3452 high = i;
3455 if (low != -1) {
3457 * Found at least one non-anon page.
3458 * Set up the off and len return values.
3460 if (low != 0)
3461 *offp += low;
3462 *lenp = high - low + PAGESIZE;
3463 return (1);
3465 return (0);
3469 * Return a count of the number of existing anon pages in the anon array
3470 * app in the range (off, off+len). The array and slots must be guaranteed
3471 * stable by the caller.
3473 pgcnt_t
3474 anon_pages(struct anon_hdr *ahp, ulong_t anon_index, pgcnt_t nslots)
3476 pgcnt_t cnt = 0;
3478 while (nslots-- > 0) {
3479 if ((anon_get_ptr(ahp, anon_index)) != NULL)
3480 cnt++;
3481 anon_index++;
3483 return (cnt);
3487 * Move reserved phys swap into memory swap (unreserve phys swap
3488 * and reserve mem swap by the same amount).
3489 * Used by segspt when it needs to lock reserved swap npages in memory
3492 anon_swap_adjust(pgcnt_t npages)
3494 pgcnt_t unlocked_mem_swap;
3496 mutex_enter(&anoninfo_lock);
3498 ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
3499 ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
3501 unlocked_mem_swap = k_anoninfo.ani_mem_resv
3502 - k_anoninfo.ani_locked_swap;
3503 if (npages > unlocked_mem_swap) {
3504 spgcnt_t adjusted_swap = npages - unlocked_mem_swap;
3507 * if there is not enough unlocked mem swap we take missing
3508 * amount from phys swap and give it to mem swap
3510 if (!page_reclaim_mem(adjusted_swap, segspt_minfree, 1)) {
3511 mutex_exit(&anoninfo_lock);
3512 return (ENOMEM);
3515 k_anoninfo.ani_mem_resv += adjusted_swap;
3516 ASSERT(k_anoninfo.ani_phys_resv >= adjusted_swap);
3517 k_anoninfo.ani_phys_resv -= adjusted_swap;
3519 ANI_ADD(adjusted_swap);
3521 k_anoninfo.ani_locked_swap += npages;
3523 ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
3524 ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
3526 mutex_exit(&anoninfo_lock);
3528 return (0);
3532 * 'unlocked' reserved mem swap so when it is unreserved it
3533 * can be moved back phys (disk) swap
3535 void
3536 anon_swap_restore(pgcnt_t npages)
3538 mutex_enter(&anoninfo_lock);
3540 ASSERT(k_anoninfo.ani_locked_swap <= k_anoninfo.ani_mem_resv);
3542 ASSERT(k_anoninfo.ani_locked_swap >= npages);
3543 k_anoninfo.ani_locked_swap -= npages;
3545 ASSERT(k_anoninfo.ani_locked_swap <= k_anoninfo.ani_mem_resv);
3547 mutex_exit(&anoninfo_lock);
3551 * Return the pointer from the list for a
3552 * specified anon index.
3554 ulong_t *
3555 anon_get_slot(struct anon_hdr *ahp, ulong_t an_idx)
3557 struct anon **app;
3558 void **ppp;
3560 ASSERT(an_idx < ahp->size);
3563 * Single level case.
3565 if ((ahp->size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
3566 return ((ulong_t *)&ahp->array_chunk[an_idx]);
3567 } else {
3570 * 2 level case.
3572 ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
3573 if (*ppp == NULL) {
3574 mutex_enter(&ahp->serial_lock);
3575 ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
3576 if (*ppp == NULL)
3577 *ppp = kmem_zalloc(PAGESIZE, KM_SLEEP);
3578 mutex_exit(&ahp->serial_lock);
3580 app = *ppp;
3581 return ((ulong_t *)&app[an_idx & ANON_CHUNK_OFF]);
3585 void
3586 anon_array_enter(struct anon_map *amp, ulong_t an_idx, anon_sync_obj_t *sobj)
3588 ulong_t *ap_slot;
3589 kmutex_t *mtx;
3590 kcondvar_t *cv;
3591 int hash;
3594 * Use szc to determine anon slot(s) to appear atomic.
3595 * If szc = 0, then lock the anon slot and mark it busy.
3596 * If szc > 0, then lock the range of slots by getting the
3597 * anon_array_lock for the first anon slot, and mark only the
3598 * first anon slot busy to represent whole range being busy.
3601 ASSERT(RW_READ_HELD(&amp->a_rwlock));
3602 an_idx = P2ALIGN(an_idx, page_get_pagecnt(amp->a_szc));
3603 hash = ANON_ARRAY_HASH(amp, an_idx);
3604 sobj->sync_mutex = mtx = &anon_array_lock[hash].pad_mutex;
3605 sobj->sync_cv = cv = &anon_array_cv[hash];
3606 mutex_enter(mtx);
3607 ap_slot = anon_get_slot(amp->ahp, an_idx);
3608 while (ANON_ISBUSY(ap_slot))
3609 cv_wait(cv, mtx);
3610 ANON_SETBUSY(ap_slot);
3611 sobj->sync_data = ap_slot;
3612 mutex_exit(mtx);
3615 void
3616 anon_array_exit(anon_sync_obj_t *sobj)
3618 mutex_enter(sobj->sync_mutex);
3619 ASSERT(ANON_ISBUSY(sobj->sync_data));
3620 ANON_CLRBUSY(sobj->sync_data);
3621 if (CV_HAS_WAITERS(sobj->sync_cv))
3622 cv_broadcast(sobj->sync_cv);
3623 mutex_exit(sobj->sync_mutex);