/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */
#include <sys/param.h>
#include <sys/user.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/tuneable.h>
#include <vm/hat.h>
#include <vm/seg.h>
#include <vm/as.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <sys/buf.h>
#include <sys/swap.h>
#include <sys/atomic.h>
#include <vm/seg_spt.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/shm.h>
#include <sys/shm_impl.h>
#include <sys/lgrp.h>
#include <sys/vmsystm.h>
#include <sys/policy.h>
#include <sys/project.h>
#include <sys/zone.h>
#define	SEGSPTADDR	(caddr_t)0x0

/*
 * # pages used for spt
 */
size_t	spt_used;

/*
 * See spt_setminfree().
 */
pgcnt_t segspt_minfree = 0;
size_t segspt_minfree_clamp = (1UL << 30); /* 1GB in bytes */
static int segspt_create(struct seg **segpp, void *argsp);
static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_free(struct seg *seg);
static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len);
static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr);
__NORETURN static int
segspt_badop_dup(struct seg *seg __unused, struct seg *newseg __unused)
{
	panic("%s called", __func__);
}

__NORETURN static faultcode_t
segspt_badop_fault(struct hat *hat, struct seg *seg, caddr_t addr,
    size_t len, enum fault_type type, enum seg_rw rw)
{
	panic("%s called", __func__);
}

__NORETURN static faultcode_t
segspt_badop_faulta(struct seg *seg __unused, caddr_t addr __unused)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_prot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_checkprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	panic("%s called", __func__);
}

__NORETURN static size_t
segspt_badop_swapout(struct seg *seg)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_sync(struct seg *seg, caddr_t addr, size_t len, int attr,
    uint_t flags)
{
	panic("%s called", __func__);
}

__NORETURN static size_t
segspt_badop_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_lockop(struct seg *seg, caddr_t addr, size_t len, int attr,
    int op, ulong_t *lockmap, size_t pos)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	panic("%s called", __func__);
}

__NORETURN static u_offset_t
segspt_badop_getoffset(struct seg *seg, caddr_t addr)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_gettype(struct seg *seg, caddr_t addr)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	panic("%s called", __func__);
}

__NORETURN static void
segspt_badop_dump(struct seg *seg)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_setpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	panic("%s called", __func__);
}

__NORETURN static int
segspt_badop_capable(struct seg *seg, segcapability_t capability)
{
	panic("%s called", __func__);
}
struct seg_ops segspt_ops = {
	segspt_badop_dup,		/* dup */
	segspt_unmap,			/* unmap */
	segspt_free,			/* free */
	segspt_badop_fault,		/* fault */
	segspt_badop_faulta,		/* faulta */
	segspt_badop_prot,		/* setprot */
	segspt_badop_checkprot,		/* checkprot */
	segspt_badop_kluster,		/* kluster */
	segspt_badop_swapout,		/* swapout */
	segspt_badop_sync,		/* sync */
	segspt_badop_incore,		/* incore */
	segspt_badop_lockop,		/* lockop */
	segspt_badop_getprot,		/* getprot */
	segspt_badop_getoffset,		/* getoffset */
	segspt_badop_gettype,		/* gettype */
	segspt_badop_getvp,		/* getvp */
	segspt_badop_advise,		/* advise */
	segspt_badop_dump,		/* dump */
	segspt_badop_pagelock,		/* pagelock */
	segspt_badop_setpgsz,		/* setpgsz */
	segspt_badop_getmemid,		/* getmemid */
	segspt_getpolicy,		/* getpolicy */
	segspt_badop_capable,		/* capable */
	seg_inherit_notsup		/* inherit */
};
static int segspt_shmdup(struct seg *seg, struct seg *newseg);
static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_shmfree(struct seg *seg);
static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg,
    caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr);
static int segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len,
    uint_t prot);
static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size,
    uint_t prot);
static int segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta);
static size_t segspt_shmswapout(struct seg *seg);
static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len,
    char *vec);
static int segspt_shmsync(struct seg *seg, caddr_t addr, size_t len,
    int attr, uint_t flags);
static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
    int attr, int op, ulong_t *lockmap, size_t pos);
static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len,
    uint_t *protv);
static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr);
static int segspt_shmgettype(struct seg *seg, caddr_t addr);
static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len,
    uint_t behav);
static void segspt_shmdump(struct seg *seg);
static int segspt_shmpagelock(struct seg *, caddr_t, size_t,
    struct page ***, enum lock_type, enum seg_rw);
static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t);
static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *);
static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t);
static int segspt_shmcapable(struct seg *, segcapability_t);
struct seg_ops segspt_shmops = {
	segspt_shmdup,			/* dup */
	segspt_shmunmap,		/* unmap */
	segspt_shmfree,			/* free */
	segspt_shmfault,		/* fault */
	segspt_shmfaulta,		/* faulta */
	segspt_shmsetprot,		/* setprot */
	segspt_shmcheckprot,		/* checkprot */
	segspt_shmkluster,		/* kluster */
	segspt_shmswapout,		/* swapout */
	segspt_shmsync,			/* sync */
	segspt_shmincore,		/* incore */
	segspt_shmlockop,		/* lockop */
	segspt_shmgetprot,		/* getprot */
	segspt_shmgetoffset,		/* getoffset */
	segspt_shmgettype,		/* gettype */
	segspt_shmgetvp,		/* getvp */
	segspt_shmadvise,		/* advise */
	segspt_shmdump,			/* dump */
	segspt_shmpagelock,		/* pagelock */
	segspt_shmsetpgsz,		/* setpgsz */
	segspt_shmgetmemid,		/* getmemid */
	segspt_shmgetpolicy,		/* getpolicy */
	segspt_shmcapable,		/* capable */
	seg_inherit_notsup		/* inherit */
};
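
/*
 * Illustrative sketch (assumed caller, not from this file): the VM layer
 * reaches the handlers in these ops vectors through the generic SEGOP_*
 * dispatch macros from <vm/seg.h>. Roughly ("dispatch_example" is a
 * hypothetical name used only for illustration):
 *
 *	faultcode_t
 *	dispatch_example(struct hat *hat, struct seg *seg, caddr_t addr,
 *	    size_t len, enum fault_type type, enum seg_rw rw)
 *	{
 *		// s_ops is set to &segspt_shmops in segspt_shmattach()
 *		return (SEGOP_FAULT(hat, seg, addr, len, type, rw));
 *	}
 *
 * so installing segspt_shmops routes faults on the attaching segment into
 * segspt_shmfault(), while the dummy spt segment's segspt_ops panics on
 * any operation that should never reach it.
 */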
static void segspt_purge(struct seg *seg);
static int segspt_reclaim(void *, caddr_t, size_t, struct page **,
    enum seg_rw, int);
static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
    page_t **ppa);
/*
 * This value corresponds to headroom in availrmem that ISM can never allocate
 * (but others can). The original intent here was to prevent ISM from locking
 * all of the remaining availrmem into memory, making forward progress
 * difficult. It's not clear how much this matters on modern systems.
 *
 * The traditional default value of 5% of total memory is used, except on
 * systems where that quickly gets ridiculous: in that case we clamp at a
 * rather arbitrary value of 1GB.
 *
 * Note that since this is called lazily on the first sptcreate(), in theory,
 * this could represent a very small value if the system is heavily loaded
 * already. In practice, the first ISM user is pretty likely to come along
 * earlier during the system's operation.
 *
 * This never gets re-figured.
 */
static void
spt_setminfree(void)
{
	segspt_minfree = availrmem / 20;

	if (segspt_minfree_clamp != 0 &&
	    segspt_minfree > (segspt_minfree_clamp / PAGESIZE))
		segspt_minfree = segspt_minfree_clamp / PAGESIZE;
}
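
/*
 * Worked example (illustrative only, assuming 4K pages): with availrmem
 * covering roughly 8GB, availrmem / 20 leaves about 0.4GB of headroom,
 * below the clamp, so the 5% value stands. With roughly 128GB, 5% would
 * be about 6.4GB, so the clamp wins and segspt_minfree becomes
 * (1UL << 30) / PAGESIZE == 262144 pages.
 */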
int
sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
    uint_t prot, uint_t flags, uint_t share_szc)
{
	int err;
	struct as *newas;
	struct segspt_crargs sptcargs;

	if (segspt_minfree == 0)
		spt_setminfree();

	if (!hat_supported(HAT_SHARED_PT, (void *)0))
		return (EINVAL);

	/*
	 * get a new as for this shared memory segment
	 */
	newas = as_alloc();
	newas->a_proc = NULL;
	sptcargs.amp = amp;
	sptcargs.prot = prot;
	sptcargs.flags = flags;
	sptcargs.szc = share_szc;
	/*
	 * create a shared page table (spt) segment
	 */

	if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) {
		as_free(newas);
		return (err);
	}
	*sptseg = sptcargs.seg_spt;
	return (0);
}
void
sptdestroy(struct as *as, struct anon_map *amp)
{
	(void) as_unmap(as, SEGSPTADDR, amp->size);
	as_free(as);
}
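
/*
 * Hedged usage sketch (illustrative, not the actual shm.c caller): a
 * client creating a DISM segment pairs these entry points roughly as
 * follows; "my_amp", "my_prot" and "my_szc" are placeholder names.
 *
 *	struct seg *sptseg;
 *	int err;
 *
 *	err = sptcreate(my_amp->size, &sptseg, my_amp, my_prot,
 *	    SHM_PAGEABLE, my_szc);
 *	if (err == 0) {
 *		// sptseg->s_as is the shared "dummy" address space
 *		sptdestroy(sptseg->s_as, my_amp);
 *	}
 */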
/*
 * called from seg_free().
 * free (i.e., unlock, unmap, return to free list)
 * all the pages in the given seg.
 */
void
segspt_free(struct seg *seg)
{
	struct spt_data *sptd = (struct spt_data *)seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	if (sptd != NULL) {
		if (sptd->spt_realsize)
			segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);

		if (sptd->spt_ppa_lckcnt) {
			kmem_free(sptd->spt_ppa_lckcnt,
			    sizeof (*sptd->spt_ppa_lckcnt)
			    * btopr(sptd->spt_amp->size));
		}
		kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
		cv_destroy(&sptd->spt_cv);
		mutex_destroy(&sptd->spt_lock);
		kmem_free(sptd, sizeof (*sptd));
	}
}
/*ARGSUSED*/
static int
segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
    uint_t flags)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	return (0);
}
/*ARGSUSED*/
static size_t
segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg;
	struct spt_data *sptd;
	caddr_t		eo_seg;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		eo_seg = addr + len;
		while (addr < eo_seg) {
			/* page exists, and it's locked. */
			*vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
			    SEG_PAGE_ANON;
			addr += PAGESIZE;
		}
		return (len);
	} else {
		struct anon_map *amp = shmd->shm_amp;
		struct anon	*ap;
		struct vnode	*vp;
		u_offset_t	off;
		page_t		*pp;
		pgcnt_t		npages, anon_index;
		ulong_t		i;
		int		ret;
		anon_sync_obj_t	cookie;

		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
		anon_index = seg_page(seg, addr);
		npages = btopr(len);
		if (anon_index + npages > btopr(shmd->shm_amp->size)) {
			return (EINVAL);
		}
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
		for (i = 0; i < npages; i++, anon_index++) {
			ret = 0;
			anon_array_enter(amp, anon_index, &cookie);
			ap = anon_get_ptr(amp->ahp, anon_index);
			if (ap != NULL) {
				swap_xlate(ap, &vp, &off);
				anon_array_exit(&cookie);
				pp = page_lookup_nowait(vp, off, SE_SHARED);
				if (pp != NULL) {
					ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON;
					page_unlock(pp);
				}
			} else {
				anon_array_exit(&cookie);
			}
			if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
				ret |= SEG_PAGE_LOCKED;
			}
			*vec++ = (char)ret;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);
		return (len);
	}
}
int
segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
{
	size_t share_size;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	/*
	 * seg.s_size may have been rounded up to the largest page size
	 * in shmat().
	 * XXX This should be cleaned up. sptdestroy should take a length
	 * argument which should be the same as sptcreate. Then
	 * this rounding would not be needed (or is done in shm.c)
	 * Only the check for full segment will be needed.
	 *
	 * XXX -- shouldn't raddr == 0 always? These tests don't seem
	 * to be useful at all.
	 */
	share_size = page_get_pagesize(seg->s_szc);
	ssize = P2ROUNDUP(ssize, share_size);

	if (raddr == seg->s_base && ssize == seg->s_size) {
		segspt_free_pages(seg, raddr, ssize);
		return (0);
	} else
		return (EINVAL);
}
static int
segspt_create(struct seg **segpp, void *argsp)
{
	struct seg	*seg = *segpp;
	int		err;
	caddr_t		addr = seg->s_base;
	struct spt_data *sptd;
	struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
	struct anon_map *amp = sptcargs->amp;
	struct kshmid	*sp = amp->a_sp;
	struct cred	*cred = CRED();
	ulong_t		i, j, anon_index = 0;
	pgcnt_t		npages = btopr(amp->size);
	struct vnode	*vp;
	page_t		**ppa;
	uint_t		hat_flags;
	size_t		pgsz;
	pgcnt_t		pgcnt;
	caddr_t		a;
	pgcnt_t		pidx;
	size_t		sz;
	proc_t		*procp = curproc;
	rctl_qty_t	lockedbytes = 0;
	kproject_t	*proj;

	/*
	 * We are holding the a_lock on the underlying dummy as,
	 * so we can make calls to the HAT layer.
	 */
	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
		if (err = anon_swap_adjust(npages))
			return (err);
	}
	err = ENOMEM;

	if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
		goto out1;

	ppa = NULL;
	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
		if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
		    KM_NOSLEEP)) == NULL)
			goto out2;
	}

	mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL);

	if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL)
		goto out3;

	seg->s_ops = &segspt_ops;
	sptd->spt_vp = vp;
	sptd->spt_amp = amp;
	sptd->spt_prot = sptcargs->prot;
	sptd->spt_flags = sptcargs->flags;
	seg->s_data = (caddr_t)sptd;
	sptd->spt_ppa = NULL;
	sptd->spt_ppa_lckcnt = NULL;
	seg->s_szc = sptcargs->szc;
	cv_init(&sptd->spt_cv, NULL, CV_DEFAULT, NULL);

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
	if (seg->s_szc > amp->a_szc) {
		amp->a_szc = seg->s_szc;
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);

	/*
	 * Set policy to affect initial allocation of pages in
	 * anon_map_createpages()
	 */
	(void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index,
	    NULL, 0, ptob(npages));

	if (sptcargs->flags & SHM_PAGEABLE) {
		size_t		share_sz;
		pgcnt_t		new_npgs, more_pgs;
		struct anon_hdr	*nahp;
		zone_t		*zone;

		share_sz = page_get_pagesize(seg->s_szc);
		if (!IS_P2ALIGNED(amp->size, share_sz)) {
			/*
			 * We are rounding up the size of the anon array
			 * on 4 M boundary because we always create 4 M
			 * of page(s) when locking, faulting pages and we
			 * don't have to check for all corner cases e.g.
			 * if there is enough space to allocate 4 M
			 * page.
			 */
			new_npgs = btop(P2ROUNDUP(amp->size, share_sz));
			more_pgs = new_npgs - npages;

			/*
			 * The zone will never be NULL, as a fully created
			 * shm always has an owning zone.
			 */
			zone = sp->shm_perm.ipc_zone_ref.zref_zone;
			ASSERT(zone != NULL);
			if (anon_resv_zone(ptob(more_pgs), zone) == 0) {
				err = ENOMEM;
				goto out4;
			}

			nahp = anon_create(new_npgs, ANON_SLEEP);
			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
			(void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages,
			    ANON_SLEEP);
			anon_release(amp->ahp, npages);
			amp->ahp = nahp;
			ASSERT(amp->swresv == ptob(npages));
			amp->swresv = amp->size = ptob(new_npgs);
			ANON_LOCK_EXIT(&amp->a_rwlock);
			npages = new_npgs;
		}

		sptd->spt_ppa_lckcnt = kmem_zalloc(npages *
		    sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP);
		sptd->spt_pcachecnt = 0;
		sptd->spt_realsize = ptob(npages);
		sptcargs->seg_spt = seg;
		return (0);
	}

	/*
	 * get array of pages for each anon slot in amp
	 */
	if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa,
	    seg, addr, S_CREATE, cred)) != 0)
		goto out4;

	mutex_enter(&sp->shm_mlock);

	/* May be partially locked, so, count bytes to charge for locking */
	for (i = 0; i < npages; i++)
		if (ppa[i]->p_lckcnt == 0)
			lockedbytes += PAGESIZE;

	proj = sp->shm_perm.ipc_proj;

	if (lockedbytes > 0) {
		mutex_enter(&procp->p_lock);
		if (rctl_incr_locked_mem(procp, proj, lockedbytes, 0)) {
			mutex_exit(&procp->p_lock);
			mutex_exit(&sp->shm_mlock);
			for (i = 0; i < npages; i++)
				page_unlock(ppa[i]);
			err = ENOMEM;
			goto out4;
		}
		mutex_exit(&procp->p_lock);
	}

	/*
	 * addr is initial address corresponding to the first page on ppa list
	 */
	for (i = 0; i < npages; i++) {
		/* attempt to lock all pages */
		if (page_pp_lock(ppa[i], 0, 1) == 0) {
			/*
			 * if unable to lock any page, unlock all
			 * of them and return error
			 */
			for (j = 0; j < i; j++)
				page_pp_unlock(ppa[j], 0, 1);
			for (i = 0; i < npages; i++)
				page_unlock(ppa[i]);
			rctl_decr_locked_mem(NULL, proj, lockedbytes, 0);
			mutex_exit(&sp->shm_mlock);
			err = ENOMEM;
			goto out4;
		}
	}
	mutex_exit(&sp->shm_mlock);

	/*
	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
	 * for the entire life of the segment. For example platforms
	 * that do not support Dynamic Reconfiguration.
	 */
	hat_flags = HAT_LOAD_SHARE;
	if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
		hat_flags |= HAT_LOAD_LOCK;

	/*
	 * Load translations one large page at a time
	 * to make sure we don't create mappings bigger than
	 * segment's size code in case underlying pages
	 * are shared with segvn's segment that uses bigger
	 * size code than we do.
	 */
	pgsz = page_get_pagesize(seg->s_szc);
	pgcnt = page_get_pagecnt(seg->s_szc);
	for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) {
		sz = MIN(pgsz, ptob(npages - pidx));
		hat_memload_array(seg->s_as->a_hat, a, sz,
		    &ppa[pidx], sptd->spt_prot, hat_flags);
	}

	/*
	 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
	 * we will leave the pages locked SE_SHARED for the life
	 * of the ISM segment. This will prevent any calls to
	 * hat_pageunload() on this ISM segment for those platforms.
	 */
	if (!(hat_flags & HAT_LOAD_LOCK)) {
		/*
		 * On platforms that support HAT_DYNAMIC_ISM_UNMAP,
		 * we no longer need to hold the SE_SHARED lock on the pages,
		 * since L_PAGELOCK and F_SOFTLOCK calls will grab the
		 * SE_SHARED lock on the pages as necessary.
		 */
		for (i = 0; i < npages; i++)
			page_unlock(ppa[i]);
	}
	sptd->spt_pcachecnt = 0;
	kmem_free(ppa, ((sizeof (page_t *)) * npages));
	sptd->spt_realsize = ptob(npages);
	atomic_add_long(&spt_used, npages);
	sptcargs->seg_spt = seg;
	return (0);

out4:
	seg->s_data = NULL;
	kmem_free(vp, sizeof (*vp));
	cv_destroy(&sptd->spt_cv);
out3:
	mutex_destroy(&sptd->spt_lock);
	if ((sptcargs->flags & SHM_PAGEABLE) == 0)
		kmem_free(ppa, (sizeof (*ppa) * npages));
out2:
	kmem_free(sptd, sizeof (*sptd));
out1:
	if ((sptcargs->flags & SHM_PAGEABLE) == 0)
		anon_swap_restore(npages);
	return (err);
}
void
segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
{
	struct page	*pp;
	struct spt_data *sptd = (struct spt_data *)seg->s_data;
	pgcnt_t		npages;
	ulong_t		anon_idx;
	struct anon_map	*amp;
	struct anon	*ap;
	struct vnode	*vp;
	u_offset_t	off;
	uint_t		hat_flags;
	int		root = 0;
	pgcnt_t		pgs, curnpgs = 0;
	page_t		*rootpp;
	rctl_qty_t	unlocked_bytes = 0;
	kproject_t	*proj;
	kshmid_t	*sp;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	len = P2ROUNDUP(len, PAGESIZE);

	npages = btop(len);

	hat_flags = HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP;
	if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
	    (sptd->spt_flags & SHM_PAGEABLE)) {
		hat_flags = HAT_UNLOAD_UNMAP;
	}

	hat_unload(seg->s_as->a_hat, addr, len, hat_flags);

	amp = sptd->spt_amp;
	if (sptd->spt_flags & SHM_PAGEABLE)
		npages = btop(amp->size);

	ASSERT(amp != NULL);

	proj = NULL;
	rootpp = NULL;
	sp = NULL;
	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		sp = amp->a_sp;
		proj = sp->shm_perm.ipc_proj;
		mutex_enter(&sp->shm_mlock);
	}
	for (anon_idx = 0; anon_idx < npages; anon_idx++) {
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
			if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
				panic("segspt_free_pages: null app");
				/*NOTREACHED*/
			}
		} else {
			if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx))
			    == NULL)
				continue;
		}
		ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0);
		swap_xlate(ap, &vp, &off);

		/*
		 * If this platform supports HAT_DYNAMIC_ISM_UNMAP,
		 * the pages won't be having SE_SHARED lock at this
		 * point.
		 *
		 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
		 * the pages are still held SE_SHARED locked from the
		 * original segspt_create()
		 *
		 * Our goal is to get SE_EXCL lock on each page, remove
		 * permanent lock on it and invalidate the page.
		 */
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
			if (hat_flags == HAT_UNLOAD_UNMAP)
				pp = page_lookup(vp, off, SE_EXCL);
			else {
				if ((pp = page_find(vp, off)) == NULL) {
					panic("segspt_free_pages: "
					    "page not locked");
					/*NOTREACHED*/
				}
				if (!page_tryupgrade(pp)) {
					page_unlock(pp);
					pp = page_lookup(vp, off, SE_EXCL);
				}
			}
			if (pp == NULL) {
				panic("segspt_free_pages: "
				    "page not in the system");
				/*NOTREACHED*/
			}
			ASSERT(pp->p_lckcnt > 0);
			page_pp_unlock(pp, 0, 1);
			if (pp->p_lckcnt == 0)
				unlocked_bytes += PAGESIZE;
		} else {
			if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
				continue;
		}
		/*
		 * It's logical to invalidate the pages here as in most cases
		 * these were created by segspt.
		 */
		if (pp->p_szc != 0) {
			if (root == 0) {
				ASSERT(curnpgs == 0);
				root = 1;
				rootpp = pp;
				pgs = curnpgs = page_get_pagecnt(pp->p_szc);
				ASSERT(pgs > 1);
				ASSERT(IS_P2ALIGNED(pgs, pgs));
				ASSERT(!(page_pptonum(pp) & (pgs - 1)));
				curnpgs--;
			} else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) {
				ASSERT(curnpgs == 1);
				ASSERT(page_pptonum(pp) ==
				    page_pptonum(rootpp) + (pgs - 1));
				page_destroy_pages(rootpp);
				root = 0;
				curnpgs = 0;
			} else {
				ASSERT(curnpgs > 1);
				ASSERT(page_pptonum(pp) ==
				    page_pptonum(rootpp) + (pgs - curnpgs));
				curnpgs--;
			}
		} else {
			if (root != 0 || curnpgs != 0) {
				panic("segspt_free_pages: bad large page");
				/*NOTREACHED*/
			}
			/*
			 * Before destroying the pages, we need to take care
			 * of the rctl locked memory accounting. For that
			 * we need to calculate the unlocked_bytes.
			 */
			if (pp->p_lckcnt > 0)
				unlocked_bytes += PAGESIZE;
			/*LINTED: constant in conditional context */
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		}
	}
	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		if (unlocked_bytes > 0)
			rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
		mutex_exit(&sp->shm_mlock);
	}
	if (root != 0 || curnpgs != 0) {
		panic("segspt_free_pages: bad large page");
		/*NOTREACHED*/
	}

	/*
	 * mark that pages have been released
	 */
	sptd->spt_realsize = 0;

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		atomic_add_long(&spt_used, -npages);
		anon_swap_restore(npages);
	}
}
/*
 * Get memory allocation policy info for specified address in given segment
 */
static lgrp_mem_policy_info_t *
segspt_getpolicy(struct seg *seg, caddr_t addr)
{
	struct anon_map		*amp;
	ulong_t			anon_index;
	lgrp_mem_policy_info_t	*policy_info;
	struct spt_data		*spt_data;

	ASSERT(seg != NULL);

	/*
	 * Get anon_map from segspt
	 *
	 * Assume that no lock needs to be held on anon_map, since
	 * it should be protected by its reference count which must be
	 * nonzero for an existing segment
	 * Need to grab readers lock on policy tree though
	 */
	spt_data = (struct spt_data *)seg->s_data;
	if (spt_data == NULL)
		return (NULL);
	amp = spt_data->spt_amp;
	ASSERT(amp->refcnt != 0);

	/*
	 * Get policy info
	 *
	 * Assume starting anon index of 0
	 */
	anon_index = seg_page(seg, addr);
	policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);

	return (policy_info);
}
/*
 * DISM only.
 * Return locked pages over a given range.
 *
 * We will cache all DISM locked pages and save the pplist for the
 * entire segment in the ppa field of the underlying DISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
 */
static int
segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	struct spt_data	*sptd = sptseg->s_data;
	pgcnt_t		pg_idx, npages, tot_npages, npgs;
	struct page	**pplist, **pl, **ppa, *pp;
	struct anon_map	*amp;
	ulong_t		an_idx;
	int		ret = ENOTSUP;
	uint_t		pl_built = 0;
	struct anon	*ap;
	struct vnode	*vp;
	u_offset_t	off;
	pgcnt_t		claim_availrmem = 0;
	uint_t		szc;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
	ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);

	/*
	 * We want to lock/unlock the entire ISM segment. Therefore,
	 * we will be using the underlying sptseg and its base address
	 * and length for the caching arguments.
	 */
	ASSERT(sptseg);
	ASSERT(sptd);

	pg_idx = seg_page(seg, addr);
	npages = btopr(len);

	/*
	 * check if the request is larger than number of pages covered
	 * by amp
	 */
	if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
		*ppp = NULL;
		return (ENOTSUP);
	}

	if (type == L_PAGEUNLOCK) {
		ASSERT(sptd->spt_ppa != NULL);

		seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);

		/*
		 * If someone is blocked while unmapping, we purge
		 * segment page cache and thus reclaim pplist synchronously
		 * without waiting for seg_pasync_thread. This speeds up
		 * unmapping in cases where munmap(2) is called, while
		 * raw async i/o is still in progress or where a thread
		 * exits on data fault in a multithreaded application.
		 */
		if ((sptd->spt_flags & DISM_PPA_CHANGED) ||
		    (AS_ISUNMAPWAIT(seg->s_as) &&
		    shmd->shm_softlockcnt > 0)) {
			segspt_purge(seg);
		}
		return (0);
	}

	/* The L_PAGELOCK case ... */

	if (sptd->spt_flags & DISM_PPA_CHANGED) {
		segspt_purge(seg);
		/*
		 * for DISM the ppa needs to be rebuilt, since the
		 * number of locked pages could have changed
		 */
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * First try to find pages in segment page cache, without
	 * holding the segment lock.
	 */
	pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    S_WRITE, SEGP_FORCE_WIRED);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa != NULL);
		ASSERT(sptd->spt_ppa == pplist);
		ppa = sptd->spt_ppa;
		for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
			if (ppa[an_idx] == NULL) {
				seg_pinactive(seg, NULL, seg->s_base,
				    sptd->spt_amp->size, ppa,
				    S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
				*ppp = NULL;
				return (ENOTSUP);
			}
			if ((szc = ppa[an_idx]->p_szc) != 0) {
				npgs = page_get_pagecnt(szc);
				an_idx = P2ROUNDUP(an_idx + 1, npgs);
			} else {
				an_idx++;
			}
		}
		/*
		 * Since we cache the entire DISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. pg_idx.
		 */
		*ppp = &(sptd->spt_ppa[pg_idx]);
		return (0);
	}

	mutex_enter(&sptd->spt_lock);
	/*
	 * try to find pages in segment page cache with mutex
	 */
	pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    S_WRITE, SEGP_FORCE_WIRED);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa != NULL);
		ASSERT(sptd->spt_ppa == pplist);
		ppa = sptd->spt_ppa;
		for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
			if (ppa[an_idx] == NULL) {
				mutex_exit(&sptd->spt_lock);
				seg_pinactive(seg, NULL, seg->s_base,
				    sptd->spt_amp->size, ppa,
				    S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
				*ppp = NULL;
				return (ENOTSUP);
			}
			if ((szc = ppa[an_idx]->p_szc) != 0) {
				npgs = page_get_pagecnt(szc);
				an_idx = P2ROUNDUP(an_idx + 1, npgs);
			} else {
				an_idx++;
			}
		}
		/*
		 * Since we cache the entire DISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. pg_idx.
		 */
		mutex_exit(&sptd->spt_lock);
		*ppp = &(sptd->spt_ppa[pg_idx]);
		return (0);
	}
	if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    SEGP_FORCE_WIRED) == SEGP_FAIL) {
		mutex_exit(&sptd->spt_lock);
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * No need to worry about protections because DISM pages are always rw.
	 */
	pl = pplist = NULL;
	amp = sptd->spt_amp;

	/*
	 * Do we need to build the ppa array?
	 */
	if (sptd->spt_ppa == NULL) {
		pgcnt_t lpg_cnt = 0;

		pl_built = 1;
		tot_npages = btopr(sptd->spt_amp->size);

		ASSERT(sptd->spt_pcachecnt == 0);
		pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP);
		pl = pplist;

		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
		for (an_idx = 0; an_idx < tot_npages; ) {
			ap = anon_get_ptr(amp->ahp, an_idx);
			/*
			 * Cache only mlocked pages. For large pages
			 * if one (constituent) page is mlocked
			 * all pages for that large page
			 * are cached also. This is for quick
			 * lookups of ppa array;
			 */
			if ((ap != NULL) && (lpg_cnt != 0 ||
			    (sptd->spt_ppa_lckcnt[an_idx] != 0))) {

				swap_xlate(ap, &vp, &off);
				pp = page_lookup(vp, off, SE_SHARED);
				ASSERT(pp != NULL);
				if (lpg_cnt == 0) {
					lpg_cnt++;
					/*
					 * For a small page, we are done --
					 * lpg_count is reset to 0 below.
					 *
					 * For a large page, we are guaranteed
					 * to find the anon structures of all
					 * constituent pages and a non-zero
					 * lpg_cnt ensures that we don't test
					 * for mlock for these. We are done
					 * when lpg_count reaches (npgs + 1).
					 * If we are not the first constituent
					 * page, restart at the first one.
					 */
					npgs = page_get_pagecnt(pp->p_szc);
					if (!IS_P2ALIGNED(an_idx, npgs)) {
						an_idx = P2ALIGN(an_idx, npgs);
						page_unlock(pp);
						continue;
					}
				}
				if (++lpg_cnt > npgs)
					lpg_cnt = 0;

				/*
				 * availrmem is decremented only
				 * for unlocked pages
				 */
				if (sptd->spt_ppa_lckcnt[an_idx] == 0)
					claim_availrmem++;
				pplist[an_idx] = pp;
			}
			an_idx++;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);

		if (claim_availrmem) {
			mutex_enter(&freemem_lock);
			if (availrmem < tune.t_minarmem + claim_availrmem) {
				mutex_exit(&freemem_lock);
				ret = ENOTSUP;
				claim_availrmem = 0;
				goto insert_fail;
			} else {
				availrmem -= claim_availrmem;
			}
			mutex_exit(&freemem_lock);
		}

		sptd->spt_ppa = pl;
	} else {
		/*
		 * We already have a valid ppa[].
		 */
		pl = sptd->spt_ppa;
	}

	ASSERT(pl != NULL);

	ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
	    segspt_reclaim);
	if (ret == SEGP_FAIL) {
		/*
		 * seg_pinsert failed. We return
		 * ENOTSUP, so that the as_pagelock() code will
		 * then try the slower F_SOFTLOCK path.
		 */
		if (pl_built) {
			/*
			 * No one else has referenced the ppa[].
			 * We created it and we need to destroy it.
			 */
			sptd->spt_ppa = NULL;
		}
		ret = ENOTSUP;
		goto insert_fail;
	}

	/*
	 * In either case, we increment softlockcnt on the 'real' segment.
	 */
	sptd->spt_pcachecnt++;
	atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));

	ppa = sptd->spt_ppa;
	for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
		if (ppa[an_idx] == NULL) {
			mutex_exit(&sptd->spt_lock);
			seg_pinactive(seg, NULL, seg->s_base,
			    sptd->spt_amp->size,
			    pl, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);
			*ppp = NULL;
			return (ENOTSUP);
		}
		if ((szc = ppa[an_idx]->p_szc) != 0) {
			npgs = page_get_pagecnt(szc);
			an_idx = P2ROUNDUP(an_idx + 1, npgs);
		} else {
			an_idx++;
		}
	}
	/*
	 * We can now drop the sptd->spt_lock since the ppa[]
	 * exists and we have incremented pacachecnt.
	 */
	mutex_exit(&sptd->spt_lock);

	/*
	 * Since we cache the entire segment, we want to
	 * set ppp to point to the first slot that corresponds
	 * to the requested addr, i.e. pg_idx.
	 */
	*ppp = &(sptd->spt_ppa[pg_idx]);
	return (0);

insert_fail:
	/*
	 * We will only reach this code if we tried and failed.
	 *
	 * And we can drop the lock on the dummy seg, once we've failed
	 * to set up a new ppa[].
	 */
	mutex_exit(&sptd->spt_lock);

	if (pl_built) {
		if (claim_availrmem) {
			mutex_enter(&freemem_lock);
			availrmem += claim_availrmem;
			mutex_exit(&freemem_lock);
		}

		/*
		 * We created pl and we need to destroy it.
		 */
		pplist = pl;
		for (an_idx = 0; an_idx < tot_npages; an_idx++) {
			if (pplist[an_idx] != NULL)
				page_unlock(pplist[an_idx]);
		}
		kmem_free(pl, sizeof (page_t *) * tot_npages);
	}

	if (shmd->shm_softlockcnt <= 0) {
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
	*ppp = NULL;
	return (ret);
}
/*
 * return locked pages over a given range.
 *
 * We will cache the entire ISM segment and save the pplist for the
 * entire segment in the ppa field of the underlying ISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
 */
static int
segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	struct spt_data	*sptd = sptseg->s_data;
	pgcnt_t		np, page_index, npages;
	caddr_t		a, spt_base;
	struct page	**pplist, **pl, *pp;
	struct anon_map	*amp;
	ulong_t		anon_index;
	int		ret = ENOTSUP;
	uint_t		pl_built = 0;
	struct anon	*ap;
	struct vnode	*vp;
	u_offset_t	off;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
	ASSERT(type == L_PAGELOCK || type == L_PAGEUNLOCK);

	/*
	 * We want to lock/unlock the entire ISM segment. Therefore,
	 * we will be using the underlying sptseg and its base address
	 * and length for the caching arguments.
	 */
	ASSERT(sptseg);
	ASSERT(sptd);

	if (sptd->spt_flags & SHM_PAGEABLE) {
		return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
	}

	page_index = seg_page(seg, addr);
	npages = btopr(len);

	/*
	 * check if the request is larger than number of pages covered
	 * by amp
	 */
	if (page_index + npages > btopr(sptd->spt_amp->size)) {
		*ppp = NULL;
		return (ENOTSUP);
	}

	if (type == L_PAGEUNLOCK) {

		ASSERT(sptd->spt_ppa != NULL);

		seg_pinactive(seg, NULL, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, S_WRITE, SEGP_FORCE_WIRED, segspt_reclaim);

		/*
		 * If someone is blocked while unmapping, we purge
		 * segment page cache and thus reclaim pplist synchronously
		 * without waiting for seg_pasync_thread. This speeds up
		 * unmapping in cases where munmap(2) is called, while
		 * raw async i/o is still in progress or where a thread
		 * exits on data fault in a multithreaded application.
		 */
		if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
			segspt_purge(seg);
		}
		return (0);
	}

	/* The L_PAGELOCK case... */

	/*
	 * First try to find pages in segment page cache, without
	 * holding the segment lock.
	 */
	pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    S_WRITE, SEGP_FORCE_WIRED);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa == pplist);
		ASSERT(sptd->spt_ppa[page_index]);
		/*
		 * Since we cache the entire ISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. page_index.
		 */
		*ppp = &(sptd->spt_ppa[page_index]);
		return (0);
	}

	mutex_enter(&sptd->spt_lock);

	/*
	 * try to find pages in segment page cache
	 */
	pplist = seg_plookup(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    S_WRITE, SEGP_FORCE_WIRED);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa == pplist);
		/*
		 * Since we cache the entire segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. page_index.
		 */
		mutex_exit(&sptd->spt_lock);
		*ppp = &(sptd->spt_ppa[page_index]);
		return (0);
	}

	if (seg_pinsert_check(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    SEGP_FORCE_WIRED) == SEGP_FAIL) {
		mutex_exit(&sptd->spt_lock);
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * No need to worry about protections because ISM pages
	 * are always rw.
	 */
	pl = pplist = NULL;

	/*
	 * Do we need to build the ppa array?
	 */
	if (sptd->spt_ppa == NULL) {
		ASSERT(sptd->spt_ppa == pplist);

		spt_base = sptseg->s_base;
		pl_built = 1;

		/*
		 * availrmem is decremented once during anon_swap_adjust()
		 * and is incremented during the anon_unresv(), which is
		 * called from shm_rm_amp() when the segment is destroyed.
		 */
		amp = sptd->spt_amp;
		ASSERT(amp != NULL);

		/* pcachecnt is protected by sptd->spt_lock */
		ASSERT(sptd->spt_pcachecnt == 0);
		pplist = kmem_zalloc(sizeof (page_t *)
		    * btopr(sptd->spt_amp->size), KM_SLEEP);
		pl = pplist;

		anon_index = seg_page(sptseg, spt_base);

		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
		for (a = spt_base; a < (spt_base + sptd->spt_amp->size);
		    a += PAGESIZE, anon_index++, pplist++) {
			ap = anon_get_ptr(amp->ahp, anon_index);
			ASSERT(ap != NULL);
			swap_xlate(ap, &vp, &off);
			pp = page_lookup(vp, off, SE_SHARED);
			ASSERT(pp != NULL);
			*pplist = pp;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);

		if (a < (spt_base + sptd->spt_amp->size)) {
			ret = ENOTSUP;
			goto insert_fail;
		}
		sptd->spt_ppa = pl;
	} else {
		/*
		 * We already have a valid ppa[].
		 */
		pl = sptd->spt_ppa;
	}

	ASSERT(pl != NULL);

	ret = seg_pinsert(seg, NULL, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_amp->size, pl, S_WRITE, SEGP_FORCE_WIRED,
	    segspt_reclaim);
	if (ret == SEGP_FAIL) {
		/*
		 * seg_pinsert failed. We return
		 * ENOTSUP, so that the as_pagelock() code will
		 * then try the slower F_SOFTLOCK path.
		 */
		if (pl_built) {
			/*
			 * No one else has referenced the ppa[].
			 * We created it and we need to destroy it.
			 */
			sptd->spt_ppa = NULL;
		}
		ret = ENOTSUP;
		goto insert_fail;
	}

	/*
	 * In either case, we increment softlockcnt on the 'real' segment.
	 */
	sptd->spt_pcachecnt++;
	atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));

	/*
	 * We can now drop the sptd->spt_lock since the ppa[]
	 * exists and we have incremented pacachecnt.
	 */
	mutex_exit(&sptd->spt_lock);

	/*
	 * Since we cache the entire segment, we want to
	 * set ppp to point to the first slot that corresponds
	 * to the requested addr, i.e. page_index.
	 */
	*ppp = &(sptd->spt_ppa[page_index]);
	return (0);

insert_fail:
	/*
	 * We will only reach this code if we tried and failed.
	 *
	 * And we can drop the lock on the dummy seg, once we've failed
	 * to set up a new ppa[].
	 */
	mutex_exit(&sptd->spt_lock);

	if (pl_built) {
		/*
		 * We created pl and we need to destroy it.
		 */
		pplist = pl;
		np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT);
		while (np) {
			page_unlock(*pplist);
			np--;
			pplist++;
		}
		kmem_free(pl, sizeof (page_t *) * btopr(sptd->spt_amp->size));
	}
	if (shmd->shm_softlockcnt <= 0) {
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
	*ppp = NULL;
	return (ret);
}
/*
 * purge any cached pages in the I/O page cache
 */
static void
segspt_purge(struct seg *seg)
{
	seg_ppurge(seg, NULL, SEGP_FORCE_WIRED);
}
static int
segspt_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist,
    enum seg_rw rw, int async)
{
	struct seg *seg = (struct seg *)ptag;
	struct	shm_data *shmd = (struct shm_data *)seg->s_data;
	struct	seg	*sptseg;
	struct	spt_data *sptd;
	pgcnt_t npages, i, free_availrmem = 0;
	int	done = 0;

	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;
	npages = (len >> PAGESHIFT);
	ASSERT(npages);
	ASSERT(sptd->spt_pcachecnt != 0);
	ASSERT(sptd->spt_ppa == pplist);
	ASSERT(npages == btopr(sptd->spt_amp->size));
	ASSERT(async || AS_LOCK_HELD(seg->s_as));

	/*
	 * Acquire the lock on the dummy seg and destroy the
	 * ppa array IF this is the last pcachecnt.
	 */
	mutex_enter(&sptd->spt_lock);
	if (--sptd->spt_pcachecnt == 0) {
		for (i = 0; i < npages; i++) {
			if (pplist[i] == NULL) {
				continue;
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pplist[i]);
			} else {
				hat_setref(pplist[i]);
			}
			if ((sptd->spt_flags & SHM_PAGEABLE) &&
			    (sptd->spt_ppa_lckcnt[i] == 0))
				free_availrmem++;
			page_unlock(pplist[i]);
		}
		if ((sptd->spt_flags & SHM_PAGEABLE) && free_availrmem) {
			mutex_enter(&freemem_lock);
			availrmem += free_availrmem;
			mutex_exit(&freemem_lock);
		}
		/*
		 * Since we want to cache/uncache the entire ISM segment,
		 * we will track the pplist in a segspt specific field
		 * ppa, that is initialized at the time we add an entry to
		 * the cache.
		 */
		ASSERT(sptd->spt_pcachecnt == 0);
		kmem_free(pplist, sizeof (page_t *) * npages);
		sptd->spt_ppa = NULL;
		sptd->spt_flags &= ~DISM_PPA_CHANGED;
		sptd->spt_gen++;
		cv_broadcast(&sptd->spt_cv);
		done = 1;
	}
	mutex_exit(&sptd->spt_lock);

	/*
	 * If we are the pcache async thread or called via
	 * seg_ppurge_wiredpp() we may not hold the AS lock (in this case
	 * the async argument is not 0). This means that if softlockcnt
	 * drops to 0 after the decrement below, the address space may get
	 * freed. We can't allow that, since after the softlockcnt
	 * decrement to 0 we still need to access the as structure for a
	 * possible wakeup of unmap waiters. To prevent the disappearance
	 * of the as we take this segment's shm_segfree_syncmtx.
	 * segspt_shmfree() also takes this mutex as a barrier to make sure
	 * this routine completes before the segment is freed.
	 *
	 * The second complication we have to deal with in the async case
	 * is the possibility of a missed wakeup of an unmap wait thread.
	 * When we don't hold the as lock here, we may take the a_contents
	 * lock before an unmap wait thread that was first to see that
	 * softlockcnt was still not 0. As a result we'd fail to wake up
	 * that unmap wait thread. To avoid this race we set the
	 * nounmapwait flag in the as structure if we drop softlockcnt to
	 * 0 when async is not 0. An unmapwait thread will not block if
	 * this flag is set.
	 */
	if (async)
		mutex_enter(&shmd->shm_segfree_syncmtx);

	/*
	 * Now decrement softlockcnt.
	 */
	ASSERT(shmd->shm_softlockcnt > 0);
	atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));

	if (shmd->shm_softlockcnt <= 0) {
		if (async || AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (async)
				AS_SETNOUNMAPWAIT(seg->s_as);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}

	if (async)
		mutex_exit(&shmd->shm_segfree_syncmtx);

	return (done);
}
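
/*
 * Hedged sketch of the async barrier above (illustrative): segspt_shmfree()
 * takes the same shm_segfree_syncmtx before tearing the segment down,
 * conceptually:
 *
 *	mutex_enter(&shmd->shm_segfree_syncmtx);  // waits for reclaim
 *	mutex_destroy(&shmd->shm_segfree_syncmtx);
 *	kmem_free(shmd, sizeof (*shmd));
 *
 * so an async segspt_reclaim() that still holds the mutex finishes its
 * unmap-waiter wakeup before the shm_data can disappear.
 */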
/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 *
 * The calls to acquire and release the anon map lock mutex were
 * removed in order to avoid a deadly embrace during a DR
 * memory delete operation. (Eg. DR blocks while waiting for an
 * exclusive lock on a page that is being used for kaio; the
 * thread that will complete the kaio and call segspt_softunlock
 * blocks on the anon map lock; another thread holding the anon
 * map lock blocks on another page lock via the segspt_shmfault
 * -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
 *
 * The appropriateness of the removal is based upon the following:
 * 1. If we are holding a segment's reader lock and the page is held
 * shared, then the corresponding element in anonmap which points to
 * anon struct cannot change and there is no need to acquire the
 * anonymous map lock.
 * 2. Threads in segspt_softunlock have a reader lock on the segment
 * and already have the shared page lock, so we are guaranteed that
 * the anon map slot cannot change and therefore can call anon_get_ptr()
 * without grabbing the anonymous map lock.
 * 3. Threads that softlock a shared page break copy-on-write, even if
 * it's a read. Thus cow faults can be ignored with respect to soft
 * unlocking, since the breaking of cow means that the anon slot(s) will
 * not be shared.
 */
static void
segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
    size_t len, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg;
	struct spt_data	*sptd;
	page_t		*pp;
	caddr_t		adr;
	struct vnode	*vp;
	u_offset_t	offset;
	ulong_t		anon_index;
	struct anon_map	*amp;		/* XXX - for locknest */
	struct anon	*ap = NULL;
	pgcnt_t		npages;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;

	/*
	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
	 * and therefore their pages are SE_SHARED locked
	 * for the entire life of the segment.
	 */
	if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
	    ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
		goto softlock_decrement;
	}

	/*
	 * Any thread is free to do a page_find and
	 * page_unlock() on the pages within this seg.
	 *
	 * We are already holding the as->a_lock on the user's
	 * real segment, but we need to hold the a_lock on the
	 * underlying dummy as. This is mostly to satisfy the
	 * underlying HAT layer.
	 */
	AS_LOCK_ENTER(sptseg->s_as, RW_READER);
	hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
	AS_LOCK_EXIT(sptseg->s_as);

	amp = sptd->spt_amp;
	ASSERT(amp != NULL);
	anon_index = seg_page(sptseg, sptseg_addr);

	for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
		ap = anon_get_ptr(amp->ahp, anon_index++);
		ASSERT(ap != NULL);
		swap_xlate(ap, &vp, &offset);

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has a
		 * "shared" lock.
		 */
		pp = page_find(vp, offset);
		ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
		if (pp == NULL) {
			panic("segspt_softunlock: "
			    "addr %p, ap %p, vp %p, off %llx",
			    (void *)adr, (void *)ap, (void *)vp, offset);
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			hat_setref(pp);
		}
		page_unlock(pp);
	}

softlock_decrement:
	npages = btopr(len);
	ASSERT(shmd->shm_softlockcnt >= npages);
	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages);
	if (shmd->shm_softlockcnt == 0) {
		/*
		 * All SOFTLOCKS are gone. Wakeup any waiting
		 * unmappers so they can try again to unmap.
		 * Check for waiters first without the mutex
		 * held so we don't always grab the mutex on
		 * softunlocks.
		 */
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
}
int
segspt_shmattach(struct seg **segpp, void *argsp)
{
	struct seg	*seg = *segpp;
	struct shm_data *shmd_arg = (struct shm_data *)argsp;
	struct shm_data *shmd;
	struct anon_map *shm_amp = shmd_arg->shm_amp;
	struct spt_data	*sptd;
	int		error = 0;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
	if (shmd == NULL)
		return (ENOMEM);

	shmd->shm_sptas = shmd_arg->shm_sptas;
	shmd->shm_amp = shm_amp;
	shmd->shm_sptseg = shmd_arg->shm_sptseg;

	(void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
	    NULL, 0, seg->s_size);

	mutex_init(&shmd->shm_segfree_syncmtx, NULL, MUTEX_DEFAULT, NULL);

	seg->s_data = (void *)shmd;
	seg->s_ops = &segspt_shmops;
	seg->s_szc = shmd->shm_sptseg->s_szc;
	sptd = shmd->shm_sptseg->s_data;

	if (sptd->spt_flags & SHM_PAGEABLE) {
		if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size),
		    KM_NOSLEEP)) == NULL) {
			seg->s_data = (void *)NULL;
			kmem_free(shmd, (sizeof (*shmd)));
			return (ENOMEM);
		}
		shmd->shm_lckpgs = 0;
		if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
			if ((error = hat_share(seg->s_as->a_hat, seg->s_base,
			    shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
			    seg->s_size, seg->s_szc)) != 0) {
				kmem_free(shmd->shm_vpage,
				    btopr(shm_amp->size));
			}
		}
	} else {
		error = hat_share(seg->s_as->a_hat, seg->s_base,
		    shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
		    seg->s_size, seg->s_szc);
	}
	if (error) {
		seg->s_data = (void *)NULL;
		kmem_free(shmd, (sizeof (*shmd)));
	} else {
		ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
		shm_amp->refcnt++;
		ANON_LOCK_EXIT(&shm_amp->a_rwlock);
	}
	return (error);
}
int
segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
{
	struct shm_data	*shmd = (struct shm_data *)seg->s_data;
	int		reclaim = 1;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
retry:
	if (shmd->shm_softlockcnt > 0) {
		if (reclaim == 1) {
			segspt_purge(seg);
			reclaim = 0;
			goto retry;
		}
		return (EAGAIN);
	}

	if (ssize != seg->s_size) {
#ifdef DEBUG
		cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
		    ssize, seg->s_size);
#endif
		return (EINVAL);
	}

	(void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
	    NULL, 0);
	hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);

	seg_free(seg);

	return (0);
}
void
segspt_shmfree(struct seg *seg)
{
	struct shm_data	*shmd = (struct shm_data *)seg->s_data;
	struct anon_map	*shm_amp = shmd->shm_amp;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	(void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
	    MC_UNLOCK, NULL, 0);

	/*
	 * Need to increment refcnt when attaching
	 * and decrement when detaching because of dup().
	 */
	ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
	shm_amp->refcnt--;
	ANON_LOCK_EXIT(&shm_amp->a_rwlock);

	if (shmd->shm_vpage) {	/* only for DISM */
		kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
		shmd->shm_vpage = NULL;
	}

	/*
	 * Take shm_segfree_syncmtx lock to let segspt_reclaim() finish if it's
	 * still working with this segment without holding as lock.
	 */
	ASSERT(shmd->shm_softlockcnt == 0);
	mutex_enter(&shmd->shm_segfree_syncmtx);
	mutex_destroy(&shmd->shm_segfree_syncmtx);

	kmem_free(shmd, sizeof (*shmd));
}
/*ARGSUSED*/
int
segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * Shared page table is more than shared mapping.
	 *  Individual process sharing page tables can't change prot
	 *  because there is only one set of page tables.
	 *  This will be allowed after private page table is
	 *  supported.
	 */
	/* need to return correct status error? */
	return (0);
}
static faultcode_t
segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
    size_t len, enum fault_type type, enum seg_rw rw)
{
	struct shm_data	*shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	struct as	*curspt = shmd->shm_sptas;
	struct spt_data	*sptd = sptseg->s_data;
	pgcnt_t		npages;
	size_t		size;
	caddr_t		segspt_addr, shm_addr;
	page_t		**ppa;
	int		i;
	ulong_t		an_idx = 0;
	int		err = 0;
	int		dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0);
	size_t		pgsz;
	pgcnt_t		pgcnt;
	caddr_t		a;
	pgcnt_t		pidx;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * Because of the way spt is implemented
	 * the realsize of the segment does not have to be
	 * equal to the segment size itself. The segment size is
	 * often in multiples of a page size larger than PAGESIZE.
	 * The realsize is rounded up to the nearest PAGESIZE
	 * based on what the user requested. This is a bit of
	 * ugliness that is historical but not easily fixed
	 * without re-designing the higher levels of ISM.
	 */
	ASSERT(addr >= seg->s_base);
	if (((addr + len) - seg->s_base) > sptd->spt_realsize)
		return (FC_NOMAP);
	/*
	 * For all of the following cases except F_PROT, we need to
	 * make any necessary adjustments to addr and len
	 * and get all of the necessary page_t's into an array called ppa[].
	 *
	 * The code in shmat() forces base addr and len of ISM segment
	 * to be aligned to largest page size supported. Therefore,
	 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
	 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
	 * in large pagesize chunks, or else we will screw up the HAT
	 * layer by calling hat_memload_array() with differing page sizes
	 * over a given virtual range.
	 */
	pgsz = page_get_pagesize(sptseg->s_szc);
	pgcnt = page_get_pagecnt(sptseg->s_szc);
	shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz);
	size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz);
	npages = btopr(size);

	/*
	 * Now we need to convert from addr in segshm to addr in segspt.
	 */
	an_idx = seg_page(seg, shm_addr);
	segspt_addr = sptseg->s_base + ptob(an_idx);

	ASSERT((segspt_addr + ptob(npages)) <=
	    (sptseg->s_base + sptd->spt_realsize));
	ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size));

	switch (type) {

	case F_SOFTLOCK:

		atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
		/*
		 * Fall through to the F_INVAL case to load up the hat layer
		 * entries with the HAT_LOAD_LOCK flag.
		 */
		/* FALLTHRU */
	case F_INVAL:

		if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
			return (FC_NOMAP);

		ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);

		err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
		if (err != 0) {
			if (type == F_SOFTLOCK) {
				atomic_add_long((ulong_t *)(
				    &(shmd->shm_softlockcnt)), -npages);
			}
			goto dism_err;
		}
		AS_LOCK_ENTER(sptseg->s_as, RW_READER);
		a = segspt_addr;
		pidx = 0;
		if (type == F_SOFTLOCK) {

			/*
			 * Load up the translation keeping it
			 * locked and don't unlock the page.
			 */
			for (; pidx < npages; a += pgsz, pidx += pgcnt) {
				hat_memload_array(sptseg->s_as->a_hat,
				    a, pgsz, &ppa[pidx], sptd->spt_prot,
				    HAT_LOAD_LOCK | HAT_LOAD_SHARE);
			}
		} else {
			/*
			 * Migrate pages marked for migration
			 */
			if (lgrp_optimizations())
				page_migrate(seg, shm_addr, ppa, npages);

			for (; pidx < npages; a += pgsz, pidx += pgcnt) {
				hat_memload_array(sptseg->s_as->a_hat,
				    a, pgsz, &ppa[pidx],
				    sptd->spt_prot,
				    HAT_LOAD_SHARE);
			}

			/*
			 * And now drop the SE_SHARED lock(s).
			 */
			if (dyn_ism_unmap) {
				for (i = 0; i < npages; i++) {
					page_unlock(ppa[i]);
				}
			}
		}

		if (!dyn_ism_unmap) {
			if (hat_share(seg->s_as->a_hat, shm_addr,
			    curspt->a_hat, segspt_addr, ptob(npages),
			    seg->s_szc) != 0) {
				panic("hat_share err in DISM fault");
				/* NOTREACHED */
			}
			if (type == F_INVAL) {
				for (i = 0; i < npages; i++) {
					page_unlock(ppa[i]);
				}
			}
		}
		AS_LOCK_EXIT(sptseg->s_as);
dism_err:
		kmem_free(ppa, npages * sizeof (page_t *));
		return (err);

	case F_SOFTUNLOCK:

		/*
		 * This is a bit ugly, we pass in the real seg pointer,
		 * but the segspt_addr is the virtual address within the
		 * dummy seg.
		 */
		segspt_softunlock(seg, segspt_addr, size, rw);
		return (0);

	case F_PROT:

		/*
		 * This takes care of the unusual case where a user
		 * allocates a stack in shared memory and a register
		 * window overflow is written to that stack page before
		 * it is otherwise modified.
		 *
		 * We can get away with this because ISM segments are
		 * always rw. Other than this unusual case, there
		 * should be no instances of protection violations.
		 */
		return (0);

	default:
#ifdef DEBUG
		panic("segspt_dismfault default type?");
#else
		return (FC_NOMAP);
#endif
	}
}
2153 segspt_shmfault(struct hat
*hat
, struct seg
*seg
, caddr_t addr
,
2154 size_t len
, enum fault_type type
, enum seg_rw rw
)
2156 struct shm_data
*shmd
= (struct shm_data
*)seg
->s_data
;
2157 struct seg
*sptseg
= shmd
->shm_sptseg
;
2158 struct as
*curspt
= shmd
->shm_sptas
;
2159 struct spt_data
*sptd
= sptseg
->s_data
;
2162 caddr_t sptseg_addr
, shm_addr
;
2166 ulong_t anon_index
= 0;
2168 struct anon_map
*amp
; /* XXX - for locknest */
2169 struct anon
*ap
= NULL
;
2180 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
));
2182 if (sptd
->spt_flags
& SHM_PAGEABLE
) {
2183 return (segspt_dismfault(hat
, seg
, addr
, len
, type
, rw
));
2187 * Because of the way spt is implemented
2188 * the realsize of the segment does not have to be
2189 * equal to the segment size itself. The segment size is
2190 * often in multiples of a page size larger than PAGESIZE.
2191 * The realsize is rounded up to the nearest PAGESIZE
2192 * based on what the user requested. This is a bit of
2193 * ungliness that is historical but not easily fixed
2194 * without re-designing the higher levels of ISM.
2196 ASSERT(addr
>= seg
->s_base
);
2197 if (((addr
+ len
) - seg
->s_base
) > sptd
->spt_realsize
)
2200 * For all of the following cases except F_PROT, we need to
2201 * make any necessary adjustments to addr and len
2202 * and get all of the necessary page_t's into an array called ppa[].
2204 * The code in shmat() forces base addr and len of ISM segment
2205 * to be aligned to largest page size supported. Therefore,
2206 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
2207 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
2208 * in large pagesize chunks, or else we will screw up the HAT
2209 * layer by calling hat_memload_array() with differing page sizes
2210 * over a given virtual range.
2212 pgsz
= page_get_pagesize(sptseg
->s_szc
);
2213 pgcnt
= page_get_pagecnt(sptseg
->s_szc
);
2214 shm_addr
= (caddr_t
)P2ALIGN((uintptr_t)(addr
), pgsz
);
2215 size
= P2ROUNDUP((uintptr_t)(((addr
+ len
) - shm_addr
)), pgsz
);
2216 npages
= btopr(size
);
2219 * Now we need to convert from addr in segshm to addr in segspt.
2221 anon_index
= seg_page(seg
, shm_addr
);
2222 sptseg_addr
= sptseg
->s_base
+ ptob(anon_index
);
2225 * And now we may have to adjust npages downward if we have
2226 * exceeded the realsize of the segment or initial anon
2229 if ((sptseg_addr
+ ptob(npages
)) >
2230 (sptseg
->s_base
+ sptd
->spt_realsize
))
2231 size
= (sptseg
->s_base
+ sptd
->spt_realsize
) - sptseg_addr
;
2233 npages
= btopr(size
);
2235 ASSERT(sptseg_addr
< (sptseg
->s_base
+ sptseg
->s_size
));
2236 ASSERT((sptd
->spt_flags
& SHM_PAGEABLE
) == 0);
2243 * availrmem is decremented once during anon_swap_adjust()
2244 * and is incremented during the anon_unresv(), which is
2245 * called from shm_rm_amp() when the segment is destroyed.
2247 atomic_add_long((ulong_t
*)(&(shmd
->shm_softlockcnt
)), npages
);
2249 * Some platforms assume that ISM pages are SE_SHARED
2250 * locked for the entire life of the segment.
2252 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP
, (void *)0))
2255 * Fall through to the F_INVAL case to load up the hat layer
2256 * entries with the HAT_LOAD_LOCK flag.
2262 if ((rw
== S_EXEC
) && !(sptd
->spt_prot
& PROT_EXEC
))
2266 * Some platforms that do NOT support DYNAMIC_ISM_UNMAP
2267 * may still rely on this call to hat_share(). That
2268 * would imply that those hat's can fault on a
2269 * HAT_LOAD_LOCK translation, which would seem
2272 if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP
, (void *)0)) {
2273 if (hat_share(seg
->s_as
->a_hat
, seg
->s_base
,
2274 curspt
->a_hat
, sptseg
->s_base
,
2275 sptseg
->s_size
, sptseg
->s_szc
) != 0) {
2276 panic("hat_share error in ISM fault");
2281 ppa
= kmem_zalloc(sizeof (page_t
*) * npages
, KM_SLEEP
);
2284 * I see no need to lock the real seg,
2285 * here, because all of our work will be on the underlying
2288 * sptseg_addr and npages now account for large pages.
2290 amp
= sptd
->spt_amp
;
2291 ASSERT(amp
!= NULL
);
2292 anon_index
= seg_page(sptseg
, sptseg_addr
);
2294 ANON_LOCK_ENTER(&
->a_rwlock
, RW_READER
);
2295 for (i
= 0; i
< npages
; i
++) {
2296 ap
= anon_get_ptr(amp
->ahp
, anon_index
++);
2298 swap_xlate(ap
, &vp
, &offset
);
2299 pp
= page_lookup(vp
, offset
, SE_SHARED
);
2303 ANON_LOCK_EXIT(&
->a_rwlock
);
2304 ASSERT(i
== npages
);
		/*
		 * We are already holding the as->a_lock on the user's
		 * real segment, but we need to hold the a_lock on the
		 * underlying dummy as. This is mostly to satisfy the
		 * underlying HAT layer.
		 */
		AS_LOCK_ENTER(sptseg->s_as, RW_READER);
		a = sptseg_addr;
		pidx = 0;
		if (type == F_SOFTLOCK) {

			/*
			 * Load up the translation keeping it
			 * locked and don't unlock the page.
			 */
			for (; pidx < npages; a += pgsz, pidx += pgcnt) {
				sz = MIN(pgsz, ptob(npages - pidx));
				hat_memload_array(sptseg->s_as->a_hat, a,
				    sz, &ppa[pidx], sptd->spt_prot,
				    HAT_LOAD_LOCK | HAT_LOAD_SHARE);
			}
		} else {
			/*
			 * Migrate pages marked for migration.
			 */
			if (lgrp_optimizations())
				page_migrate(seg, shm_addr, ppa, npages);

			for (; pidx < npages; a += pgsz, pidx += pgcnt) {
				sz = MIN(pgsz, ptob(npages - pidx));
				hat_memload_array(sptseg->s_as->a_hat,
				    a, sz, &ppa[pidx],
				    sptd->spt_prot, HAT_LOAD_SHARE);
			}

			/*
			 * And now drop the SE_SHARED lock(s).
			 */
			for (i = 0; i < npages; i++)
				page_unlock(ppa[i]);
		}
		AS_LOCK_EXIT(sptseg->s_as);

		kmem_free(ppa, sizeof (page_t *) * npages);
		return (0);
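		/*
		 * Note that both load loops above advance one large page
		 * per iteration -- 'a' by pgsz bytes and 'pidx' by pgcnt
		 * constituent pages -- so each hat_memload_array() call
		 * covers exactly one large-page chunk (e.g. 512 8K pages
		 * per 4M page), honoring the single-pagesize rule noted
		 * at the top of this routine.
		 */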
	case F_SOFTUNLOCK:

		/*
		 * This is a bit ugly, we pass in the real seg pointer,
		 * but the sptseg_addr is the virtual address within the
		 * dummy seg.
		 */
		segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
		return (0);

	case F_PROT:

		/*
		 * This takes care of the unusual case where a user
		 * allocates a stack in shared memory and a register
		 * window overflow is written to that stack page before
		 * it is otherwise modified.
		 *
		 * We can get away with this because ISM segments are
		 * always rw. Other than this unusual case, there
		 * should be no instances of protection violations.
		 */
		return (0);

	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "segspt_shmfault default type?");
#endif
		return (FC_NOMAP);
	}
}
/*ARGSUSED*/
static faultcode_t
segspt_shmfaulta(struct seg *seg, caddr_t addr)
{
	return (0);
}

/*ARGSUSED*/
static int
segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

/*ARGSUSED*/
static size_t
segspt_shmswapout(struct seg *seg)
{
	return (0);
}
/*
 * duplicate the shared page tables
 */
static int
segspt_shmdup(struct seg *seg, struct seg *newseg)
{
	struct shm_data		*shmd = (struct shm_data *)seg->s_data;
	struct anon_map		*amp = shmd->shm_amp;
	struct shm_data		*shmd_new;
	struct seg		*spt_seg = shmd->shm_sptseg;
	struct spt_data		*sptd = spt_seg->s_data;
	int			error = 0;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
	newseg->s_data = (void *)shmd_new;
	shmd_new->shm_sptas = shmd->shm_sptas;
	shmd_new->shm_amp = amp;
	shmd_new->shm_sptseg = shmd->shm_sptseg;
	newseg->s_ops = &segspt_shmops;
	newseg->s_szc = seg->s_szc;
	ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
	amp->refcnt++;
	ANON_LOCK_EXIT(&amp->a_rwlock);

	if (sptd->spt_flags & SHM_PAGEABLE) {
		shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
		shmd_new->shm_lckpgs = 0;
		if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
			if ((error = hat_share(newseg->s_as->a_hat,
			    newseg->s_base, shmd->shm_sptas->a_hat, SEGSPTADDR,
			    seg->s_size, seg->s_szc)) != 0) {
				kmem_free(shmd_new->shm_vpage,
				    btopr(amp->size));
			}
		}
		return (error);
	} else {
		return (hat_share(newseg->s_as->a_hat, newseg->s_base,
		    shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size,
		    seg->s_szc));
	}
}
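
/*
 * Observe that segspt_shmdup() copies no pages: the new segment shares the
 * parent's anon_map and spt segment (hence the refcnt bump above), and only
 * a DISM (SHM_PAGEABLE) segment needs its own zeroed shm_vpage[] array to
 * track per-page lock state.
 */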
/*ARGSUSED*/
static int
segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * ISM segment is always rw.
	 */
	return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
}
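
/*
 * For example, checking PROT_READ|PROT_WRITE against the usual rw ISM
 * protections succeeds, while any requested bit missing from
 * sptd->spt_prot (say, PROT_EXEC on a non-executable segment) yields
 * EACCES.
 */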
/*
 * Return an array of locked large pages, for empty slots allocate
 * private zero-filled anon pages.
 */
static int
spt_anon_getpages(
	struct seg *sptseg,
	caddr_t sptaddr,
	size_t len,
	page_t *ppa[])
{
	struct spt_data *sptd = sptseg->s_data;
	struct anon_map *amp = sptd->spt_amp;
	enum seg_rw rw = sptd->spt_prot;
	uint_t szc = sptseg->s_szc;
	size_t pg_sz, share_sz = page_get_pagesize(szc);
	pgcnt_t lp_npgs;
	caddr_t lp_addr, e_sptaddr;
	uint_t vpprot, ppa_szc = 0;
	struct vpage *vpage = NULL;
	ulong_t an_idx, ppa_idx;
	ulong_t j;
	int err = 0;
	int ierr = 0;
	pgcnt_t amp_pgs;
	anon_sync_obj_t cookie;
	int anon_locked = 0;

	ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz));
	ASSERT(len != 0);

	pg_sz = share_sz;
	lp_npgs = btop(pg_sz);
	lp_addr = sptaddr;
	e_sptaddr = sptaddr + len;
	an_idx = seg_page(sptseg, sptaddr);
	ppa_idx = 0;

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);

	amp_pgs = page_get_pagecnt(amp->a_szc);

	/*CONSTCOND*/
	while (1) {
		for (; lp_addr < e_sptaddr;
		    an_idx += lp_npgs, lp_addr += pg_sz, ppa_idx += lp_npgs) {
			/*
			 * If we're currently locked, and we get to a new
			 * page, unlock our current anon chunk.
			 */
			if (anon_locked && P2PHASE(an_idx, amp_pgs) == 0) {
				anon_array_exit(&cookie);
				anon_locked = 0;
			}
			if (!anon_locked) {
				anon_array_enter(amp, an_idx, &cookie);
				anon_locked = 1;
			}
			ppa_szc = (uint_t)-1;
			ierr = anon_map_getpages(amp, an_idx, szc, sptseg,
			    lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx],
			    &ppa_szc, vpage, rw, 0, segvn_anypgsz, 0, kcred);

			if (ierr != 0) {
				if (ierr > 0) {
					err = FC_MAKE_ERR(ierr);
					goto lpgs_err;
				}
				break;
			}
		}
		if (lp_addr == e_sptaddr) {
			break;
		}
		ASSERT(lp_addr < e_sptaddr);
		/*
		 * ierr == -1 means we failed to allocate a large page.
		 * so do a size down operation.
		 *
		 * ierr == -2 means some other process that privately shares
		 * pages with this process has allocated a larger page and we
		 * need to retry with larger pages. So do a size up
		 * operation. This relies on the fact that large pages are
		 * never partially shared i.e. if we share any constituent
		 * page of a large page with another process we must share the
		 * entire large page. Note this cannot happen for SOFTLOCK
		 * case, unless current address (lpaddr) is at the beginning
		 * of the next page size boundary because the other process
		 * couldn't have relocated locked pages.
		 */
		ASSERT(ierr == -1 || ierr == -2);
		if (segvn_anypgsz) {
			ASSERT(ierr == -2 || szc != 0);
			ASSERT(ierr == -1 || szc < sptseg->s_szc);
			szc = (ierr == -1) ? szc - 1 : szc + 1;
		} else {
			/*
			 * For faults and segvn_anypgsz == 0
			 * we need to be careful not to loop forever
			 * if existing page is found with szc other
			 * than 0 or seg->s_szc. This could be due
			 * to page relocations on behalf of DR or
			 * more likely large page creation. For this
			 * case simply re-size to existing page's szc
			 * if returned by anon_map_getpages().
			 */
			if (ppa_szc == (uint_t)-1) {
				szc = (ierr == -1) ? 0 : sptseg->s_szc;
			} else {
				ASSERT(ppa_szc <= sptseg->s_szc);
				ASSERT(ierr == -2 || ppa_szc < szc);
				ASSERT(ierr == -1 || ppa_szc > szc);
				szc = ppa_szc;
			}
		}
		pg_sz = page_get_pagesize(szc);
		lp_npgs = btop(pg_sz);
		ASSERT(IS_P2ALIGNED(lp_addr, pg_sz));
	}
	if (anon_locked)
		anon_array_exit(&cookie);
	ANON_LOCK_EXIT(&amp->a_rwlock);
	return (0);

lpgs_err:
	if (anon_locked)
		anon_array_exit(&cookie);
	ANON_LOCK_EXIT(&amp->a_rwlock);
	for (j = 0; j < ppa_idx; j++)
		page_unlock(ppa[j]);
	return (err);
}
/*
 * count the number of bytes in a set of spt pages that are currently not
 * locked
 */
static rctl_qty_t
spt_unlockedbytes(pgcnt_t npages, page_t **ppa)
{
	ulong_t	i;
	rctl_qty_t unlocked = 0;

	for (i = 0; i < npages; i++) {
		if (ppa[i]->p_lckcnt == 0)
			unlocked += PAGESIZE;
	}
	return (unlocked);
}
extern	u_longlong_t randtick(void);

/* number of locks to reserve/skip by spt_lockpages() and spt_unlockpages() */
#define	NLCK	(NCPU_P2)

/* Random number with a range [0, n-1], n must be power of two */
#define	RAND_P2(n)	\
	((((long)curthread >> PTR24_LSB) ^ (long)randtick()) & ((n) - 1))
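
/*
 * For example, on a machine where NCPU_P2 is 64, RAND_P2(NLCK) lies in
 * [0, 63], so NLCK + RAND_P2(NLCK) reserves between 64 and 127 page locks
 * per batch (capped below by the pages remaining); the jitter keeps
 * competing threads from synchronizing on freemem_lock.
 */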
static int
spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
    page_t **ppa, ulong_t *lockmap, size_t pos,
    rctl_qty_t *locked)
{
	struct shm_data *shmd = seg->s_data;
	struct spt_data *sptd = shmd->shm_sptseg->s_data;
	ulong_t		i;
	int		kernel;
	pgcnt_t		nlck = 0;
	int		rv = 0;
	int		use_reserved = 1;

	/* return the number of bytes actually locked */
	*locked = 0;

	/*
	 * To avoid contention on freemem_lock, availrmem and pages_locked
	 * global counters are updated only every nlck locked pages instead of
	 * every time. Reserve nlck locks up front and deduct from this
	 * reservation for each page that requires a lock. When the reservation
	 * is consumed, reserve again. nlck is randomized, so the competing
	 * threads do not fall into a cyclic lock contention pattern. When
	 * memory is low, the lock ahead is disabled, and instead page_pp_lock()
	 * is used to lock pages.
	 */
	for (i = 0; i < npages; anon_index++, pos++, i++) {
		if (nlck == 0 && use_reserved == 1) {
			nlck = NLCK + RAND_P2(NLCK);
			/* if fewer loops left, decrease nlck */
			nlck = MIN(nlck, npages - i);

			/*
			 * Reserve nlck locks up front and deduct from this
			 * reservation for each page that requires a lock. When
			 * the reservation is consumed, reserve again.
			 */
			mutex_enter(&freemem_lock);
			if ((availrmem - nlck) < pages_pp_maximum) {
				/* Do not do advance memory reserves */
				use_reserved = 0;
			} else {
				availrmem -= nlck;
				pages_locked += nlck;
			}
			mutex_exit(&freemem_lock);
		}
		if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) {
			if (sptd->spt_ppa_lckcnt[anon_index] <
			    (ushort_t)DISM_LOCK_MAX) {
				if (++sptd->spt_ppa_lckcnt[anon_index] ==
				    (ushort_t)DISM_LOCK_MAX) {
					cmn_err(CE_WARN,
					    "DISM page lock limit "
					    "reached on DISM offset 0x%lx\n",
					    anon_index << PAGESHIFT);
				}
			}
			kernel = (sptd->spt_ppa &&
			    sptd->spt_ppa[anon_index]);
			if (!page_pp_lock(ppa[i], 0, kernel ||
			    use_reserved)) {
				sptd->spt_ppa_lckcnt[anon_index]--;
				rv = EAGAIN;
				break;
			}
			/* if this is a newly locked page, count it */
			if (ppa[i]->p_lckcnt == 1) {
				if (kernel == 0 && use_reserved == 1)
					nlck--;
				*locked += PAGESIZE;
			}
			shmd->shm_lckpgs++;
			shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED;
			if (lockmap != NULL)
				BT_SET(lockmap, pos);
		}
	}
	/* Return unused lock reservation */
	if (nlck != 0 && use_reserved == 1) {
		mutex_enter(&freemem_lock);
		availrmem += nlck;
		pages_locked -= nlck;
		mutex_exit(&freemem_lock);
	}

	return (rv);
}
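
/*
 * Net effect of the reservation scheme above: freemem_lock is taken on
 * the order of npages / NLCK times per call rather than once per page,
 * and any reserved but unconsumed locks are handed back in the final
 * block.
 */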
static int
spt_unlockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
    rctl_qty_t *unlocked)
{
	struct shm_data	*shmd = seg->s_data;
	struct spt_data	*sptd = shmd->shm_sptseg->s_data;
	struct anon_map	*amp = sptd->spt_amp;
	struct anon	*ap;
	struct vnode	*vp;
	u_offset_t	off;
	struct page	*pp;
	int		kernel;
	anon_sync_obj_t	cookie;
	ulong_t		i;
	pgcnt_t		nlck = 0;
	pgcnt_t		nlck_limit = NLCK;

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (i = 0; i < npages; i++, anon_index++) {
		if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
			anon_array_enter(amp, anon_index, &cookie);
			ap = anon_get_ptr(amp->ahp, anon_index);
			ASSERT(ap);

			swap_xlate(ap, &vp, &off);
			anon_array_exit(&cookie);
			pp = page_lookup(vp, off, SE_SHARED);
			ASSERT(pp);

			/*
			 * availrmem is decremented only for pages which are not
			 * in seg pcache, for pages in seg pcache availrmem was
			 * decremented in _dismpagelock()
			 */
			kernel = (sptd->spt_ppa && sptd->spt_ppa[anon_index]);
			ASSERT(pp->p_lckcnt > 0);

			/*
			 * unlock page but do not change availrmem, we do it
			 * ourselves every nlck loops.
			 */
			page_pp_unlock(pp, 0, 1);
			if (pp->p_lckcnt == 0) {
				if (kernel == 0)
					nlck++;
				*unlocked += PAGESIZE;
			}
			page_unlock(pp);
			shmd->shm_vpage[anon_index] &= ~DISM_PG_LOCKED;
			sptd->spt_ppa_lckcnt[anon_index]--;
			shmd->shm_lckpgs--;
		}
		/*
		 * To reduce freemem_lock contention, do not update availrmem
		 * until at least NLCK pages have been unlocked.
		 * 1. No need to update if nlck is zero
		 * 2. Always update if the last iteration
		 */
		if (nlck > 0 && (nlck == nlck_limit || i == npages - 1)) {
			mutex_enter(&freemem_lock);
			availrmem += nlck;
			pages_locked -= nlck;
			mutex_exit(&freemem_lock);
			nlck = 0;
			nlck_limit = NLCK + RAND_P2(NLCK);
		}
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);

	return (0);
}
/*ARGSUSED*/
static int
segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
    int attr, int op, ulong_t *lockmap, size_t pos)
{
	struct shm_data *shmd = seg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	struct spt_data	*sptd = sptseg->s_data;
	struct kshmid	*sp = sptd->spt_amp->a_sp;
	pgcnt_t		npages, a_npages;
	page_t		**ppa;
	pgcnt_t		an_idx, a_an_idx, ppa_idx;
	caddr_t		spt_addr, a_addr;	/* spt and aligned address */
	size_t		a_len;			/* aligned len */
	size_t		share_sz;
	ulong_t		i;
	int		sts = 0;
	rctl_qty_t	unlocked = 0;
	rctl_qty_t	locked = 0;
	struct proc	*p = curproc;
	kproject_t	*proj;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
	ASSERT(sp != NULL);

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		return (0);
	}

	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	an_idx = seg_page(seg, addr);
	npages = btopr(len);

	if (an_idx + npages > btopr(shmd->shm_amp->size)) {
		return (ENOMEM);
	}

	/*
	 * A shm's project never changes, so no lock needed.
	 * The shm has a hold on the project, so it will not go away.
	 * Since we have a mapping to shm within this zone, we know
	 * that the zone will not go away.
	 */
	proj = sp->shm_perm.ipc_proj;
	if (op == MC_LOCK) {

		/*
		 * Need to align addr and size request if they are not
		 * aligned so we can always allocate large page(s) however
		 * we only lock what was requested in initial request.
		 */
		share_sz = page_get_pagesize(sptseg->s_szc);
		a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
		a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)),
		    share_sz);
		a_npages = btop(a_len);
		a_an_idx = seg_page(seg, a_addr);
		spt_addr = sptseg->s_base + ptob(a_an_idx);
		ppa_idx = an_idx - a_an_idx;
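
		/*
		 * For example (illustrative, 4M pages over 8K base
		 * pages): locking 8K at a 16K offset into the segment
		 * aligns a_addr back to the segment base and rounds
		 * a_len up to 4M, while ppa_idx == 2 skips the two
		 * leading constituent pages so only the pages the
		 * caller asked for are actually locked.
		 */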
		if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages),
		    KM_NOSLEEP)) == NULL) {
			return (ENOMEM);
		}

		/*
		 * Don't cache any new pages for IO and
		 * flush any cached pages.
		 */
		mutex_enter(&sptd->spt_lock);
		if (sptd->spt_ppa != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;

		sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa);
		if (sts != 0) {
			mutex_exit(&sptd->spt_lock);
			kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
			return (sts);
		}

		mutex_enter(&sp->shm_mlock);
		/* enforce locked memory rctl */
		unlocked = spt_unlockedbytes(npages, &ppa[ppa_idx]);

		mutex_enter(&p->p_lock);
		if (rctl_incr_locked_mem(p, proj, unlocked, 0)) {
			mutex_exit(&p->p_lock);
			sts = EAGAIN;
		} else {
			mutex_exit(&p->p_lock);
			sts = spt_lockpages(seg, an_idx, npages,
			    &ppa[ppa_idx], lockmap, pos, &locked);

			/*
			 * correct locked count if not all pages could be
			 * locked
			 */
			if ((unlocked - locked) > 0) {
				rctl_decr_locked_mem(NULL, proj,
				    (unlocked - locked), 0);
			}
		}
		/*
		 * unlock pages
		 */
		for (i = 0; i < a_npages; i++)
			page_unlock(ppa[i]);
		if (sptd->spt_ppa != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;
		mutex_exit(&sp->shm_mlock);
		mutex_exit(&sptd->spt_lock);

		kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
	} else if (op == MC_UNLOCK) { /* unlock */
		mutex_enter(&sptd->spt_lock);
		if (shmd->shm_lckpgs == 0) {
			mutex_exit(&sptd->spt_lock);
			return (0);
		}
		/*
		 * Don't cache new IO pages.
		 */
		if (sptd->spt_ppa != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;

		mutex_enter(&sp->shm_mlock);
		sts = spt_unlockpages(seg, an_idx, npages, &unlocked);
		if ((ppa = sptd->spt_ppa) != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;
		mutex_exit(&sptd->spt_lock);

		rctl_decr_locked_mem(NULL, proj, unlocked, 0);
		mutex_exit(&sp->shm_mlock);

		if (ppa != NULL)
			seg_ppurge_wiredpp(ppa);
	}
	return (sts);
}
/*ARGSUSED*/
static int
segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
	spgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * ISM segment is always rw.
	 */
	while (--pgno >= 0)
		*protv++ = sptd->spt_prot;
	return (0);
}
/*ARGSUSED*/
static u_offset_t
segspt_shmgetoffset(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/* Offset does not matter in ISM memory */

	return ((u_offset_t)0);
}
/*ARGSUSED*/
static int
segspt_shmgettype(struct seg *seg, caddr_t addr)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * The shared memory mapping is always MAP_SHARED, SWAP is only
	 * reserved for DISM
	 */
	return (MAP_SHARED |
	    ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
}
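
/*
 * In other words, callers see an ISM segment as MAP_SHARED|MAP_NORESERVE
 * (no swap is reserved for it) and a DISM (SHM_PAGEABLE) segment as plain
 * MAP_SHARED, since DISM segments do have swap reserved.
 */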
/*ARGSUSED*/
static int
segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	*vpp = sptd->spt_vp;
	return (0);
}
/*
 * We need to wait for pending IO to complete to a DISM segment in order for
 * pages to get kicked out of the seg_pcache. 120 seconds should be more
 * than enough time to wait.
 */
static clock_t spt_pcache_wait = 120;
static int
segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	struct shm_data	*shmd = (struct shm_data *)seg->s_data;
	struct spt_data	*sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
	struct anon_map	*amp;
	pgcnt_t		pg_idx;
	uint_t		gen;
	clock_t		end_lbolt;
	int		writer;
	page_t		**ppa;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
	if (behav == MADV_FREE || behav == MADV_PURGE) {
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
			return (0);

		amp = sptd->spt_amp;
		pg_idx = seg_page(seg, addr);

		mutex_enter(&sptd->spt_lock);
		if ((ppa = sptd->spt_ppa) == NULL) {
			mutex_exit(&sptd->spt_lock);
			ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
			(void) anon_disclaim(amp, pg_idx, len, behav, NULL);
			ANON_LOCK_EXIT(&amp->a_rwlock);
			return (0);
		}

		sptd->spt_flags |= DISM_PPA_CHANGED;
		gen = sptd->spt_gen;

		mutex_exit(&sptd->spt_lock);

		/*
		 * Purge all DISM cached pages
		 */
		seg_ppurge_wiredpp(ppa);
		/*
		 * Drop the AS_LOCK so that other threads can grab it
		 * in the as_pageunlock path and hopefully get the segment
		 * kicked out of the seg_pcache. We bump the shm_softlockcnt
		 * to keep this segment resident.
		 */
		writer = AS_WRITE_HELD(seg->s_as);
		atomic_inc_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
		AS_LOCK_EXIT(seg->s_as);

		mutex_enter(&sptd->spt_lock);

		end_lbolt = ddi_get_lbolt() + (hz * spt_pcache_wait);

		/*
		 * Try to wait for pages to get kicked out of the seg_pcache.
		 */
		while (sptd->spt_gen == gen &&
		    (sptd->spt_flags & DISM_PPA_CHANGED) &&
		    ddi_get_lbolt() < end_lbolt) {
			if (!cv_timedwait_sig(&sptd->spt_cv,
			    &sptd->spt_lock, end_lbolt)) {
				break;
			}
		}

		mutex_exit(&sptd->spt_lock);

		/* Regrab the AS_LOCK and release our hold on the segment */
		AS_LOCK_ENTER(seg->s_as, writer ? RW_WRITER : RW_READER);
		atomic_dec_ulong((ulong_t *)(&(shmd->shm_softlockcnt)));
		if (shmd->shm_softlockcnt <= 0) {
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				mutex_enter(&seg->s_as->a_contents);
				if (AS_ISUNMAPWAIT(seg->s_as)) {
					AS_CLRUNMAPWAIT(seg->s_as);
					cv_broadcast(&seg->s_as->a_cv);
				}
				mutex_exit(&seg->s_as->a_contents);
			}
		}

		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
		(void) anon_disclaim(amp, pg_idx, len, behav, NULL);
		ANON_LOCK_EXIT(&amp->a_rwlock);
	} else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
	    behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
		int			already_set;
		ulong_t			anon_index;
		lgrp_mem_policy_t	policy;
		caddr_t			shm_addr;
		size_t			share_size;
		size_t			size;
		struct seg		*sptseg = shmd->shm_sptseg;
		caddr_t			sptseg_addr;

		/*
		 * Align address and length to page size of underlying segment
		 */
		share_size = page_get_pagesize(shmd->shm_sptseg->s_szc);
		shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size);
		size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)),
		    share_size);

		amp = shmd->shm_amp;
		anon_index = seg_page(seg, shm_addr);

		/*
		 * And now we may have to adjust size downward if we have
		 * exceeded the realsize of the segment or initial anon
		 * allocations.
		 */
		sptseg_addr = sptseg->s_base + ptob(anon_index);
		if ((sptseg_addr + size) >
		    (sptseg->s_base + sptd->spt_realsize))
			size = (sptseg->s_base + sptd->spt_realsize) -
			    sptseg_addr;

		/*
		 * Set memory allocation policy for this segment
		 */
		policy = lgrp_madv_to_policy(behav, len, MAP_SHARED);
		already_set = lgrp_shm_policy_set(policy, amp, anon_index,
		    NULL, 0, len);

		/*
		 * If random memory allocation policy set already,
		 * don't bother reapplying it.
		 */
		if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy))
			return (0);

		/*
		 * Mark any existing pages in the given range for
		 * migration, flushing the I/O page cache, and using
		 * underlying segment to calculate anon index and get
		 * anonmap and vnode pointer from
		 */
		if (shmd->shm_softlockcnt > 0)
			segspt_purge(seg);

		page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0);
	}

	return (0);
}
/*ARGSUSED*/
static void
segspt_shmdump(struct seg *seg)
{
	/* no-op for ISM segment */
}

/*ARGSUSED*/
static int
segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}
/*
 * get a memory ID for an addr in a given segment
 */
static int
segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct shm_data	*shmd = (struct shm_data *)seg->s_data;
	struct anon	*ap;
	size_t		anon_index;
	struct anon_map	*amp = shmd->shm_amp;
	struct spt_data	*sptd = shmd->shm_sptseg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	anon_sync_obj_t	cookie;

	anon_index = seg_page(seg, addr);

	if (addr > (seg->s_base + sptd->spt_realsize)) {
		return (EFAULT);
	}

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	anon_array_enter(amp, anon_index, &cookie);
	ap = anon_get_ptr(amp->ahp, anon_index);
	if (ap == NULL) {
		struct page *pp;
		caddr_t spt_addr = sptseg->s_base + ptob(anon_index);

		pp = anon_zero(sptseg, spt_addr, &ap, kcred);
		if (pp == NULL) {
			anon_array_exit(&cookie);
			ANON_LOCK_EXIT(&amp->a_rwlock);
			return (ENOMEM);
		}
		(void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP);
		page_unlock(pp);
	}
	anon_array_exit(&cookie);
	ANON_LOCK_EXIT(&amp->a_rwlock);
	memidp->val[0] = (uintptr_t)ap;
	memidp->val[1] = (uintptr_t)addr & PAGEOFFSET;
	return (0);
}
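
/*
 * The memid built above identifies the backing anon slot rather than a
 * virtual address: val[0] carries the anon pointer (created on demand via
 * anon_zero() for never-touched pages) and val[1] the byte offset within
 * the page.
 */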
/*
 * Get memory allocation policy info for specified address in given segment
 */
static lgrp_mem_policy_info_t *
segspt_shmgetpolicy(struct seg *seg, caddr_t addr)
{
	struct anon_map		*amp;
	ulong_t			anon_index;
	lgrp_mem_policy_info_t	*policy_info;
	struct shm_data		*shm_data;

	ASSERT(seg != NULL);

	/*
	 * Get anon_map from segshm
	 *
	 * Assume that no lock needs to be held on anon_map, since
	 * it should be protected by its reference count which must be
	 * nonzero for an existing segment
	 * Need to grab readers lock on policy tree though
	 */
	shm_data = (struct shm_data *)seg->s_data;
	if (shm_data == NULL)
		return (NULL);
	amp = shm_data->shm_amp;
	ASSERT(amp->refcnt != 0);

	/*
	 * Get policy info
	 *
	 * Assume starting anon index of 0
	 */
	anon_index = seg_page(seg, addr);
	policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);

	return (policy_info);
}
/*ARGSUSED*/
static int
segspt_shmcapable(struct seg *seg, segcapability_t capability)