/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */
/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>

#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
		struct page ***ppp, enum lock_type type,
		enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
		caddr_t addr);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap *get_smap_kpm(caddr_t, page_t **);
#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static struct seg_ops segmap_ops = {
	SEGMAP_BADOP(int),	/* dup */
	SEGMAP_BADOP(int),	/* unmap */
	segmap_free,
	segmap_fault,
	segmap_faulta,
	SEGMAP_BADOP(int),	/* setprot */
	segmap_checkprot,
	segmap_kluster,
	SEGMAP_BADOP(size_t),	/* swapout */
	SEGMAP_BADOP(int),	/* sync */
	SEGMAP_BADOP(size_t),	/* incore */
	SEGMAP_BADOP(int),	/* lockop */
	segmap_getprot,
	segmap_getoffset,
	segmap_gettype,
	segmap_getvp,
	SEGMAP_BADOP(int),	/* advise */
	segmap_dump,
	segmap_pagelock,	/* pagelock */
	SEGMAP_BADOP(int),	/* setpgsz */
	segmap_getmemid,	/* getmemid */
	segmap_getpolicy,	/* getpolicy */
	segmap_capable,		/* capable */
};
/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
		size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
		u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);
/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)	(((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))
static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;

static int *colors_used;

static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;
#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;
/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done by holding only the hashchain lock; when a wanted
 * slot is found, we drop the hashchain lock and then lock the slot, so there
 * is no overlapping of hashchain and smap locks.  After the slot is
 * locked, we verify again that the slot is still what we are looking for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist.  This is
 * in reversed lock order, so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for hash/free lists, which are protected by the
 * hashchain and freelist locks.
 */
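/*
 * For illustration only - a rough sketch of the hash-search protocol
 * described above, as it is used later in segmap_getmapflt(); this is
 * not an additional helper in this file:
 *
 *	SMAP_HASHFUNC(vp, off, hashid);
 *	mutex_enter(SHASHMTX(hashid));		(hashchain lock only)
 *	for (smp = smd_hash[hashid].sh_hash_list;
 *	    smp != NULL; smp = smp->sm_hash)
 *		if (smp->sm_vp == vp && smp->sm_off == off)
 *			break;
 *	mutex_exit(SHASHMTX(hashid));		(dropped before the smap lock)
 *	if (smp != NULL) {
 *		mutex_enter(SMAPMTX(smp));	(now lock the slot)
 *		if (smp->sm_vp != vp || smp->sm_off != off)
 *			... slot was reused in the meantime, retry the search ...
 *	}
 */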
#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp)		(&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
	    ((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}
/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks.  The update function
 * sums the cpu local counters to update the global counters.
 */
static int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t getmap, release, get_reclaim;
	ulong_t fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}
int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can over-ride this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (!ISP2(nfreelist)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	smd_ncolor = a->shmsize >> MAXBSHIFT;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz)-1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array 1 time. Inline a optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists. Each cpu will have a private
	 * rotor index to spread the allocations even across the available
	 * smap freelists. Init the scpu_last_smap field to the first
	 * smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;

		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);

	return (0);
}

static void
segmap_free(struct seg *seg)
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}
/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
504 ASSERT(smp
->sm_refcnt
> 0);
510 if (segmap_kpm
&& IS_KPM_ADDR(addr
)) {
513 * We're called only from segmap_fault and this was a
514 * NOP in case of a kpm based smap, so dangerous things
515 * must have happened in the meantime. Pages are prefaulted
516 * and locked in segmap_getmapflt and they will not be
517 * unlocked until segmap_release.
519 panic("segmap_unlock: called with kpm addr %p", (void *)addr
);
524 off
= smp
->sm_off
+ (u_offset_t
)((uintptr_t)addr
& MAXBOFFSET
);
526 hat_unlock(hat
, addr
, P2ROUNDUP(len
, PAGESIZE
));
527 for (adr
= addr
; adr
< addr
+ len
; adr
+= PAGESIZE
, off
+= PAGESIZE
) {
531 * Use page_find() instead of page_lookup() to
532 * find the page since we know that it has
535 pp
= page_find(vp
, off
);
537 panic("segmap_unlock: page not found");
543 } else if (rw
!= S_OTHER
) {
544 TRACE_3(TR_FAC_VM
, TR_SEGMAP_FAULT
,
545 "segmap_fault:pp %p vp %p offset %llx", pp
, vp
, off
);
550 * Clear bitmap, if the bit corresponding to "off" is set,
551 * since the page and translation are being unlocked.
553 bitmask
= SMAP_BIT_MASK((off
- smp
->sm_off
) >> PAGESHIFT
);
556 * Large Files: Following assertion is to verify
557 * the correctness of the cast to (int) above.
559 ASSERT((u_offset_t
)(off
- smp
->sm_off
) <= INT_MAX
);
562 if (smp
->sm_bitmap
& bitmask
) {
563 smp
->sm_bitmap
&= ~bitmask
;
571 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */
/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
586 enum fault_type type
,
589 struct segmap_data
*smd
= (struct segmap_data
*)seg
->s_data
;
594 page_t
*pl
[MAXPPB
+ 1];
602 if (segmap_kpm
&& IS_KPM_ADDR(addr
)) {
607 * Pages are successfully prefaulted and locked in
608 * segmap_getmapflt and can't be unlocked until
609 * segmap_release. No hat mappings have to be locked
610 * and they also can't be unlocked as long as the
611 * caller owns an active kpm addr.
614 if (type
!= F_SOFTUNLOCK
)
618 if ((smp
= get_smap_kpm(addr
, NULL
)) == NULL
) {
619 panic("segmap_fault: smap not found "
620 "for addr %p", (void *)addr
);
626 newpage
= smp
->sm_flags
& SM_KPM_NEWPAGE
;
628 cmn_err(CE_WARN
, "segmap_fault: newpage? smp %p",
632 if (type
!= F_SOFTUNLOCK
) {
639 sm_off
= smp
->sm_off
;
642 return (FC_MAKE_ERR(EIO
));
644 ASSERT(smp
->sm_refcnt
> 0);
646 addroff
= (u_offset_t
)((uintptr_t)addr
& MAXBOFFSET
);
647 if (addroff
+ len
> MAXBSIZE
)
648 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
649 (void *)(addr
+ len
));
651 off
= sm_off
+ addroff
;
653 pp
= page_find(vp
, off
);
656 panic("segmap_fault: softunlock page not found");
659 * Set ref bit also here in case of S_OTHER to avoid the
660 * overhead of supporting other cases than F_SOFTUNLOCK
661 * with segkpm. We can do this because the underlying
662 * pages are locked anyway.
667 TRACE_3(TR_FAC_VM
, TR_SEGMAP_FAULT
,
668 "segmap_fault:pp %p vp %p offset %llx",
676 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_fault
++;
677 smp
= GET_SMAP(seg
, addr
);
679 sm_off
= smp
->sm_off
;
682 return (FC_MAKE_ERR(EIO
));
684 ASSERT(smp
->sm_refcnt
> 0);
686 addroff
= (u_offset_t
)((uintptr_t)addr
& MAXBOFFSET
);
687 if (addroff
+ len
> MAXBSIZE
) {
688 panic("segmap_fault: endaddr %p "
689 "exceeds MAXBSIZE chunk", (void *)(addr
+ len
));
692 off
= sm_off
+ addroff
;
695 * First handle the easy stuff
697 if (type
== F_SOFTUNLOCK
) {
698 segmap_unlock(hat
, seg
, addr
, len
, rw
, smp
);
702 TRACE_3(TR_FAC_VM
, TR_SEGMAP_GETPAGE
,
703 "segmap_getpage:seg %p addr %p vp %p", seg
, addr
, vp
);
704 err
= VOP_GETPAGE(vp
, (offset_t
)off
, len
, &prot
, pl
, MAXBSIZE
,
705 seg
, addr
, rw
, CRED(), NULL
);
708 return (FC_MAKE_ERR(err
));
710 prot
&= smd
->smd_prot
;
713 * Handle all pages returned in the pl[] array.
714 * This loop is coded on the assumption that if
715 * there was no error from the VOP_GETPAGE routine,
716 * that the page list returned will contain all the
717 * needed pages for the vp from [off..off + len].
720 while ((pp
= *ppp
++) != NULL
) {
722 ASSERT(pp
->p_vnode
== vp
);
726 * Verify that the pages returned are within the range
727 * of this segmap region. Note that it is theoretically
728 * possible for pages outside this range to be returned,
729 * but it is not very likely. If we cannot use the
730 * page here, just release it and go on to the next one.
732 if (pp
->p_offset
< sm_off
||
733 pp
->p_offset
>= sm_off
+ MAXBSIZE
) {
734 (void) page_release(pp
, 1);
738 ASSERT(hat
== kas
.a_hat
);
740 adr
= addr
+ (poff
- off
);
741 if (adr
>= addr
&& adr
< addr
+ len
) {
743 TRACE_3(TR_FAC_VM
, TR_SEGMAP_FAULT
,
744 "segmap_fault:pp %p vp %p offset %llx",
746 if (type
== F_SOFTLOCK
)
747 hat_flag
= HAT_LOAD_LOCK
;
751 * Deal with VMODSORT pages here. If we know this is a write
752 * do the setmod now and allow write protection.
753 * As long as it's modified or not S_OTHER, remove write
754 * protection. With S_OTHER it's up to the FS to deal with this.
756 if (IS_VMODSORT(vp
)) {
759 else if (rw
!= S_OTHER
&& !hat_ismod(pp
))
763 hat_memload(hat
, adr
, pp
, prot
, hat_flag
);
764 if (hat_flag
!= HAT_LOAD_LOCK
)
/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
781 if (segmap_kpm
&& IS_KPM_ADDR(addr
)) {
786 * Pages are successfully prefaulted and locked in
787 * segmap_getmapflt and can't be unlocked until
788 * segmap_release. No hat mappings have to be locked
789 * and they also can't be unlocked as long as the
790 * caller owns an active kpm addr.
793 if ((smp
= get_smap_kpm(addr
, NULL
)) == NULL
) {
794 panic("segmap_faulta: smap not found "
795 "for addr %p", (void *)addr
);
800 newpage
= smp
->sm_flags
& SM_KPM_NEWPAGE
;
803 cmn_err(CE_WARN
, "segmap_faulta: newpage? smp %p",
809 segmapcnt
.smp_faulta
.value
.ul
++;
810 smp
= GET_SMAP(seg
, addr
);
812 ASSERT(smp
->sm_refcnt
> 0);
818 cmn_err(CE_WARN
, "segmap_faulta - no vp");
819 return (FC_MAKE_ERR(EIO
));
822 TRACE_3(TR_FAC_VM
, TR_SEGMAP_GETPAGE
,
823 "segmap_getpage:seg %p addr %p vp %p", seg
, addr
, vp
);
825 err
= VOP_GETPAGE(vp
, (offset_t
)(off
+ ((offset_t
)((uintptr_t)addr
826 & MAXBOFFSET
))), PAGESIZE
, (uint_t
*)NULL
, (page_t
**)NULL
, 0,
827 seg
, addr
, S_READ
, CRED(), NULL
);
830 return (FC_MAKE_ERR(err
));
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}
850 segmap_getprot(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t
*protv
)
852 struct segmap_data
*smd
= (struct segmap_data
*)seg
->s_data
;
853 size_t pgno
= seg_page(seg
, addr
+ len
) - seg_page(seg
, addr
) + 1;
855 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
, &seg
->s_as
->a_lock
));
859 protv
[--pgno
] = smd
->smd_prot
;
static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}
877 segmap_gettype(struct seg
*seg
, caddr_t addr
)
879 ASSERT(seg
->s_as
&& RW_READ_HELD(&seg
->s_as
->a_lock
));
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}
/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

static void
segmap_badop(void)
{
	panic("segmap_badop");
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
926 segmap_smapadd(struct smap
*smp
)
929 struct smap
*smpfreelist
;
930 struct sm_freeq
*releq
;
932 ASSERT(MUTEX_HELD(SMAPMTX(smp
)));
934 if (smp
->sm_refcnt
!= 0) {
935 panic("segmap_smapadd");
939 sm
= &smd_free
[smp
->sm_free_ndx
];
941 * Add to the tail of the release queue
942 * Note that sm_releq and sm_allocq could toggle
943 * before we get the lock. This does not affect
944 * correctness as the 2 queues are only maintained
945 * to reduce lock pressure.
947 releq
= sm
->sm_releq
;
948 if (releq
== &sm
->sm_freeq
[0])
949 smp
->sm_flags
|= SM_QNDX_ZERO
;
951 smp
->sm_flags
&= ~SM_QNDX_ZERO
;
952 mutex_enter(&releq
->smq_mtx
);
953 smpfreelist
= releq
->smq_free
;
954 if (smpfreelist
== 0) {
957 releq
->smq_free
= smp
->sm_next
= smp
->sm_prev
= smp
;
959 * Both queue mutexes held to set sm_want;
960 * snapshot the value before dropping releq mutex.
961 * If sm_want appears after the releq mutex is dropped,
962 * then the smap just freed is already gone.
965 mutex_exit(&releq
->smq_mtx
);
967 * See if there was a waiter before dropping the releq mutex
968 * then recheck after obtaining sm_freeq[0] mutex as
969 * the another thread may have already signaled.
972 mutex_enter(&sm
->sm_freeq
[0].smq_mtx
);
974 cv_signal(&sm
->sm_free_cv
);
975 mutex_exit(&sm
->sm_freeq
[0].smq_mtx
);
978 smp
->sm_next
= smpfreelist
;
979 smp
->sm_prev
= smpfreelist
->sm_prev
;
980 smpfreelist
->sm_prev
= smp
;
981 smp
->sm_prev
->sm_next
= smp
;
982 mutex_exit(&releq
->smq_mtx
);
988 segmap_hashin(struct smap
*smp
, struct vnode
*vp
, u_offset_t off
, int hashid
)
994 ASSERT(MUTEX_HELD(SMAPMTX(smp
)));
995 ASSERT(smp
->sm_vp
== NULL
);
996 ASSERT(smp
->sm_hash
== NULL
);
997 ASSERT(smp
->sm_prev
== NULL
);
998 ASSERT(smp
->sm_next
== NULL
);
999 ASSERT(hashid
>= 0 && hashid
<= smd_hashmsk
);
1001 hmtx
= SHASHMTX(hashid
);
1005 * First we need to verify that no one has created a smp
1006 * with (vp,off) as its tag before we us.
1008 for (tmp
= smd_hash
[hashid
].sh_hash_list
;
1009 tmp
!= NULL
; tmp
= tmp
->sm_hash
)
1010 if (tmp
->sm_vp
== vp
&& tmp
->sm_off
== off
)
	/*
	 * No one created one yet.
	 *
	 * Funniness here - we don't increment the ref count on the
	 * vnode * even though we have another pointer to it here.
	 * The reason for this is that we don't want the fact that
	 * a seg_map entry somewhere refers to a vnode to prevent the
	 * vnode * itself from going away.  This is because this
	 * reference to the vnode is a "soft one".  In the case where
	 * a mapping is being used by a rdwr [or directory routine?]
	 * there already has to be a non-zero ref count on the vnode.
	 * In the case where the vp has been freed and the smap
	 * structure is on the free list, there are no pages in memory
	 * that can refer to the vnode.  Thus even if we reuse the same
	 * vnode/smap structure for a vnode which has the same
	 * address but represents a different object, we are ok.
	 */
1034 hpp
= &smd_hash
[hashid
].sh_hash_list
;
1035 smp
->sm_hash
= *hpp
;
1037 #ifdef SEGMAP_HASHSTATS
1038 smd_hash_len
[hashid
]++;
1047 segmap_hashout(struct smap
*smp
)
1049 struct smap
**hpp
, *hp
;
1055 ASSERT(MUTEX_HELD(SMAPMTX(smp
)));
1060 SMAP_HASHFUNC(vp
, off
, hashid
); /* macro assigns hashid */
1061 mtx
= SHASHMTX(hashid
);
1064 hpp
= &smd_hash
[hashid
].sh_hash_list
;
1068 panic("segmap_hashout");
1076 *hpp
= smp
->sm_hash
;
1077 smp
->sm_hash
= NULL
;
1078 #ifdef SEGMAP_HASHSTATS
1079 smd_hash_len
[hashid
]--;
1084 smp
->sm_off
= (u_offset_t
)0;
1089 * Attempt to free unmodified, unmapped, and non locked segmap
1093 segmap_pagefree(struct vnode
*vp
, u_offset_t off
)
1098 for (pgoff
= off
; pgoff
< off
+ MAXBSIZE
; pgoff
+= PAGESIZE
) {
1100 if ((pp
= page_lookup_nowait(vp
, pgoff
, SE_EXCL
)) == NULL
)
1103 switch (page_release(pp
, 1)) {
1105 segmapcnt
.smp_free_notfree
.value
.ul
++;
1108 segmapcnt
.smp_free_dirty
.value
.ul
++;
1111 segmapcnt
.smp_free
.value
.ul
++;
1118 * Locks held on entry: smap lock
1119 * Locks held on exit : smap lock.
1123 grab_smp(struct smap
*smp
, page_t
*pp
)
1125 ASSERT(MUTEX_HELD(SMAPMTX(smp
)));
1126 ASSERT(smp
->sm_refcnt
== 0);
1128 if (smp
->sm_vp
!= (struct vnode
*)NULL
) {
1129 struct vnode
*vp
= smp
->sm_vp
;
1130 u_offset_t off
= smp
->sm_off
;
1132 * Destroy old vnode association and
1133 * unload any hardware translations to
1136 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_get_reuse
++;
1137 segmap_hashout(smp
);
1140 * This node is off freelist and hashlist,
1141 * so there is no reason to drop/reacquire sm_mtx
1142 * across calls to hat_unload.
1146 int hat_unload_needed
= 0;
1149 * unload kpm mapping
1152 vaddr
= hat_kpm_page2va(pp
, 1);
1153 hat_kpm_mapout(pp
, GET_KPME(smp
), vaddr
);
1158 * Check if we have (also) the rare case of a
1161 if (smp
->sm_flags
& SM_NOTKPM_RELEASED
) {
1162 hat_unload_needed
= 1;
1163 smp
->sm_flags
&= ~SM_NOTKPM_RELEASED
;
1166 if (hat_unload_needed
) {
1167 hat_unload(kas
.a_hat
, segkmap
->s_base
+
1168 ((smp
- smd_smap
) * MAXBSIZE
),
1169 MAXBSIZE
, HAT_UNLOAD
);
1173 ASSERT(smp
->sm_flags
& SM_NOTKPM_RELEASED
);
1174 smp
->sm_flags
&= ~SM_NOTKPM_RELEASED
;
1175 hat_unload(kas
.a_hat
, segkmap
->s_base
+
1176 ((smp
- smd_smap
) * MAXBSIZE
),
1177 MAXBSIZE
, HAT_UNLOAD
);
1179 segmap_pagefree(vp
, off
);
1183 static struct smap
*
1184 get_free_smp(int free_ndx
)
1188 struct smap
*smp
, *first
;
1189 struct sm_freeq
*allocq
, *releq
;
1192 int end_ndx
, page_locked
= 0;
1195 sm
= &smd_free
[free_ndx
];
1198 allocq
= sm
->sm_allocq
;
1199 mutex_enter(&allocq
->smq_mtx
);
1201 if ((smp
= allocq
->smq_free
) == NULL
) {
1205 * The alloc list is empty or this queue is being skipped;
1206 * first see if the allocq toggled.
1208 if (sm
->sm_allocq
!= allocq
) {
1210 mutex_exit(&allocq
->smq_mtx
);
1213 releq
= sm
->sm_releq
;
1214 if (!mutex_tryenter(&releq
->smq_mtx
)) {
1215 /* cannot get releq; a free smp may be there now */
1216 mutex_exit(&allocq
->smq_mtx
);
1219 * This loop could spin forever if this thread has
1220 * higher priority than the thread that is holding
1221 * releq->smq_mtx. In order to force the other thread
1222 * to run, we'll lock/unlock the mutex which is safe
1223 * since we just unlocked the allocq mutex.
1225 mutex_enter(&releq
->smq_mtx
);
1226 mutex_exit(&releq
->smq_mtx
);
1229 if (releq
->smq_free
== NULL
) {
1231 * This freelist is empty.
1232 * This should not happen unless clients
1233 * are failing to release the segmap
1234 * window after accessing the data.
1235 * Before resorting to sleeping, try
1236 * the next list of the same color.
1238 free_ndx
= (free_ndx
+ smd_ncolor
) & smd_freemsk
;
1239 if (free_ndx
!= end_ndx
) {
1240 mutex_exit(&releq
->smq_mtx
);
1241 mutex_exit(&allocq
->smq_mtx
);
1242 sm
= &smd_free
[free_ndx
];
1246 * Tried all freelists of the same color once,
1247 * wait on this list and hope something gets freed.
1249 segmapcnt
.smp_get_nofree
.value
.ul
++;
1251 mutex_exit(&sm
->sm_freeq
[1].smq_mtx
);
1252 cv_wait(&sm
->sm_free_cv
,
1253 &sm
->sm_freeq
[0].smq_mtx
);
1255 mutex_exit(&sm
->sm_freeq
[0].smq_mtx
);
1256 sm
= &smd_free
[free_ndx
];
1260 * Something on the rele queue; flip the alloc
1261 * and rele queues and retry.
1263 sm
->sm_allocq
= releq
;
1264 sm
->sm_releq
= allocq
;
1265 mutex_exit(&allocq
->smq_mtx
);
1266 mutex_exit(&releq
->smq_mtx
);
1275 * Fastpath the case we get the smap mutex
1280 smtx
= SMAPMTX(smp
);
1281 if (!mutex_tryenter(smtx
)) {
1283 * Another thread is trying to reclaim this slot.
1284 * Skip to the next queue or smap.
1286 if ((smp
= smp
->sm_next
) == first
) {
1293 * if kpme exists, get shared lock on the page
1295 if (segmap_kpm
&& smp
->sm_vp
!= NULL
) {
1297 kpme
= GET_KPME(smp
);
1298 pp
= kpme
->kpe_page
;
1301 if (!page_trylock(pp
, SE_SHARED
)) {
1314 if (kpme
->kpe_page
== NULL
) {
1323 * At this point, we've selected smp. Remove smp
1324 * from its freelist. If smp is the first one in
1325 * the freelist, update the head of the freelist.
1328 ASSERT(first
== allocq
->smq_free
);
1329 allocq
->smq_free
= smp
->sm_next
;
1333 * if the head of the freelist still points to smp,
1334 * then there are no more free smaps in that list.
1336 if (allocq
->smq_free
== smp
)
1340 allocq
->smq_free
= NULL
;
1342 smp
->sm_prev
->sm_next
= smp
->sm_next
;
1343 smp
->sm_next
->sm_prev
= smp
->sm_prev
;
1345 mutex_exit(&allocq
->smq_mtx
);
1346 smp
->sm_prev
= smp
->sm_next
= NULL
;
1349 * if pp != NULL, pp must have been locked;
1350 * grab_smp() unlocks pp.
1352 ASSERT((pp
== NULL
) || PAGE_LOCKED(pp
));
1354 /* return smp locked. */
1355 ASSERT(SMAPMTX(smp
) == smtx
);
1356 ASSERT(MUTEX_HELD(smtx
));
/*
 * Special public segmap operations
 */

/*
 * Create pages (without using VOP_GETPAGE) and load up translations to them.
 * If softlock is TRUE, then set things up so that it looks like a call
 * to segmap_fault with F_SOFTLOCK.
 *
 * Returns 1 if a page is created by calling page_create_va(), or 0 otherwise.
 *
 * All fields in the generic segment (struct seg) are considered to be
 * read-only for "segmap" even though the kernel address space (kas) may
 * not be locked, hence no lock is needed to access them.
 */
1378 segmap_pagecreate(struct seg
*seg
, caddr_t addr
, size_t len
, int softlock
)
1380 struct segmap_data
*smd
= (struct segmap_data
*)seg
->s_data
;
1391 ASSERT(seg
->s_as
== &kas
);
1393 if (segmap_kpm
&& IS_KPM_ADDR(addr
)) {
1395 * Pages are successfully prefaulted and locked in
1396 * segmap_getmapflt and can't be unlocked until
1397 * segmap_release. The SM_KPM_NEWPAGE flag is set
1398 * in segmap_pagecreate_kpm when new pages are created.
1399 * and it is returned as "newpage" indication here.
1401 if ((smp
= get_smap_kpm(addr
, NULL
)) == NULL
) {
1402 panic("segmap_pagecreate: smap not found "
1403 "for addr %p", (void *)addr
);
1407 smtx
= SMAPMTX(smp
);
1408 newpage
= smp
->sm_flags
& SM_KPM_NEWPAGE
;
1409 smp
->sm_flags
&= ~SM_KPM_NEWPAGE
;
1415 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_pagecreate
++;
1418 addr
= (caddr_t
)((uintptr_t)addr
& (uintptr_t)PAGEMASK
);
1420 smp
= GET_SMAP(seg
, addr
);
1423 * We don't grab smp mutex here since we assume the smp
1424 * has a refcnt set already which prevents the slot from
1427 ASSERT(smp
->sm_refcnt
> 0);
1430 off
= smp
->sm_off
+ ((u_offset_t
)((uintptr_t)addr
& MAXBOFFSET
));
1431 prot
= smd
->smd_prot
;
1433 for (; addr
< eaddr
; addr
+= PAGESIZE
, off
+= PAGESIZE
) {
1434 hat_flag
= HAT_LOAD
;
1435 pp
= page_lookup(vp
, off
, SE_SHARED
);
1439 if ((pp
= page_create_va(vp
, off
,
1440 PAGESIZE
, PG_WAIT
, seg
, addr
)) == NULL
) {
1441 panic("segmap_pagecreate: page_create failed");
1448 * Since pages created here do not contain valid
1449 * data until the caller writes into them, the
1450 * "exclusive" lock will not be dropped to prevent
1451 * other users from accessing the page. We also
1452 * have to lock the translation to prevent a fault
1453 * from occurring when the virtual address mapped by
1454 * this page is written into. This is necessary to
1455 * avoid a deadlock since we haven't dropped the
1458 bitindex
= (ushort_t
)((off
- smp
->sm_off
) >> PAGESHIFT
);
1461 * Large Files: The following assertion is to
1462 * verify the cast above.
1464 ASSERT((u_offset_t
)(off
- smp
->sm_off
) <= INT_MAX
);
1465 smtx
= SMAPMTX(smp
);
1467 smp
->sm_bitmap
|= SMAP_BIT_MASK(bitindex
);
1470 hat_flag
= HAT_LOAD_LOCK
;
1471 } else if (softlock
) {
1472 hat_flag
= HAT_LOAD_LOCK
;
1475 if (IS_VMODSORT(pp
->p_vnode
) && (prot
& PROT_WRITE
))
1478 hat_memload(kas
.a_hat
, addr
, pp
, prot
, hat_flag
);
1480 if (hat_flag
!= HAT_LOAD_LOCK
)
1483 TRACE_5(TR_FAC_VM
, TR_SEGMAP_PAGECREATE
,
1484 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
1485 seg
, addr
, pp
, vp
, off
);
1492 segmap_pageunlock(struct seg
*seg
, caddr_t addr
, size_t len
, enum seg_rw rw
)
1502 ASSERT(seg
->s_as
== &kas
);
1505 addr
= (caddr_t
)((uintptr_t)addr
& (uintptr_t)PAGEMASK
);
1507 if (segmap_kpm
&& IS_KPM_ADDR(addr
)) {
1509 * Pages are successfully prefaulted and locked in
1510 * segmap_getmapflt and can't be unlocked until
1511 * segmap_release, so no pages or hat mappings have
1512 * to be unlocked at this point.
1515 if ((smp
= get_smap_kpm(addr
, NULL
)) == NULL
) {
1516 panic("segmap_pageunlock: smap not found "
1517 "for addr %p", (void *)addr
);
1521 ASSERT(smp
->sm_refcnt
> 0);
1522 mutex_exit(SMAPMTX(smp
));
1527 smp
= GET_SMAP(seg
, addr
);
1528 smtx
= SMAPMTX(smp
);
1530 ASSERT(smp
->sm_refcnt
> 0);
1533 off
= smp
->sm_off
+ ((u_offset_t
)((uintptr_t)addr
& MAXBOFFSET
));
1535 for (; addr
< eaddr
; addr
+= PAGESIZE
, off
+= PAGESIZE
) {
1536 bitmask
= SMAP_BIT_MASK((int)(off
- smp
->sm_off
) >> PAGESHIFT
);
1539 * Large Files: Following assertion is to verify
1540 * the correctness of the cast to (int) above.
1542 ASSERT((u_offset_t
)(off
- smp
->sm_off
) <= INT_MAX
);
1545 * If the bit corresponding to "off" is set,
1546 * clear this bit in the bitmap, unlock translations,
1547 * and release the "exclusive" lock on the page.
1549 if (smp
->sm_bitmap
& bitmask
) {
1551 smp
->sm_bitmap
&= ~bitmask
;
1554 hat_unlock(kas
.a_hat
, addr
, PAGESIZE
);
1557 * Use page_find() instead of page_lookup() to
1558 * find the page since we know that it has
1561 pp
= page_find(vp
, off
);
1563 panic("segmap_pageunlock: page not found");
1566 if (rw
== S_WRITE
) {
1568 } else if (rw
!= S_OTHER
) {
1578 segmap_getmap(struct seg
*seg
, struct vnode
*vp
, u_offset_t off
)
1580 return (segmap_getmapflt(seg
, vp
, off
, MAXBSIZE
, 0, S_OTHER
));
/*
 * This is the magic virtual address that offset 0 of an ELF
 * file gets mapped to in user space.  This is used to pick
 * the vac color on the freelist.
 */
#define	ELF_OFFZERO_VA	(0x10000)

/*
 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
 * in the range <off, off + len).  off doesn't need to be MAXBSIZE aligned.
 * The return address is always MAXBSIZE aligned.
 *
 * If forcefault is nonzero and the MMU translations haven't yet been created,
 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
 */
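/*
 * For illustration, a typical file system read path uses this interface
 * roughly as follows (a sketch of the usual caller pattern, not code from
 * this file; the uio handling belongs to the caller):
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);
 *	error = uiomove(base + (off & MAXBOFFSET), n, UIO_READ, uio);
 *	(void) segmap_release(segkmap, base, error ? 0 : SM_DONTNEED);
 */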
1606 struct smap
*smp
, *nsmp
;
1607 extern struct vnode
*common_specvp();
1608 caddr_t baseaddr
; /* MAXBSIZE aligned */
1613 kmutex_t
*hashmtx
, *smapmtx
;
1619 page_t
*pl
[MAXPPB
+ 1];
1623 ASSERT(seg
->s_as
== &kas
);
1624 ASSERT(seg
== segkmap
);
1626 baseoff
= off
& (offset_t
)MAXBMASK
;
1627 if (off
+ len
> baseoff
+ MAXBSIZE
) {
1628 panic("segmap_getmap bad len");
1633 * If this is a block device we have to be sure to use the
1634 * "common" block device vnode for the mapping.
1636 if (vp
->v_type
== VBLK
)
1637 vp
= common_specvp(vp
);
1639 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_getmap
++;
1641 if (segmap_kpm
== 0 ||
1642 (forcefault
== SM_PAGECREATE
&& rw
!= S_WRITE
)) {
1646 SMAP_HASHFUNC(vp
, off
, hashid
); /* macro assigns hashid */
1647 hashmtx
= SHASHMTX(hashid
);
1650 mutex_enter(hashmtx
);
1651 for (smp
= smd_hash
[hashid
].sh_hash_list
;
1652 smp
!= NULL
; smp
= smp
->sm_hash
)
1653 if (smp
->sm_vp
== vp
&& smp
->sm_off
== baseoff
)
1655 mutex_exit(hashmtx
);
1660 ASSERT(vp
->v_count
!= 0);
1663 * Get smap lock and recheck its tag. The hash lock
1664 * is dropped since the hash is based on (vp, off)
1665 * and (vp, off) won't change when we have smap mtx.
1667 smapmtx
= SMAPMTX(smp
);
1668 mutex_enter(smapmtx
);
1669 if (smp
->sm_vp
!= vp
|| smp
->sm_off
!= baseoff
) {
1670 mutex_exit(smapmtx
);
1674 if (smp
->sm_refcnt
== 0) {
1676 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_get_reclaim
++;
1679 * Could still be on the free list. However, this
1680 * could also be an smp that is transitioning from
1681 * the free list when we have too much contention
1682 * for the smapmtx's. In this case, we have an
1683 * unlocked smp that is not on the free list any
1684 * longer, but still has a 0 refcnt. The only way
1685 * to be sure is to check the freelist pointers.
1686 * Since we now have the smapmtx, we are guaranteed
1687 * that the (vp, off) won't change, so we are safe
1688 * to reclaim it. get_free_smp() knows that this
1689 * can happen, and it will check the refcnt.
1692 if ((smp
->sm_next
!= NULL
)) {
1693 struct sm_freeq
*freeq
;
1695 ASSERT(smp
->sm_prev
!= NULL
);
1696 sm
= &smd_free
[smp
->sm_free_ndx
];
1698 if (smp
->sm_flags
& SM_QNDX_ZERO
)
1699 freeq
= &sm
->sm_freeq
[0];
1701 freeq
= &sm
->sm_freeq
[1];
1703 mutex_enter(&freeq
->smq_mtx
);
1704 if (freeq
->smq_free
!= smp
) {
1706 * fastpath normal case
1708 smp
->sm_prev
->sm_next
= smp
->sm_next
;
1709 smp
->sm_next
->sm_prev
= smp
->sm_prev
;
1710 } else if (smp
== smp
->sm_next
) {
1712 * Taking the last smap on freelist
1714 freeq
->smq_free
= NULL
;
1717 * Reclaiming 1st smap on list
1719 freeq
->smq_free
= smp
->sm_next
;
1720 smp
->sm_prev
->sm_next
= smp
->sm_next
;
1721 smp
->sm_next
->sm_prev
= smp
->sm_prev
;
1723 mutex_exit(&freeq
->smq_mtx
);
1724 smp
->sm_prev
= smp
->sm_next
= NULL
;
1726 ASSERT(smp
->sm_prev
== NULL
);
1727 segmapcnt
.smp_stolen
.value
.ul
++;
1731 segmapcnt
.smp_get_use
.value
.ul
++;
1733 smp
->sm_refcnt
++; /* another user */
1736 * We don't invoke segmap_fault via TLB miss, so we set ref
1737 * and mod bits in advance. For S_OTHER we set them in
1738 * segmap_fault F_SOFTUNLOCK.
1741 if (rw
== S_WRITE
) {
1742 smp
->sm_flags
|= SM_WRITE_DATA
;
1743 } else if (rw
== S_READ
) {
1744 smp
->sm_flags
|= SM_READ_DATA
;
1747 mutex_exit(smapmtx
);
1752 uint32_t free_ndx
, *free_ndxp
;
1753 union segmap_cpu
*scpu
;
		/*
		 * On a PAC machine or a machine with anti-alias
		 * hardware, smd_colormsk will be zero.
		 *
		 * On a VAC machine, pick color by offset in the file
		 * so we won't get VAC conflicts on elf files.
		 * On data files, color does not matter but we
		 * don't know what kind of file it is so we always
		 * pick color by offset.  This causes the color
		 * corresponding to file offset zero to be used more
		 * often.
		 */
1767 color
= (baseoff
>> MAXBSHIFT
) & smd_colormsk
;
1768 scpu
= smd_cpu
+CPU
->cpu_seqid
;
1769 free_ndxp
= &scpu
->scpu
.scpu_free_ndx
[color
];
1770 free_ndx
= (*free_ndxp
+= smd_ncolor
) & smd_freemsk
;
1772 colors_used
[free_ndx
]++;
1776 * Get a locked smp slot from the free list.
1778 smp
= get_free_smp(free_ndx
);
1779 smapmtx
= SMAPMTX(smp
);
1781 ASSERT(smp
->sm_vp
== NULL
);
1783 if ((nsmp
= segmap_hashin(smp
, vp
, baseoff
, hashid
)) != NULL
) {
1785 * Failed to hashin, there exists one now.
1786 * Return the smp we just allocated.
1788 segmap_smapadd(smp
);
1789 mutex_exit(smapmtx
);
1794 smp
->sm_refcnt
++; /* another user */
1797 * We don't invoke segmap_fault via TLB miss, so we set ref
1798 * and mod bits in advance. For S_OTHER we set them in
1799 * segmap_fault F_SOFTUNLOCK.
1802 if (rw
== S_WRITE
) {
1803 smp
->sm_flags
|= SM_WRITE_DATA
;
1804 } else if (rw
== S_READ
) {
1805 smp
->sm_flags
|= SM_READ_DATA
;
1808 mutex_exit(smapmtx
);
1814 goto use_segmap_range
;
1819 /* Lint directive required until 6746211 is fixed */
1821 ASSERT(PAGESIZE
== MAXBSIZE
);
1824 * remember the last smp faulted on this cpu.
1826 (smd_cpu
+CPU
->cpu_seqid
)->scpu
.scpu_last_smap
= smp
;
1828 if (forcefault
== SM_PAGECREATE
) {
1829 baseaddr
= segmap_pagecreate_kpm(seg
, vp
, baseoff
, smp
, rw
);
1834 (pp
= GET_KPME(smp
)->kpe_page
) != NULL
) {
1840 if (page_trylock(pp
, SE_SHARED
)) {
1841 if (PP_ISFREE(pp
) ||
1842 !(pp
->p_vnode
== vp
&&
1843 pp
->p_offset
== baseoff
)) {
1845 pp
= page_lookup(vp
, baseoff
,
1849 pp
= page_lookup(vp
, baseoff
, SE_SHARED
);
1853 ASSERT(GET_KPME(smp
)->kpe_page
== NULL
);
1857 if (rw
== S_WRITE
&&
1858 hat_page_getattr(pp
, P_MOD
| P_REF
) !=
1865 * We have the p_selock as reader, grab_smp
1866 * can't hit us, we have bumped the smap
1867 * refcnt and hat_pageunload needs the
1868 * p_selock exclusive.
1870 kpme
= GET_KPME(smp
);
1871 if (kpme
->kpe_page
== pp
) {
1872 baseaddr
= hat_kpm_page2va(pp
, 0);
1873 } else if (kpme
->kpe_page
== NULL
) {
1874 baseaddr
= hat_kpm_mapin(pp
, kpme
);
1876 panic("segmap_getmapflt: stale "
1877 "kpme page, kpme %p", (void *)kpme
);
1882 * We don't invoke segmap_fault via TLB miss,
1883 * so we set ref and mod bits in advance.
1884 * For S_OTHER and we set them in segmap_fault
1887 if (rw
== S_READ
&& !hat_isref(pp
))
1896 base
= segkpm_create_va(baseoff
);
1897 error
= VOP_GETPAGE(vp
, (offset_t
)baseoff
, len
, &prot
, pl
, MAXBSIZE
,
1898 seg
, base
, rw
, CRED(), NULL
);
1901 if (error
|| pp
== NULL
) {
1903 * Use segmap address slot and let segmap_fault deal
1904 * with the error cases. There is no error return
1907 goto use_segmap_range
;
	ASSERT(pl[1] == NULL);

	/*
	 * When prot is not returned w/ PROT_ALL the returned pages
	 * are not backed by fs blocks.  For most of the segmap users
	 * this is no problem, they don't write to the pages in the
	 * same request and therefore don't rely on a following
	 * trap driven segmap_fault.  With SM_LOCKPROTO users it
	 * is more secure to use segkmap addresses to allow
	 * protection segmap_fault's.
	 */
1921 if (prot
!= PROT_ALL
&& forcefault
== SM_LOCKPROTO
) {
1923 * Use segmap address slot and let segmap_fault
1924 * do the error return.
1926 ASSERT(rw
!= S_WRITE
);
1927 ASSERT(PAGE_LOCKED(pp
));
1930 goto use_segmap_range
;
1934 * We have the p_selock as reader, grab_smp can't hit us, we
1935 * have bumped the smap refcnt and hat_pageunload needs the
1936 * p_selock exclusive.
1938 kpme
= GET_KPME(smp
);
1939 if (kpme
->kpe_page
== pp
) {
1940 baseaddr
= hat_kpm_page2va(pp
, 0);
1941 } else if (kpme
->kpe_page
== NULL
) {
1942 baseaddr
= hat_kpm_mapin(pp
, kpme
);
1944 panic("segmap_getmapflt: stale kpme page after "
1945 "VOP_GETPAGE, kpme %p", (void *)kpme
);
1949 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_fault
++;
1955 baseaddr
= seg
->s_base
+ ((smp
- smd_smap
) * MAXBSIZE
);
1956 TRACE_4(TR_FAC_VM
, TR_SEGMAP_GETMAP
,
1957 "segmap_getmap:seg %p addr %p vp %p offset %llx",
1958 seg
, baseaddr
, vp
, baseoff
);
1961 * Prefault the translations
1963 vaddr
= baseaddr
+ (off
- baseoff
);
1964 if (forcefault
&& (newslot
|| !hat_probe(kas
.a_hat
, vaddr
))) {
1966 caddr_t pgaddr
= (caddr_t
)((uintptr_t)vaddr
&
1967 (uintptr_t)PAGEMASK
);
1969 (void) segmap_fault(kas
.a_hat
, seg
, pgaddr
,
1970 (vaddr
+ len
- pgaddr
+ PAGESIZE
- 1) & (uintptr_t)PAGEMASK
,
1978 segmap_release(struct seg
*seg
, caddr_t addr
, uint_t flags
)
1989 if (segmap_kpm
&& IS_KPM_ADDR(addr
)) {
1991 if (((uintptr_t)addr
& MAXBOFFSET
) != 0) {
1992 panic("segmap_release: addr %p not "
1993 "MAXBSIZE aligned", (void *)addr
);
1997 if ((smp
= get_smap_kpm(addr
, &pp
)) == NULL
) {
1998 panic("segmap_release: smap not found "
1999 "for addr %p", (void *)addr
);
2003 TRACE_3(TR_FAC_VM
, TR_SEGMAP_RELMAP
,
2004 "segmap_relmap:seg %p addr %p smp %p",
2007 smtx
= SMAPMTX(smp
);
2010 * For compatibility reasons segmap_pagecreate_kpm sets this
2011 * flag to allow a following segmap_pagecreate to return
2012 * this as "newpage" flag. When segmap_pagecreate is not
2013 * called at all we clear it now.
2015 smp
->sm_flags
&= ~SM_KPM_NEWPAGE
;
2017 if (smp
->sm_flags
& SM_WRITE_DATA
) {
2019 } else if (smp
->sm_flags
& SM_READ_DATA
) {
2023 if (addr
< seg
->s_base
|| addr
>= seg
->s_base
+ seg
->s_size
||
2024 ((uintptr_t)addr
& MAXBOFFSET
) != 0) {
2025 panic("segmap_release: bad addr %p", (void *)addr
);
2028 smp
= GET_SMAP(seg
, addr
);
2030 TRACE_3(TR_FAC_VM
, TR_SEGMAP_RELMAP
,
2031 "segmap_relmap:seg %p addr %p smp %p",
2034 smtx
= SMAPMTX(smp
);
2036 smp
->sm_flags
|= SM_NOTKPM_RELEASED
;
2039 ASSERT(smp
->sm_refcnt
> 0);
2042 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2045 if ((flags
& ~SM_DONTNEED
) != 0) {
2046 if (flags
& SM_WRITE
)
2047 segmapcnt
.smp_rel_write
.value
.ul
++;
2048 if (flags
& SM_ASYNC
) {
2050 segmapcnt
.smp_rel_async
.value
.ul
++;
2052 if (flags
& SM_INVAL
) {
2054 segmapcnt
.smp_rel_abort
.value
.ul
++;
2056 if (flags
& SM_DESTROY
) {
2057 bflags
|= (B_INVAL
|B_TRUNC
);
2058 segmapcnt
.smp_rel_abort
.value
.ul
++;
2060 if (smp
->sm_refcnt
== 1) {
2062 * We only bother doing the FREE and DONTNEED flags
2063 * if no one else is still referencing this mapping.
2065 if (flags
& SM_FREE
) {
2067 segmapcnt
.smp_rel_free
.value
.ul
++;
2069 if (flags
& SM_DONTNEED
) {
2070 bflags
|= B_DONTNEED
;
2071 segmapcnt
.smp_rel_dontneed
.value
.ul
++;
2075 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_release
++;
2079 offset
= smp
->sm_off
;
2081 if (--smp
->sm_refcnt
== 0) {
2083 smp
->sm_flags
&= ~(SM_WRITE_DATA
| SM_READ_DATA
);
2085 if (flags
& (SM_INVAL
|SM_DESTROY
)) {
2086 segmap_hashout(smp
); /* remove map info */
2088 hat_kpm_mapout(pp
, GET_KPME(smp
), addr
);
2089 if (smp
->sm_flags
& SM_NOTKPM_RELEASED
) {
2090 smp
->sm_flags
&= ~SM_NOTKPM_RELEASED
;
2091 hat_unload(kas
.a_hat
, segkmap
->s_base
+
2092 ((smp
- smd_smap
) * MAXBSIZE
),
2093 MAXBSIZE
, HAT_UNLOAD
);
2098 segkpm_mapout_validkpme(GET_KPME(smp
));
2100 smp
->sm_flags
&= ~SM_NOTKPM_RELEASED
;
2101 hat_unload(kas
.a_hat
, addr
, MAXBSIZE
,
2105 segmap_smapadd(smp
); /* add to free list */
2113 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2116 if ((flags
& ~SM_DONTNEED
) != 0) {
2117 error
= VOP_PUTPAGE(vp
, offset
, MAXBSIZE
,
2118 bflags
, CRED(), NULL
);
/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
2132 struct segmap_data
*smd
;
2133 struct smap
*smp
, *smp_end
;
2139 smd
= (struct segmap_data
*)seg
->s_data
;
2141 for (smp
= smd
->smd_sm
, smp_end
= smp
+ smd
->smd_npages
;
2142 smp
< smp_end
; smp
++) {
2144 if (smp
->sm_refcnt
) {
2145 for (off
= 0; off
< MAXBSIZE
; off
+= PAGESIZE
) {
2149 * If pp == NULL, the page either does
2150 * not exist or is exclusively locked.
2151 * So determine if it exists before
2154 if ((pp
= page_lookup_nowait(smp
->sm_vp
,
2155 smp
->sm_off
+ off
, SE_SHARED
)))
2158 pp
= page_exists(smp
->sm_vp
,
2162 pfn
= page_pptonum(pp
);
2163 dump_addpage(seg
->s_as
,
2168 dump_timeleft
= dump_timeout
;
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}
static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
	return (0);
}
static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}
static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}
#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */
static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
2223 ASSERT(smp
->sm_refcnt
> 0);
2225 if ((pp
= page_lookup(vp
, off
, SE_SHARED
)) == NULL
) {
2228 base
= segkpm_create_va(off
);
2230 if ((pp
= page_create_va(vp
, off
, PAGESIZE
, PG_WAIT
,
2231 seg
, base
)) == NULL
) {
2232 panic("segmap_pagecreate_kpm: "
2233 "page_create failed");
2239 ASSERT((u_offset_t
)(off
- smp
->sm_off
) <= INT_MAX
);
2242 * Mark this here until the following segmap_pagecreate
2243 * or segmap_release.
2245 smtx
= SMAPMTX(smp
);
2247 smp
->sm_flags
|= SM_KPM_NEWPAGE
;
2251 kpme
= GET_KPME(smp
);
2252 if (!newpage
&& kpme
->kpe_page
== pp
)
2253 base
= hat_kpm_page2va(pp
, 0);
2255 base
= hat_kpm_mapin(pp
, kpme
);
2258 * FS code may decide not to call segmap_pagecreate and we
2259 * don't invoke segmap_fault via TLB miss, so we have to set
2260 * ref and mod bits in advance.
2262 if (rw
== S_WRITE
) {
2265 ASSERT(rw
== S_READ
);
2269 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_pagecreate
++;
/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
2284 caddr_t baseaddr
= (caddr_t
)((uintptr_t)addr
& MAXBMASK
);
2288 union segmap_cpu
*scpu
;
2290 pp
= hat_kpm_vaddr2page(baseaddr
);
2292 ASSERT(pp
&& !PP_ISFREE(pp
));
2293 ASSERT(PAGE_LOCKED(pp
));
2294 ASSERT(((uintptr_t)pp
->p_offset
& MAXBOFFSET
) == 0);
2297 offset
= pp
->p_offset
;
2301 * Assume the last smap used on this cpu is the one needed.
2303 scpu
= smd_cpu
+CPU
->cpu_seqid
;
2304 smp
= scpu
->scpu
.scpu_last_smap
;
2305 mutex_enter(&smp
->sm_mtx
);
2306 if (smp
->sm_vp
== vp
&& smp
->sm_off
== offset
) {
2307 ASSERT(smp
->sm_refcnt
> 0);
2310 * Assumption wrong, find the smap on the hash chain.
2312 mutex_exit(&smp
->sm_mtx
);
2313 SMAP_HASHFUNC(vp
, offset
, hashid
); /* macro assigns hashid */
2314 hashmtx
= SHASHMTX(hashid
);
2316 mutex_enter(hashmtx
);
2317 smp
= smd_hash
[hashid
].sh_hash_list
;
2318 for (; smp
!= NULL
; smp
= smp
->sm_hash
) {
2319 if (smp
->sm_vp
== vp
&& smp
->sm_off
== offset
)
2322 mutex_exit(hashmtx
);
2324 mutex_enter(&smp
->sm_mtx
);
2325 ASSERT(smp
->sm_vp
== vp
&& smp
->sm_off
== offset
);
2330 *ppp
= smp
? pp
: NULL
;
#else	/* SEGKPM_SUPPORT */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
	struct smap *smp, enum seg_rw rw)
{
	return (NULL);
}

struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
	return (NULL);
}

#endif	/* SEGKPM_SUPPORT */