/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */
/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */
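/*
 * Illustrative sketch only (not part of this file): a file system read
 * path typically maps a MAXBSIZE window of the vnode through segkmap,
 * copies the data out, and releases the window.  The names uio and n
 * below stand for assumed caller state, not identifiers defined here.
 *
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);
 *	error = uiomove(base + (off & MAXBOFFSET), n, UIO_READ, uio);
 *	error = segmap_release(segkmap, base, 0);
 */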
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>

#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t	segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
		    size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
		    uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
		    uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
		    struct page ***ppp, enum lock_type type,
		    enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
		    caddr_t addr);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
		    struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);
#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop
static struct seg_ops segmap_ops = {
	SEGMAP_BADOP(int),	/* dup */
	SEGMAP_BADOP(int),	/* unmap */
	segmap_free,
	segmap_fault,
	segmap_faulta,
	SEGMAP_BADOP(int),	/* setprot */
	segmap_checkprot,
	segmap_kluster,
	SEGMAP_BADOP(size_t),	/* swapout */
	SEGMAP_BADOP(int),	/* sync */
	SEGMAP_BADOP(size_t),	/* incore */
	SEGMAP_BADOP(int),	/* lockop */
	segmap_getprot,
	segmap_getoffset,
	segmap_gettype,
	segmap_getvp,
	SEGMAP_BADOP(int),	/* advise */
	segmap_dump,
	segmap_pagelock,	/* pagelock */
	SEGMAP_BADOP(int),	/* setpgsz */
	segmap_getmemid,	/* getmemid */
	segmap_getpolicy,	/* getpolicy */
	segmap_capable,		/* capable */
	seg_inherit_notsup	/* inherit */
};
/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
		    size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
		    u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);
/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)	(((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))
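/*
 * Worked example (assuming the usual MAXBSIZE of 8192, i.e. MAXBSHIFT 13,
 * values that are not restated in this file): a 256MB segkmap yields
 * MAP_PAGES = 0x10000000 >> 13 = 32768 smap slots, and an address
 * 0x5000 bytes past s_base maps to smap slot 0x5000 >> 13 = 2.
 */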
static int	smd_colormsk = 0;
static int	smd_ncolor = 0;
static int	smd_nfree = 0;
static int	smd_freemsk = 0;

static int	*colors_used;

static struct smap	*smd_smap;
static struct smaphash	*smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int	*smd_hash_len;
#endif
static struct smfree	*smd_free;
static ulong_t		smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64
union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;
/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first then the hash lock (for hash in/out (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done by only holding the hashchain lock, when a wanted
 * slot is found, we drop the hashchain lock then lock the slot so there
 * is no overlapping of hashchain and smap locks. After the slot is
 * locked, we verify again if the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in smap structure except for
 * the link fields for hash/free lists which are protected by
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp)		(&smp->sm_mtx)
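/*
 * A minimal sketch (assumed, not quoted from the allocator below) of the
 * reverse-order allocation described above: the freelist lock is taken
 * first, then the smap lock is tried; on failure the slot is skipped
 * rather than waited on.
 *
 *	mutex_enter(&allocq->smq_mtx);
 *	smp = allocq->smq_free;
 *	if (!mutex_tryenter(SMAPMTX(smp)))
 *		smp = smp->sm_next;	skip this slot, try the next one
 */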
#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
	    ((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}
/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */

static int
segmap_kstat_update(kstat_t *ksp, int rw)
{
    int i;
    ulong_t getmap, release, get_reclaim;
    ulong_t fault, pagecreate, get_reuse;

    if (rw == KSTAT_WRITE)
        return (EACCES);
    getmap = release = get_reclaim = (ulong_t)0;
    fault = pagecreate = get_reuse = (ulong_t)0;
    for (i = 0; i < max_ncpus; i++) {
        getmap += smd_cpu[i].scpu.scpu_getmap;
        release += smd_cpu[i].scpu.scpu_release;
        get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
        fault += smd_cpu[i].scpu.scpu_fault;
        pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
        get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
    }
    segmapcnt.smp_getmap.value.ul = getmap;
    segmapcnt.smp_release.value.ul = release;
    segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
    segmapcnt.smp_fault.value.ul = fault;
    segmapcnt.smp_pagecreate.value.ul = pagecreate;
    segmapcnt.smp_get_reuse.value.ul = get_reuse;
    return (0);
}
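/*
 * These counters surface through the segmap kstat; assuming the kstat is
 * installed elsewhere with segmap_kstat_update() as its update routine,
 * the summed values can be inspected from userland with, for example,
 * "kstat -n segmap".
 */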
int
segmap_create(struct seg *seg, void *argsp)
{
    struct segmap_data *smd;
    struct smap *smp;
    struct smfree *sm;
    struct segmap_crargs *a = (struct segmap_crargs *)argsp;
    struct smaphash *shashp;
    union segmap_cpu *scpu;
    long i, npages;
    size_t hashsz;
    int nfreelist;
    extern void prefetch_smap_w(void *);
    extern int max_ncpus;

    ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

    if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
        panic("segkmap not MAXBSIZE aligned");
        /*NOTREACHED*/
    }

    smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

    seg->s_data = (void *)smd;
    seg->s_ops = &segmap_ops;
    smd->smd_prot = a->prot;

    /*
     * Scale the number of smap freelists to be
     * proportional to max_ncpus * number of virtual colors.
     * The caller can over-ride this scaling by providing
     * a non-zero a->nfreelist argument.
     */
    nfreelist = a->nfreelist;
    if (nfreelist == 0)
        nfreelist = max_ncpus;
    else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
        cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
            "%d, using %d", nfreelist, max_ncpus);
        nfreelist = max_ncpus;
    }
    if (!ISP2(nfreelist)) {
        /* round up nfreelist to the next power of two. */
        nfreelist = 1 << (highbit(nfreelist));
    }

    /*
     * Get the number of virtual colors - must be a power of 2.
     */
    smd_ncolor = a->shmsize >> MAXBSHIFT;
    ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
    ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
    smd_colormsk = smd_ncolor - 1;
    smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
    smd_freemsk = smd_nfree - 1;

    /*
     * Allocate and initialize the freelist headers.
     * Note that sm_freeq[1] starts out as the release queue. This
     * is known when the smap structures are initialized below.
     */
    smd_free = smd->smd_free =
        kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
    for (i = 0; i < smd_nfree; i++) {
        sm = &smd->smd_free[i];
        mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
        sm->sm_allocq = &sm->sm_freeq[0];
        sm->sm_releq = &sm->sm_freeq[1];
    }

    /*
     * Allocate and initialize the smap hash chain headers.
     * Compute hash size rounding down to the next power of two.
     */
    npages = MAP_PAGES(seg);
    smd->smd_npages = npages;
    hashsz = npages / SMAP_HASHAVELEN;
    hashsz = 1 << (highbit(hashsz)-1);
    smd_hashmsk = hashsz - 1;
    smd_hash = smd->smd_hash =
        kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
    smd_hash_len =
        kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
    for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
        shashp->sh_hash_list = NULL;
        mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
    }

    /*
     * Allocate and initialize the smap structures.
     * Link all slots onto the appropriate freelist.
     * The smap array is large enough to affect boot time
     * on large systems, so use memory prefetching and only
     * go through the array 1 time. Inline an optimized version
     * of segmap_smapadd to add structures to freelists with
     * knowledge that no locks are needed here.
     */
    smd_smap = smd->smd_sm =
        kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

    for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
        smp >= smd->smd_sm; smp--) {
        struct smap *smpfreelist;
        struct sm_freeq *releq;

        prefetch_smap_w((char *)smp);

        mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
        smp->sm_free_ndx = SMP2SMF_NDX(smp);

        sm = SMP2SMF(smp);
        releq = sm->sm_releq;

        smpfreelist = releq->smq_free;
        if (smpfreelist == 0) {
            releq->smq_free = smp->sm_next = smp->sm_prev = smp;
        } else {
            smp->sm_next = smpfreelist;
            smp->sm_prev = smpfreelist->sm_prev;
            smpfreelist->sm_prev = smp;
            smp->sm_prev->sm_next = smp;
            releq->smq_free = smp->sm_next;
        }

        /*
         * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
         */
        smp->sm_flags = 0;

#ifdef SEGKPM_SUPPORT
        /*
         * Due to the fragile prefetch loop no
         * separate function is used here.
         */
        smp->sm_kpme_next = NULL;
        smp->sm_kpme_prev = NULL;
        smp->sm_kpme_page = NULL;
#endif
    }

    /*
     * Allocate the per color indices that distribute allocation
     * requests over the free lists. Each cpu will have a private
     * rotor index to spread the allocations even across the available
     * smap freelists. Init the scpu_last_smap field to the first
     * smap element so there is no need to check for NULL.
     */
    smd_cpu =
        kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
    for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
        int j;
        for (j = 0; j < smd_ncolor; j++)
            scpu->scpu.scpu_free_ndx[j] = j;
        scpu->scpu.scpu_last_smap = smd_smap;
    }

    /*
     * Keep track of which colors are used more often.
     */
    colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);

    return (0);
}
static void
segmap_free(struct seg *seg)
{
    ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}
/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
    struct hat *hat,
    struct seg *seg,
    caddr_t addr,
    size_t len,
    enum seg_rw rw,
    struct smap *smp)
{
    page_t *pp;
    caddr_t adr;
    u_offset_t off;
    struct vnode *vp;
    kmutex_t *smtx;
    ushort_t bitmask;

    ASSERT(smp->sm_refcnt > 0);

    vp = smp->sm_vp;

    if (segmap_kpm && IS_KPM_ADDR(addr)) {
        /*
         * We're called only from segmap_fault and this was a
         * NOP in case of a kpm based smap, so dangerous things
         * must have happened in the meantime. Pages are prefaulted
         * and locked in segmap_getmapflt and they will not be
         * unlocked until segmap_release.
         */
        panic("segmap_unlock: called with kpm addr %p", (void *)addr);
        /*NOTREACHED*/
    }

    off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

    hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
    for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
        /*
         * Use page_find() instead of page_lookup() to
         * find the page since we know that it has
         * a "shared" lock.
         */
        pp = page_find(vp, off);
        if (pp == NULL)
            panic("segmap_unlock: page not found");

        if (rw == S_WRITE) {
            hat_setrefmod(pp);
        } else if (rw != S_OTHER) {
            TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
                "segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
            hat_setref(pp);
        }

        /*
         * Clear bitmap, if the bit corresponding to "off" is set,
         * since the page and translation are being unlocked.
         */
        bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

        /*
         * Large Files: Following assertion is to verify
         * the correctness of the cast to (int) above.
         */
        ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
        smtx = SMAPMTX(smp);
        mutex_enter(smtx);
        if (smp->sm_bitmap & bitmask) {
            smp->sm_bitmap &= ~bitmask;
        }
        mutex_exit(smtx);

        page_unlock(pp);
    }
}
#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */
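/*
 * For example, with the usual MAXBSIZE of 8192 this evaluates to 2, so the
 * pl[] arrays sized with MAXPPB + 1 below hold at most two pages plus a
 * terminating NULL; on platforms whose base page size is already 8K a
 * MAXBSIZE window is a single page.
 */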
/*
 * This routine is called via a machine specific fault handling
 * routine. It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
    struct hat *hat,
    struct seg *seg,
    caddr_t addr,
    size_t len,
    enum fault_type type,
    enum seg_rw rw)
{
    struct segmap_data *smd = (struct segmap_data *)seg->s_data;
    struct smap *smp;
    page_t *pp, **ppp;
    struct vnode *vp;
    u_offset_t off;
    page_t *pl[MAXPPB + 1];
    uint_t prot;
    u_offset_t addroff;
    caddr_t adr;
    int err;
    u_offset_t sm_off;
    int hat_flag;

    if (segmap_kpm && IS_KPM_ADDR(addr)) {
        int newpage;
        kmutex_t *smtx;

        /*
         * Pages are successfully prefaulted and locked in
         * segmap_getmapflt and can't be unlocked until
         * segmap_release. No hat mappings have to be locked
         * and they also can't be unlocked as long as the
         * caller owns an active kpm addr.
         */
        if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
            panic("segmap_fault: smap not found "
                "for addr %p", (void *)addr);
            /*NOTREACHED*/
        }

        smtx = SMAPMTX(smp);
        newpage = smp->sm_flags & SM_KPM_NEWPAGE;
        if (newpage)
            cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
                (void *)smp);

        if (type != F_SOFTUNLOCK) {
            mutex_exit(smtx);
            return (0);
        }

        vp = smp->sm_vp;
        sm_off = smp->sm_off;
        mutex_exit(smtx);

        if (vp == NULL)
            return (FC_MAKE_ERR(EIO));

        ASSERT(smp->sm_refcnt > 0);

        addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
        if (addroff + len > MAXBSIZE)
            panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
                (void *)(addr + len));

        off = sm_off + addroff;

        pp = page_find(vp, off);

        if (pp == NULL)
            panic("segmap_fault: softunlock page not found");

        /*
         * Set ref bit also here in case of S_OTHER to avoid the
         * overhead of supporting other cases than F_SOFTUNLOCK
         * with segkpm. We can do this because the underlying
         * pages are locked anyway.
         */
        hat_setref(pp);

        TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
            "segmap_fault:pp %p vp %p offset %llx",
            pp, vp, off);

        return (0);
    }

    smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
    smp = GET_SMAP(seg, addr);
    vp = smp->sm_vp;
    sm_off = smp->sm_off;

    if (vp == NULL)
        return (FC_MAKE_ERR(EIO));

    ASSERT(smp->sm_refcnt > 0);

    addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
    if (addroff + len > MAXBSIZE) {
        panic("segmap_fault: endaddr %p "
            "exceeds MAXBSIZE chunk", (void *)(addr + len));
        /*NOTREACHED*/
    }
    off = sm_off + addroff;

    /*
     * First handle the easy stuff
     */
    if (type == F_SOFTUNLOCK) {
        segmap_unlock(hat, seg, addr, len, rw, smp);
        return (0);
    }

    TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
        "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
    err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
        seg, addr, rw, CRED(), NULL);

    if (err)
        return (FC_MAKE_ERR(err));

    prot &= smd->smd_prot;

    /*
     * Handle all pages returned in the pl[] array.
     * This loop is coded on the assumption that if
     * there was no error from the VOP_GETPAGE routine,
     * that the page list returned will contain all the
     * needed pages for the vp from [off..off + len].
     */
    ppp = pl;
    while ((pp = *ppp++) != NULL) {
        u_offset_t poff;

        ASSERT(pp->p_vnode == vp);
        hat_flag = HAT_LOAD;

        /*
         * Verify that the pages returned are within the range
         * of this segmap region. Note that it is theoretically
         * possible for pages outside this range to be returned,
         * but it is not very likely. If we cannot use the
         * page here, just release it and go on to the next one.
         */
        if (pp->p_offset < sm_off ||
            pp->p_offset >= sm_off + MAXBSIZE) {
            (void) page_release(pp, 1);
            continue;
        }

        ASSERT(hat == kas.a_hat);
        poff = pp->p_offset;
        adr = addr + (poff - off);
        if (adr >= addr && adr < addr + len) {
            TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
                "segmap_fault:pp %p vp %p offset %llx",
                pp, vp, poff);
            if (type == F_SOFTLOCK)
                hat_flag = HAT_LOAD_LOCK;
        }

        /*
         * Deal with VMODSORT pages here. If we know this is a write
         * do the setmod now and allow write protection.
         * As long as it's modified or not S_OTHER, remove write
         * protection. With S_OTHER it's up to the FS to deal with this.
         */
        if (IS_VMODSORT(vp)) {
            if (rw == S_WRITE)
                hat_setmod(pp);
            else if (rw != S_OTHER && !hat_ismod(pp))
                prot &= ~PROT_WRITE;
        }

        hat_memload(hat, adr, pp, prot, hat_flag);
        if (hat_flag != HAT_LOAD_LOCK)
            page_unlock(pp);
    }
    return (0);
}
/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
    struct smap *smp;
    struct vnode *vp;
    u_offset_t off;
    int err;

    if (segmap_kpm && IS_KPM_ADDR(addr)) {
        int newpage;
        kmutex_t *smtx;

        /*
         * Pages are successfully prefaulted and locked in
         * segmap_getmapflt and can't be unlocked until
         * segmap_release. No hat mappings have to be locked
         * and they also can't be unlocked as long as the
         * caller owns an active kpm addr.
         */
        if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
            panic("segmap_faulta: smap not found "
                "for addr %p", (void *)addr);
            /*NOTREACHED*/
        }

        smtx = SMAPMTX(smp);
        newpage = smp->sm_flags & SM_KPM_NEWPAGE;
        if (newpage)
            cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
                (void *)smp);
        mutex_exit(smtx);

        return (0);
    }

    segmapcnt.smp_faulta.value.ul++;
    smp = GET_SMAP(seg, addr);

    ASSERT(smp->sm_refcnt > 0);

    vp = smp->sm_vp;
    off = smp->sm_off;

    if (vp == NULL) {
        cmn_err(CE_WARN, "segmap_faulta - no vp");
        return (FC_MAKE_ERR(EIO));
    }

    TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
        "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

    err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
        & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
        seg, addr, S_READ, CRED(), NULL);

    if (err)
        return (FC_MAKE_ERR(err));
    return (0);
}
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
    struct segmap_data *smd = (struct segmap_data *)seg->s_data;

    ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

    /*
     * Need not acquire the segment lock since
     * "smd_prot" is a read-only field.
     */
    return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}
*seg
, caddr_t addr
, size_t len
, uint_t
*protv
)
853 struct segmap_data
*smd
= (struct segmap_data
*)seg
->s_data
;
854 size_t pgno
= seg_page(seg
, addr
+ len
) - seg_page(seg
, addr
) + 1;
856 ASSERT(seg
->s_as
&& AS_LOCK_HELD(seg
->s_as
));
860 protv
[--pgno
] = smd
->smd_prot
;
867 segmap_getoffset(struct seg
*seg
, caddr_t addr
)
869 struct segmap_data
*smd
= (struct segmap_data
*)seg
->s_data
;
871 ASSERT(seg
->s_as
&& RW_READ_HELD(&seg
->s_as
->a_lock
));
873 return ((u_offset_t
)smd
->smd_sm
->sm_off
+ (addr
- seg
->s_base
));
878 segmap_gettype(struct seg
*seg
, caddr_t addr
)
880 ASSERT(seg
->s_as
&& RW_READ_HELD(&seg
->s_as
->a_lock
));
887 segmap_getvp(struct seg
*seg
, caddr_t addr
, struct vnode
**vpp
)
889 struct segmap_data
*smd
= (struct segmap_data
*)seg
->s_data
;
891 ASSERT(seg
->s_as
&& RW_READ_HELD(&seg
->s_as
->a_lock
));
893 /* XXX - This doesn't make any sense */
894 *vpp
= smd
->smd_sm
->sm_vp
;
899 * Check to see if it makes sense to do kluster/read ahead to
900 * addr + delta relative to the mapping at addr. We assume here
901 * that delta is a signed PAGESIZE'd multiple (which can be negative).
903 * For segmap we always "approve" of this action from our standpoint.
907 segmap_kluster(struct seg
*seg
, caddr_t addr
, ssize_t delta
)
915 panic("segmap_badop");
920 * Special private segmap operations
924 * Add smap to the appropriate free list.
927 segmap_smapadd(struct smap
*smp
)
930 struct smap
*smpfreelist
;
931 struct sm_freeq
*releq
;
933 ASSERT(MUTEX_HELD(SMAPMTX(smp
)));
935 if (smp
->sm_refcnt
!= 0) {
936 panic("segmap_smapadd");
940 sm
= &smd_free
[smp
->sm_free_ndx
];
942 * Add to the tail of the release queue
943 * Note that sm_releq and sm_allocq could toggle
944 * before we get the lock. This does not affect
945 * correctness as the 2 queues are only maintained
946 * to reduce lock pressure.
948 releq
= sm
->sm_releq
;
949 if (releq
== &sm
->sm_freeq
[0])
950 smp
->sm_flags
|= SM_QNDX_ZERO
;
952 smp
->sm_flags
&= ~SM_QNDX_ZERO
;
953 mutex_enter(&releq
->smq_mtx
);
954 smpfreelist
= releq
->smq_free
;
955 if (smpfreelist
== 0) {
958 releq
->smq_free
= smp
->sm_next
= smp
->sm_prev
= smp
;
960 * Both queue mutexes held to set sm_want;
961 * snapshot the value before dropping releq mutex.
962 * If sm_want appears after the releq mutex is dropped,
963 * then the smap just freed is already gone.
966 mutex_exit(&releq
->smq_mtx
);
968 * See if there was a waiter before dropping the releq mutex
969 * then recheck after obtaining sm_freeq[0] mutex as
970 * the another thread may have already signaled.
973 mutex_enter(&sm
->sm_freeq
[0].smq_mtx
);
975 cv_signal(&sm
->sm_free_cv
);
976 mutex_exit(&sm
->sm_freeq
[0].smq_mtx
);
979 smp
->sm_next
= smpfreelist
;
980 smp
->sm_prev
= smpfreelist
->sm_prev
;
981 smpfreelist
->sm_prev
= smp
;
982 smp
->sm_prev
->sm_next
= smp
;
983 mutex_exit(&releq
->smq_mtx
);
989 segmap_hashin(struct smap
*smp
, struct vnode
*vp
, u_offset_t off
, int hashid
)
995 ASSERT(MUTEX_HELD(SMAPMTX(smp
)));
996 ASSERT(smp
->sm_vp
== NULL
);
997 ASSERT(smp
->sm_hash
== NULL
);
998 ASSERT(smp
->sm_prev
== NULL
);
999 ASSERT(smp
->sm_next
== NULL
);
1000 ASSERT(hashid
>= 0 && hashid
<= smd_hashmsk
);
1002 hmtx
= SHASHMTX(hashid
);
1006 * First we need to verify that no one has created a smp
1007 * with (vp,off) as its tag before we us.
1009 for (tmp
= smd_hash
[hashid
].sh_hash_list
;
1010 tmp
!= NULL
; tmp
= tmp
->sm_hash
)
1011 if (tmp
->sm_vp
== vp
&& tmp
->sm_off
== off
)
1016 * No one created one yet.
1018 * Funniness here - we don't increment the ref count on the
1019 * vnode * even though we have another pointer to it here.
1020 * The reason for this is that we don't want the fact that
1021 * a seg_map entry somewhere refers to a vnode to prevent the
1022 * vnode * itself from going away. This is because this
1023 * reference to the vnode is a "soft one". In the case where
1024 * a mapping is being used by a rdwr [or directory routine?]
1025 * there already has to be a non-zero ref count on the vnode.
1026 * In the case where the vp has been freed and the the smap
1027 * structure is on the free list, there are no pages in memory
1028 * that can refer to the vnode. Thus even if we reuse the same
1029 * vnode/smap structure for a vnode which has the same
1030 * address but represents a different object, we are ok.
1035 hpp
= &smd_hash
[hashid
].sh_hash_list
;
1036 smp
->sm_hash
= *hpp
;
1038 #ifdef SEGMAP_HASHSTATS
1039 smd_hash_len
[hashid
]++;
1048 segmap_hashout(struct smap
*smp
)
1050 struct smap
**hpp
, *hp
;
1056 ASSERT(MUTEX_HELD(SMAPMTX(smp
)));
1061 SMAP_HASHFUNC(vp
, off
, hashid
); /* macro assigns hashid */
1062 mtx
= SHASHMTX(hashid
);
1065 hpp
= &smd_hash
[hashid
].sh_hash_list
;
1069 panic("segmap_hashout");
1077 *hpp
= smp
->sm_hash
;
1078 smp
->sm_hash
= NULL
;
1079 #ifdef SEGMAP_HASHSTATS
1080 smd_hash_len
[hashid
]--;
1085 smp
->sm_off
= (u_offset_t
)0;
1090 * Attempt to free unmodified, unmapped, and non locked segmap
1094 segmap_pagefree(struct vnode
*vp
, u_offset_t off
)
1099 for (pgoff
= off
; pgoff
< off
+ MAXBSIZE
; pgoff
+= PAGESIZE
) {
1101 if ((pp
= page_lookup_nowait(vp
, pgoff
, SE_EXCL
)) == NULL
)
1104 switch (page_release(pp
, 1)) {
1106 segmapcnt
.smp_free_notfree
.value
.ul
++;
1109 segmapcnt
.smp_free_dirty
.value
.ul
++;
1112 segmapcnt
.smp_free
.value
.ul
++;
1119 * Locks held on entry: smap lock
1120 * Locks held on exit : smap lock.
1124 grab_smp(struct smap
*smp
, page_t
*pp
)
1126 ASSERT(MUTEX_HELD(SMAPMTX(smp
)));
1127 ASSERT(smp
->sm_refcnt
== 0);
1129 if (smp
->sm_vp
!= (struct vnode
*)NULL
) {
1130 struct vnode
*vp
= smp
->sm_vp
;
1131 u_offset_t off
= smp
->sm_off
;
1133 * Destroy old vnode association and
1134 * unload any hardware translations to
1137 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_get_reuse
++;
1138 segmap_hashout(smp
);
1141 * This node is off freelist and hashlist,
1142 * so there is no reason to drop/reacquire sm_mtx
1143 * across calls to hat_unload.
1147 int hat_unload_needed
= 0;
1150 * unload kpm mapping
1153 vaddr
= hat_kpm_page2va(pp
, 1);
1154 hat_kpm_mapout(pp
, GET_KPME(smp
), vaddr
);
1159 * Check if we have (also) the rare case of a
1162 if (smp
->sm_flags
& SM_NOTKPM_RELEASED
) {
1163 hat_unload_needed
= 1;
1164 smp
->sm_flags
&= ~SM_NOTKPM_RELEASED
;
1167 if (hat_unload_needed
) {
1168 hat_unload(kas
.a_hat
, segkmap
->s_base
+
1169 ((smp
- smd_smap
) * MAXBSIZE
),
1170 MAXBSIZE
, HAT_UNLOAD
);
1174 ASSERT(smp
->sm_flags
& SM_NOTKPM_RELEASED
);
1175 smp
->sm_flags
&= ~SM_NOTKPM_RELEASED
;
1176 hat_unload(kas
.a_hat
, segkmap
->s_base
+
1177 ((smp
- smd_smap
) * MAXBSIZE
),
1178 MAXBSIZE
, HAT_UNLOAD
);
1180 segmap_pagefree(vp
, off
);
1184 static struct smap
*
1185 get_free_smp(int free_ndx
)
1189 struct smap
*smp
, *first
;
1190 struct sm_freeq
*allocq
, *releq
;
1193 int end_ndx
, page_locked
= 0;
1196 sm
= &smd_free
[free_ndx
];
1199 allocq
= sm
->sm_allocq
;
1200 mutex_enter(&allocq
->smq_mtx
);
1202 if ((smp
= allocq
->smq_free
) == NULL
) {
1206 * The alloc list is empty or this queue is being skipped;
1207 * first see if the allocq toggled.
1209 if (sm
->sm_allocq
!= allocq
) {
1211 mutex_exit(&allocq
->smq_mtx
);
1214 releq
= sm
->sm_releq
;
1215 if (!mutex_tryenter(&releq
->smq_mtx
)) {
1216 /* cannot get releq; a free smp may be there now */
1217 mutex_exit(&allocq
->smq_mtx
);
1220 * This loop could spin forever if this thread has
1221 * higher priority than the thread that is holding
1222 * releq->smq_mtx. In order to force the other thread
1223 * to run, we'll lock/unlock the mutex which is safe
1224 * since we just unlocked the allocq mutex.
1226 mutex_enter(&releq
->smq_mtx
);
1227 mutex_exit(&releq
->smq_mtx
);
1230 if (releq
->smq_free
== NULL
) {
1232 * This freelist is empty.
1233 * This should not happen unless clients
1234 * are failing to release the segmap
1235 * window after accessing the data.
1236 * Before resorting to sleeping, try
1237 * the next list of the same color.
1239 free_ndx
= (free_ndx
+ smd_ncolor
) & smd_freemsk
;
1240 if (free_ndx
!= end_ndx
) {
1241 mutex_exit(&releq
->smq_mtx
);
1242 mutex_exit(&allocq
->smq_mtx
);
1243 sm
= &smd_free
[free_ndx
];
1247 * Tried all freelists of the same color once,
1248 * wait on this list and hope something gets freed.
1250 segmapcnt
.smp_get_nofree
.value
.ul
++;
1252 mutex_exit(&sm
->sm_freeq
[1].smq_mtx
);
1253 cv_wait(&sm
->sm_free_cv
,
1254 &sm
->sm_freeq
[0].smq_mtx
);
1256 mutex_exit(&sm
->sm_freeq
[0].smq_mtx
);
1257 sm
= &smd_free
[free_ndx
];
1261 * Something on the rele queue; flip the alloc
1262 * and rele queues and retry.
1264 sm
->sm_allocq
= releq
;
1265 sm
->sm_releq
= allocq
;
1266 mutex_exit(&allocq
->smq_mtx
);
1267 mutex_exit(&releq
->smq_mtx
);
1276 * Fastpath the case we get the smap mutex
1281 smtx
= SMAPMTX(smp
);
1282 if (!mutex_tryenter(smtx
)) {
1284 * Another thread is trying to reclaim this slot.
1285 * Skip to the next queue or smap.
1287 if ((smp
= smp
->sm_next
) == first
) {
1294 * if kpme exists, get shared lock on the page
1296 if (segmap_kpm
&& smp
->sm_vp
!= NULL
) {
1298 kpme
= GET_KPME(smp
);
1299 pp
= kpme
->kpe_page
;
1302 if (!page_trylock(pp
, SE_SHARED
)) {
1315 if (kpme
->kpe_page
== NULL
) {
1324 * At this point, we've selected smp. Remove smp
1325 * from its freelist. If smp is the first one in
1326 * the freelist, update the head of the freelist.
1329 ASSERT(first
== allocq
->smq_free
);
1330 allocq
->smq_free
= smp
->sm_next
;
1334 * if the head of the freelist still points to smp,
1335 * then there are no more free smaps in that list.
1337 if (allocq
->smq_free
== smp
)
1341 allocq
->smq_free
= NULL
;
1343 smp
->sm_prev
->sm_next
= smp
->sm_next
;
1344 smp
->sm_next
->sm_prev
= smp
->sm_prev
;
1346 mutex_exit(&allocq
->smq_mtx
);
1347 smp
->sm_prev
= smp
->sm_next
= NULL
;
1350 * if pp != NULL, pp must have been locked;
1351 * grab_smp() unlocks pp.
1353 ASSERT((pp
== NULL
) || PAGE_LOCKED(pp
));
1355 /* return smp locked. */
1356 ASSERT(SMAPMTX(smp
) == smtx
);
1357 ASSERT(MUTEX_HELD(smtx
));
1364 * Special public segmap operations
1368 * Create pages (without using VOP_GETPAGE) and load up translations to them.
1369 * If softlock is TRUE, then set things up so that it looks like a call
1370 * to segmap_fault with F_SOFTLOCK.
1372 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
1374 * All fields in the generic segment (struct seg) are considered to be
1375 * read-only for "segmap" even though the kernel address space (kas) may
1376 * not be locked, hence no lock is needed to access them.
1379 segmap_pagecreate(struct seg
*seg
, caddr_t addr
, size_t len
, int softlock
)
1381 struct segmap_data
*smd
= (struct segmap_data
*)seg
->s_data
;
1392 ASSERT(seg
->s_as
== &kas
);
1394 if (segmap_kpm
&& IS_KPM_ADDR(addr
)) {
1396 * Pages are successfully prefaulted and locked in
1397 * segmap_getmapflt and can't be unlocked until
1398 * segmap_release. The SM_KPM_NEWPAGE flag is set
1399 * in segmap_pagecreate_kpm when new pages are created.
1400 * and it is returned as "newpage" indication here.
1402 if ((smp
= get_smap_kpm(addr
, NULL
)) == NULL
) {
1403 panic("segmap_pagecreate: smap not found "
1404 "for addr %p", (void *)addr
);
1408 smtx
= SMAPMTX(smp
);
1409 newpage
= smp
->sm_flags
& SM_KPM_NEWPAGE
;
1410 smp
->sm_flags
&= ~SM_KPM_NEWPAGE
;
1416 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_pagecreate
++;
1419 addr
= (caddr_t
)((uintptr_t)addr
& (uintptr_t)PAGEMASK
);
1421 smp
= GET_SMAP(seg
, addr
);
1424 * We don't grab smp mutex here since we assume the smp
1425 * has a refcnt set already which prevents the slot from
1428 ASSERT(smp
->sm_refcnt
> 0);
1431 off
= smp
->sm_off
+ ((u_offset_t
)((uintptr_t)addr
& MAXBOFFSET
));
1432 prot
= smd
->smd_prot
;
1434 for (; addr
< eaddr
; addr
+= PAGESIZE
, off
+= PAGESIZE
) {
1435 hat_flag
= HAT_LOAD
;
1436 pp
= page_lookup(vp
, off
, SE_SHARED
);
1440 if ((pp
= page_create_va(vp
, off
,
1441 PAGESIZE
, PG_WAIT
, seg
, addr
)) == NULL
) {
1442 panic("segmap_pagecreate: page_create failed");
1449 * Since pages created here do not contain valid
1450 * data until the caller writes into them, the
1451 * "exclusive" lock will not be dropped to prevent
1452 * other users from accessing the page. We also
1453 * have to lock the translation to prevent a fault
1454 * from occurring when the virtual address mapped by
1455 * this page is written into. This is necessary to
1456 * avoid a deadlock since we haven't dropped the
1459 bitindex
= (ushort_t
)((off
- smp
->sm_off
) >> PAGESHIFT
);
1462 * Large Files: The following assertion is to
1463 * verify the cast above.
1465 ASSERT((u_offset_t
)(off
- smp
->sm_off
) <= INT_MAX
);
1466 smtx
= SMAPMTX(smp
);
1468 smp
->sm_bitmap
|= SMAP_BIT_MASK(bitindex
);
1471 hat_flag
= HAT_LOAD_LOCK
;
1472 } else if (softlock
) {
1473 hat_flag
= HAT_LOAD_LOCK
;
1476 if (IS_VMODSORT(pp
->p_vnode
) && (prot
& PROT_WRITE
))
1479 hat_memload(kas
.a_hat
, addr
, pp
, prot
, hat_flag
);
1481 if (hat_flag
!= HAT_LOAD_LOCK
)
1484 TRACE_5(TR_FAC_VM
, TR_SEGMAP_PAGECREATE
,
1485 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
1486 seg
, addr
, pp
, vp
, off
);
1493 segmap_pageunlock(struct seg
*seg
, caddr_t addr
, size_t len
, enum seg_rw rw
)
1503 ASSERT(seg
->s_as
== &kas
);
1506 addr
= (caddr_t
)((uintptr_t)addr
& (uintptr_t)PAGEMASK
);
1508 if (segmap_kpm
&& IS_KPM_ADDR(addr
)) {
1510 * Pages are successfully prefaulted and locked in
1511 * segmap_getmapflt and can't be unlocked until
1512 * segmap_release, so no pages or hat mappings have
1513 * to be unlocked at this point.
1516 if ((smp
= get_smap_kpm(addr
, NULL
)) == NULL
) {
1517 panic("segmap_pageunlock: smap not found "
1518 "for addr %p", (void *)addr
);
1522 ASSERT(smp
->sm_refcnt
> 0);
1523 mutex_exit(SMAPMTX(smp
));
1528 smp
= GET_SMAP(seg
, addr
);
1529 smtx
= SMAPMTX(smp
);
1531 ASSERT(smp
->sm_refcnt
> 0);
1534 off
= smp
->sm_off
+ ((u_offset_t
)((uintptr_t)addr
& MAXBOFFSET
));
1536 for (; addr
< eaddr
; addr
+= PAGESIZE
, off
+= PAGESIZE
) {
1537 bitmask
= SMAP_BIT_MASK((int)(off
- smp
->sm_off
) >> PAGESHIFT
);
1540 * Large Files: Following assertion is to verify
1541 * the correctness of the cast to (int) above.
1543 ASSERT((u_offset_t
)(off
- smp
->sm_off
) <= INT_MAX
);
1546 * If the bit corresponding to "off" is set,
1547 * clear this bit in the bitmap, unlock translations,
1548 * and release the "exclusive" lock on the page.
1550 if (smp
->sm_bitmap
& bitmask
) {
1552 smp
->sm_bitmap
&= ~bitmask
;
1555 hat_unlock(kas
.a_hat
, addr
, PAGESIZE
);
1558 * Use page_find() instead of page_lookup() to
1559 * find the page since we know that it has
1562 pp
= page_find(vp
, off
);
1564 panic("segmap_pageunlock: page not found");
1567 if (rw
== S_WRITE
) {
1569 } else if (rw
!= S_OTHER
) {
1579 segmap_getmap(struct seg
*seg
, struct vnode
*vp
, u_offset_t off
)
1581 return (segmap_getmapflt(seg
, vp
, off
, MAXBSIZE
, 0, S_OTHER
));
1585 * This is the magic virtual address that offset 0 of an ELF
1586 * file gets mapped to in user space. This is used to pick
1587 * the vac color on the freelist.
1589 #define ELF_OFFZERO_VA (0x10000)
1591 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
1592 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
1593 * The return address is always MAXBSIZE aligned.
1595 * If forcefault is nonzero and the MMU translations haven't yet been created,
1596 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
1607 struct smap
*smp
, *nsmp
;
1608 extern struct vnode
*common_specvp();
1609 caddr_t baseaddr
; /* MAXBSIZE aligned */
1614 kmutex_t
*hashmtx
, *smapmtx
;
1620 page_t
*pl
[MAXPPB
+ 1];
1624 ASSERT(seg
->s_as
== &kas
);
1625 ASSERT(seg
== segkmap
);
1627 baseoff
= off
& (offset_t
)MAXBMASK
;
1628 if (off
+ len
> baseoff
+ MAXBSIZE
) {
1629 panic("segmap_getmap bad len");
1634 * If this is a block device we have to be sure to use the
1635 * "common" block device vnode for the mapping.
1637 if (vp
->v_type
== VBLK
)
1638 vp
= common_specvp(vp
);
1640 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_getmap
++;
1642 if (segmap_kpm
== 0 ||
1643 (forcefault
== SM_PAGECREATE
&& rw
!= S_WRITE
)) {
1647 SMAP_HASHFUNC(vp
, off
, hashid
); /* macro assigns hashid */
1648 hashmtx
= SHASHMTX(hashid
);
1651 mutex_enter(hashmtx
);
1652 for (smp
= smd_hash
[hashid
].sh_hash_list
;
1653 smp
!= NULL
; smp
= smp
->sm_hash
)
1654 if (smp
->sm_vp
== vp
&& smp
->sm_off
== baseoff
)
1656 mutex_exit(hashmtx
);
1661 ASSERT(vp
->v_count
!= 0);
1664 * Get smap lock and recheck its tag. The hash lock
1665 * is dropped since the hash is based on (vp, off)
1666 * and (vp, off) won't change when we have smap mtx.
1668 smapmtx
= SMAPMTX(smp
);
1669 mutex_enter(smapmtx
);
1670 if (smp
->sm_vp
!= vp
|| smp
->sm_off
!= baseoff
) {
1671 mutex_exit(smapmtx
);
1675 if (smp
->sm_refcnt
== 0) {
1677 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_get_reclaim
++;
1680 * Could still be on the free list. However, this
1681 * could also be an smp that is transitioning from
1682 * the free list when we have too much contention
1683 * for the smapmtx's. In this case, we have an
1684 * unlocked smp that is not on the free list any
1685 * longer, but still has a 0 refcnt. The only way
1686 * to be sure is to check the freelist pointers.
1687 * Since we now have the smapmtx, we are guaranteed
1688 * that the (vp, off) won't change, so we are safe
1689 * to reclaim it. get_free_smp() knows that this
1690 * can happen, and it will check the refcnt.
1693 if ((smp
->sm_next
!= NULL
)) {
1694 struct sm_freeq
*freeq
;
1696 ASSERT(smp
->sm_prev
!= NULL
);
1697 sm
= &smd_free
[smp
->sm_free_ndx
];
1699 if (smp
->sm_flags
& SM_QNDX_ZERO
)
1700 freeq
= &sm
->sm_freeq
[0];
1702 freeq
= &sm
->sm_freeq
[1];
1704 mutex_enter(&freeq
->smq_mtx
);
1705 if (freeq
->smq_free
!= smp
) {
1707 * fastpath normal case
1709 smp
->sm_prev
->sm_next
= smp
->sm_next
;
1710 smp
->sm_next
->sm_prev
= smp
->sm_prev
;
1711 } else if (smp
== smp
->sm_next
) {
1713 * Taking the last smap on freelist
1715 freeq
->smq_free
= NULL
;
1718 * Reclaiming 1st smap on list
1720 freeq
->smq_free
= smp
->sm_next
;
1721 smp
->sm_prev
->sm_next
= smp
->sm_next
;
1722 smp
->sm_next
->sm_prev
= smp
->sm_prev
;
1724 mutex_exit(&freeq
->smq_mtx
);
1725 smp
->sm_prev
= smp
->sm_next
= NULL
;
1727 ASSERT(smp
->sm_prev
== NULL
);
1728 segmapcnt
.smp_stolen
.value
.ul
++;
1732 segmapcnt
.smp_get_use
.value
.ul
++;
1734 smp
->sm_refcnt
++; /* another user */
1737 * We don't invoke segmap_fault via TLB miss, so we set ref
1738 * and mod bits in advance. For S_OTHER we set them in
1739 * segmap_fault F_SOFTUNLOCK.
1742 if (rw
== S_WRITE
) {
1743 smp
->sm_flags
|= SM_WRITE_DATA
;
1744 } else if (rw
== S_READ
) {
1745 smp
->sm_flags
|= SM_READ_DATA
;
1748 mutex_exit(smapmtx
);
1753 uint32_t free_ndx
, *free_ndxp
;
1754 union segmap_cpu
*scpu
;
1757 * On a PAC machine or a machine with anti-alias
1758 * hardware, smd_colormsk will be zero.
1760 * On a VAC machine- pick color by offset in the file
1761 * so we won't get VAC conflicts on elf files.
1762 * On data files, color does not matter but we
1763 * don't know what kind of file it is so we always
1764 * pick color by offset. This causes color
1765 * corresponding to file offset zero to be used more
1768 color
= (baseoff
>> MAXBSHIFT
) & smd_colormsk
;
1769 scpu
= smd_cpu
+CPU
->cpu_seqid
;
1770 free_ndxp
= &scpu
->scpu
.scpu_free_ndx
[color
];
1771 free_ndx
= (*free_ndxp
+= smd_ncolor
) & smd_freemsk
;
1773 colors_used
[free_ndx
]++;
1777 * Get a locked smp slot from the free list.
1779 smp
= get_free_smp(free_ndx
);
1780 smapmtx
= SMAPMTX(smp
);
1782 ASSERT(smp
->sm_vp
== NULL
);
1784 if ((nsmp
= segmap_hashin(smp
, vp
, baseoff
, hashid
)) != NULL
) {
1786 * Failed to hashin, there exists one now.
1787 * Return the smp we just allocated.
1789 segmap_smapadd(smp
);
1790 mutex_exit(smapmtx
);
1795 smp
->sm_refcnt
++; /* another user */
1798 * We don't invoke segmap_fault via TLB miss, so we set ref
1799 * and mod bits in advance. For S_OTHER we set them in
1800 * segmap_fault F_SOFTUNLOCK.
1803 if (rw
== S_WRITE
) {
1804 smp
->sm_flags
|= SM_WRITE_DATA
;
1805 } else if (rw
== S_READ
) {
1806 smp
->sm_flags
|= SM_READ_DATA
;
1809 mutex_exit(smapmtx
);
1815 goto use_segmap_range
;
1820 /* Lint directive required until 6746211 is fixed */
1822 ASSERT(PAGESIZE
== MAXBSIZE
);
1825 * remember the last smp faulted on this cpu.
1827 (smd_cpu
+CPU
->cpu_seqid
)->scpu
.scpu_last_smap
= smp
;
1829 if (forcefault
== SM_PAGECREATE
) {
1830 baseaddr
= segmap_pagecreate_kpm(seg
, vp
, baseoff
, smp
, rw
);
1835 (pp
= GET_KPME(smp
)->kpe_page
) != NULL
) {
1841 if (page_trylock(pp
, SE_SHARED
)) {
1842 if (PP_ISFREE(pp
) ||
1843 !(pp
->p_vnode
== vp
&&
1844 pp
->p_offset
== baseoff
)) {
1846 pp
= page_lookup(vp
, baseoff
,
1850 pp
= page_lookup(vp
, baseoff
, SE_SHARED
);
1854 ASSERT(GET_KPME(smp
)->kpe_page
== NULL
);
1858 if (rw
== S_WRITE
&&
1859 hat_page_getattr(pp
, P_MOD
| P_REF
) !=
1866 * We have the p_selock as reader, grab_smp
1867 * can't hit us, we have bumped the smap
1868 * refcnt and hat_pageunload needs the
1869 * p_selock exclusive.
1871 kpme
= GET_KPME(smp
);
1872 if (kpme
->kpe_page
== pp
) {
1873 baseaddr
= hat_kpm_page2va(pp
, 0);
1874 } else if (kpme
->kpe_page
== NULL
) {
1875 baseaddr
= hat_kpm_mapin(pp
, kpme
);
1877 panic("segmap_getmapflt: stale "
1878 "kpme page, kpme %p", (void *)kpme
);
1883 * We don't invoke segmap_fault via TLB miss,
1884 * so we set ref and mod bits in advance.
1885 * For S_OTHER and we set them in segmap_fault
1888 if (rw
== S_READ
&& !hat_isref(pp
))
1897 base
= segkpm_create_va(baseoff
);
1898 error
= VOP_GETPAGE(vp
, (offset_t
)baseoff
, len
, &prot
, pl
, MAXBSIZE
,
1899 seg
, base
, rw
, CRED(), NULL
);
1902 if (error
|| pp
== NULL
) {
1904 * Use segmap address slot and let segmap_fault deal
1905 * with the error cases. There is no error return
1908 goto use_segmap_range
;
1911 ASSERT(pl
[1] == NULL
);
1914 * When prot is not returned w/ PROT_ALL the returned pages
1915 * are not backed by fs blocks. For most of the segmap users
1916 * this is no problem, they don't write to the pages in the
1917 * same request and therefore don't rely on a following
1918 * trap driven segmap_fault. With SM_LOCKPROTO users it
1919 * is more secure to use segkmap adresses to allow
1920 * protection segmap_fault's.
1922 if (prot
!= PROT_ALL
&& forcefault
== SM_LOCKPROTO
) {
1924 * Use segmap address slot and let segmap_fault
1925 * do the error return.
1927 ASSERT(rw
!= S_WRITE
);
1928 ASSERT(PAGE_LOCKED(pp
));
1931 goto use_segmap_range
;
1935 * We have the p_selock as reader, grab_smp can't hit us, we
1936 * have bumped the smap refcnt and hat_pageunload needs the
1937 * p_selock exclusive.
1939 kpme
= GET_KPME(smp
);
1940 if (kpme
->kpe_page
== pp
) {
1941 baseaddr
= hat_kpm_page2va(pp
, 0);
1942 } else if (kpme
->kpe_page
== NULL
) {
1943 baseaddr
= hat_kpm_mapin(pp
, kpme
);
1945 panic("segmap_getmapflt: stale kpme page after "
1946 "VOP_GETPAGE, kpme %p", (void *)kpme
);
1950 smd_cpu
[CPU
->cpu_seqid
].scpu
.scpu_fault
++;
1956 baseaddr
= seg
->s_base
+ ((smp
- smd_smap
) * MAXBSIZE
);
1957 TRACE_4(TR_FAC_VM
, TR_SEGMAP_GETMAP
,
1958 "segmap_getmap:seg %p addr %p vp %p offset %llx",
1959 seg
, baseaddr
, vp
, baseoff
);
1962 * Prefault the translations
1964 vaddr
= baseaddr
+ (off
- baseoff
);
1965 if (forcefault
&& (newslot
|| !hat_probe(kas
.a_hat
, vaddr
))) {
1967 caddr_t pgaddr
= (caddr_t
)((uintptr_t)vaddr
&
1968 (uintptr_t)PAGEMASK
);
1970 (void) segmap_fault(kas
.a_hat
, seg
, pgaddr
,
1971 (vaddr
+ len
- pgaddr
+ PAGESIZE
- 1) & (uintptr_t)PAGEMASK
,
int
segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
{
    struct smap *smp;
    int error;
    int bflags = 0;
    struct vnode *vp;
    u_offset_t offset;
    kmutex_t *smtx;
    int is_kpm = 0;
    page_t *pp;

    if (segmap_kpm && IS_KPM_ADDR(addr)) {

        if (((uintptr_t)addr & MAXBOFFSET) != 0) {
            panic("segmap_release: addr %p not "
                "MAXBSIZE aligned", (void *)addr);
            /*NOTREACHED*/
        }

        if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
            panic("segmap_release: smap not found "
                "for addr %p", (void *)addr);
            /*NOTREACHED*/
        }

        TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
            "segmap_relmap:seg %p addr %p smp %p",
            seg, addr, smp);

        smtx = SMAPMTX(smp);

        /*
         * For compatibility reasons segmap_pagecreate_kpm sets this
         * flag to allow a following segmap_pagecreate to return
         * this as "newpage" flag. When segmap_pagecreate is not
         * called at all we clear it now.
         */
        smp->sm_flags &= ~SM_KPM_NEWPAGE;
        is_kpm = 1;
        if (smp->sm_flags & SM_WRITE_DATA) {
            hat_setrefmod(pp);
        } else if (smp->sm_flags & SM_READ_DATA) {
            hat_setref(pp);
        }
    } else {
        if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
            ((uintptr_t)addr & MAXBOFFSET) != 0) {
            panic("segmap_release: bad addr %p", (void *)addr);
            /*NOTREACHED*/
        }
        smp = GET_SMAP(seg, addr);

        TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
            "segmap_relmap:seg %p addr %p smp %p",
            seg, addr, smp);

        smtx = SMAPMTX(smp);
        mutex_enter(smtx);
        smp->sm_flags |= SM_NOTKPM_RELEASED;
    }

    ASSERT(smp->sm_refcnt > 0);

    /*
     * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
     * are set.
     */
    if ((flags & ~SM_DONTNEED) != 0) {
        if (flags & SM_WRITE)
            segmapcnt.smp_rel_write.value.ul++;
        if (flags & SM_ASYNC) {
            bflags |= B_ASYNC;
            segmapcnt.smp_rel_async.value.ul++;
        }
        if (flags & SM_INVAL) {
            bflags |= B_INVAL;
            segmapcnt.smp_rel_abort.value.ul++;
        }
        if (flags & SM_DESTROY) {
            bflags |= (B_INVAL|B_TRUNC);
            segmapcnt.smp_rel_abort.value.ul++;
        }
        if (smp->sm_refcnt == 1) {
            /*
             * We only bother doing the FREE and DONTNEED flags
             * if no one else is still referencing this mapping.
             */
            if (flags & SM_FREE) {
                bflags |= B_FREE;
                segmapcnt.smp_rel_free.value.ul++;
            }
            if (flags & SM_DONTNEED) {
                bflags |= B_DONTNEED;
                segmapcnt.smp_rel_dontneed.value.ul++;
            }
        }
    } else {
        smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
    }

    vp = smp->sm_vp;
    offset = smp->sm_off;

    if (--smp->sm_refcnt == 0) {

        smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);

        if (flags & (SM_INVAL|SM_DESTROY)) {
            segmap_hashout(smp);	/* remove map info */
            if (is_kpm) {
                hat_kpm_mapout(pp, GET_KPME(smp), addr);
                if (smp->sm_flags & SM_NOTKPM_RELEASED) {
                    smp->sm_flags &= ~SM_NOTKPM_RELEASED;
                    hat_unload(kas.a_hat, segkmap->s_base +
                        ((smp - smd_smap) * MAXBSIZE),
                        MAXBSIZE, HAT_UNLOAD);
                }
            } else {
                if (segmap_kpm)
                    segkpm_mapout_validkpme(GET_KPME(smp));

                smp->sm_flags &= ~SM_NOTKPM_RELEASED;
                hat_unload(kas.a_hat, addr, MAXBSIZE,
                    HAT_UNLOAD);
            }
        }
        segmap_smapadd(smp);	/* add to free list */
    }

    mutex_exit(smtx);

    if (is_kpm)
        page_unlock(pp);
    /*
     * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
     * are set.
     */
    if ((flags & ~SM_DONTNEED) != 0) {
        error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
            bflags, CRED(), NULL);
    } else {
        error = 0;
    }

    return (error);
}
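/*
 * Illustrative only: the SM_* flags above map onto b_flags for the
 * VOP_PUTPAGE call, so a caller that dirtied the window and wants it
 * written back without waiting would release it with something like
 *
 *	(void) segmap_release(segkmap, base, SM_WRITE | SM_ASYNC);
 *
 * while a caller that read data it will not need again soon can pass
 * SM_DONTNEED to let the pages be freed cheaply.
 */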
/*
 * Dump the pages belonging to this segmap segment.
 */
static void
segmap_dump(struct seg *seg)
{
    struct segmap_data *smd;
    struct smap *smp, *smp_end;
    page_t *pp;
    pfn_t pfn;
    u_offset_t off;
    caddr_t addr;
    int we_own_it;

    smd = (struct segmap_data *)seg->s_data;
    addr = seg->s_base;
    for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
        smp < smp_end; smp++, addr += MAXBSIZE) {

        if (smp->sm_refcnt) {
            for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
                we_own_it = 0;
                /*
                 * If pp == NULL, the page either does
                 * not exist or is exclusively locked.
                 * So determine if it exists before
                 * waiting for it.
                 */
                if ((pp = page_lookup_nowait(smp->sm_vp,
                    smp->sm_off + off, SE_SHARED)))
                    we_own_it = 1;
                else
                    pp = page_exists(smp->sm_vp,
                        smp->sm_off + off);

                if (pp) {
                    pfn = page_pptonum(pp);
                    dump_addpage(seg->s_as,
                        addr + off, pfn);
                    if (we_own_it)
                        page_unlock(pp);
                }
                dump_timeleft = dump_timeout;
            }
        }
    }
}
static int
segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
    return (ENOTSUP);
}
static int
segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
    struct segmap_data *smd = (struct segmap_data *)seg->s_data;

    memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
    memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
    return (0);
}
static lgrp_mem_policy_info_t *
segmap_getpolicy(struct seg *seg, caddr_t addr)
{
    return (NULL);
}

static int
segmap_capable(struct seg *seg, segcapability_t capability)
{
    return (0);
}
#ifdef	SEGKPM_SUPPORT

/*
 * segkpm support routines
 */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
    caddr_t base;
    page_t *pp;
    int newpage = 0;
    struct kpme *kpme;

    ASSERT(smp->sm_refcnt > 0);

    if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
        kmutex_t *smtx;

        base = segkpm_create_va(off);

        if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
            seg, base)) == NULL) {
            panic("segmap_pagecreate_kpm: "
                "page_create failed");
            /*NOTREACHED*/
        }

        newpage = 1;
        ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);

        /*
         * Mark this here until the following segmap_pagecreate
         * or segmap_release.
         */
        smtx = SMAPMTX(smp);
        mutex_enter(smtx);
        smp->sm_flags |= SM_KPM_NEWPAGE;
        mutex_exit(smtx);
    }

    kpme = GET_KPME(smp);
    if (!newpage && kpme->kpe_page == pp)
        base = hat_kpm_page2va(pp, 0);
    else
        base = hat_kpm_mapin(pp, kpme);

    /*
     * FS code may decide not to call segmap_pagecreate and we
     * don't invoke segmap_fault via TLB miss, so we have to set
     * ref and mod bits in advance.
     */
    if (rw == S_WRITE) {
        hat_setrefmod(pp);
    } else {
        ASSERT(rw == S_READ);
        hat_setref(pp);
    }

    smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;

    return (base);
}
/*
 * Find the smap structure corresponding to the
 * KPM addr and return it locked.
 */
struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
    struct smap *smp;
    struct vnode *vp;
    u_offset_t offset;
    caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
    int hashid;
    kmutex_t *hashmtx;
    page_t *pp;
    union segmap_cpu *scpu;

    pp = hat_kpm_vaddr2page(baseaddr);

    ASSERT(pp && !PP_ISFREE(pp));
    ASSERT(PAGE_LOCKED(pp));
    ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);

    vp = pp->p_vnode;
    offset = pp->p_offset;

    /*
     * Assume the last smap used on this cpu is the one needed.
     */
    scpu = smd_cpu+CPU->cpu_seqid;
    smp = scpu->scpu.scpu_last_smap;
    mutex_enter(&smp->sm_mtx);
    if (smp->sm_vp == vp && smp->sm_off == offset) {
        ASSERT(smp->sm_refcnt > 0);
    } else {
        /*
         * Assumption wrong, find the smap on the hash chain.
         */
        mutex_exit(&smp->sm_mtx);
        SMAP_HASHFUNC(vp, offset, hashid);	/* macro assigns hashid */
        hashmtx = SHASHMTX(hashid);

        mutex_enter(hashmtx);
        smp = smd_hash[hashid].sh_hash_list;
        for (; smp != NULL; smp = smp->sm_hash) {
            if (smp->sm_vp == vp && smp->sm_off == offset)
                break;
        }
        mutex_exit(hashmtx);

        if (smp) {
            mutex_enter(&smp->sm_mtx);
            ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
        }
    }

    if (ppp)
        *ppp = smp ? pp : NULL;

    return (smp);
}
#else	/* SEGKPM_SUPPORT */

static caddr_t
segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
    struct smap *smp, enum seg_rw rw)
{
    return (NULL);
}

struct smap *
get_smap_kpm(caddr_t addr, page_t **ppp)
{
    return (NULL);
}

#endif	/* SEGKPM_SUPPORT */