4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
28 * VM - generic vnode page mapping interfaces.
30 * Mechanism to provide temporary mappings to vnode pages.
31 * The typical use would be to copy/access file data.
34 #include <sys/types.h>
35 #include <sys/t_lock.h>
36 #include <sys/param.h>
37 #include <sys/sysmacros.h>
39 #include <sys/systm.h>
40 #include <sys/vnode.h>
42 #include <sys/errno.h>
45 #include <sys/vtrace.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/thread.h>
49 #include <sys/dumphdr.h>
50 #include <sys/bitmap.h>
53 #include <vm/seg_kmem.h>
57 #include <vm/seg_kpm.h>
58 #include <vm/seg_map.h>
67 * VPM can be disabled by setting vpm_enable = 0 in
82 int vpm_cache_enable
= 1;
83 long vpm_cache_percent
= 12;
85 int vpm_nfreelist
= 0;
95 char vpm_pad
[VPM_S_PAD
];
97 static union vpm_cpu
*vpmd_cpu
;
99 #define vfree_ndx vcpu.vcpu_free_ndx
101 int vpm_cachemode
= VPMCACHE_LRU
;
103 #define PPMTX(pp) (&(pp)->p_ilock)
105 static struct vpmap
*vpmd_vpmap
; /* list of vpmap structs preallocated */
106 static struct vpmfree
*vpmd_free
;
/*
 * Conversions between a vpmap pointer, its id and its freelist bin.
 *
 * VPMAPMTX(vpm)	address of the mutex embedded in the vpmap.
 * VPMAP2VMF(vpm)	freelist (struct vpmfree) the vpmap hashes to; the
 *			bin is the vpmap's index in vpmd_vpmap masked with
 *			vpmd_freemsk.
 * VPMAP2VMF_NDX(vpm)	the same bin, expressed as a ushort_t index.
 * VPMP(id)		vpmap for a 1-based id (inverse of VPMID).
 * VPMID(vpm)		1-based id of the vpmap: its index in vpmd_vpmap
 *			plus one, which leaves 0 free to mean "no vpmap"
 *			in page_t's p_vpmref.
 *
 * Every parameter is parenthesized and every expansion is a single
 * parenthesized expression, so arguments such as "vpm + 1" and uses
 * next to higher-precedence operators expand as intended.
 */
#define	VPMAPMTX(vpm)		(&(vpm)->vpm_mtx)
#define	VPMAP2VMF(vpm)		\
	(&vpmd_free[((vpm) - vpmd_vpmap) & vpmd_freemsk])
#define	VPMAP2VMF_NDX(vpm)	\
	((ushort_t)(((vpm) - vpmd_vpmap) & vpmd_freemsk))
#define	VPMP(id)		(&vpmd_vpmap[(id) - 1])
#define	VPMID(vpm)		((uint_t)(((vpm) - vpmd_vpmap) + 1))
119 int vpmd_prevpagelocked
;
120 int vpmd_getpagefailed
;
122 int vpmd_emptyfreelist
;
126 #define VPM_DEBUG(x) ((vpm_debug.x)++)
131 int contend_mtbf
= 127;
133 #define VPM_MTBF(v, f) (((++(v)) & (f)) != (f))
137 #define VPM_MTBF(v, f) (1)
138 #define VPM_DEBUG(x) /* nothing */
145 * The main purpose of having a cache here is to speed up page_lookup()
146 * operations and also provide an LRU(default) behaviour of file pages. The
147 * page_lookup() operation tends to be expensive if a page has to be
148 * reclaimed from the system page cache("cachelist"). Once we speed up the
149 * page_lookup()->page_reclaim() path then there should be no need for
150 * this cache. The system page cache(cachelist) should effectively serve the
151 * purpose of caching file pages.
153 * This cache is very similar to segmap's smap cache. Each page in the
154 * cache is tracked by the structure vpmap_t. But unlike segmap, there is no
155 * hash table. The page_t has a reference to the vpmap_t when cached. For a
156 * given vnode, offset the page is found by means of a page_lookup() operation.
157 * Any page which has a mapping(i.e when cached) will not be in the
158 * system 'cachelist'. Hence the page_lookup() will not have to do a
159 * page_reclaim(). That is how the cache serves to speed up page_lookup()
162 * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system.
170 struct vpmfree
*vpmflp
;
172 extern void prefetch_smap_w(void *);
178 if (!vpm_enable
|| !vpm_cache_enable
) {
183 * Set the size of the cache.
185 vpm_cache_size
= mmu_ptob((physmem
* vpm_cache_percent
)/100);
186 if (vpm_cache_size
< VPMAP_MINCACHE
) {
187 vpm_cache_size
= VPMAP_MINCACHE
;
190 if (vpm_cache_size
> VPMAP_MAXCACHE
) {
191 vpm_cache_size
= VPMAP_MAXCACHE
;
195 * Number of freelists.
197 if (vpm_nfreelist
== 0) {
198 vpm_nfreelist
= max_ncpus
;
199 } else if (vpm_nfreelist
< 0 || vpm_nfreelist
> 2 * max_ncpus
) {
200 cmn_err(CE_WARN
, "vpmap create : number of freelist "
201 "vpm_nfreelist %d using %d", vpm_nfreelist
, max_ncpus
);
202 vpm_nfreelist
= 2 * max_ncpus
;
206 * Round it up to the next power of 2
208 if (!ISP2(vpm_nfreelist
)) {
209 vpm_nfreelist
= 1 << (highbit(vpm_nfreelist
));
211 vpmd_freemsk
= vpm_nfreelist
- 1;
214 * Use a per cpu rotor index to spread the allocations evenly
215 * across the available vpm freelists.
217 vpmd_cpu
= kmem_zalloc(sizeof (union vpm_cpu
) * max_ncpus
, KM_SLEEP
);
219 for (i
= 0; i
< max_ncpus
; i
++) {
221 vpmd_cpu
[i
].vfree_ndx
= ndx
;
222 ndx
= (ndx
+ 1) & vpmd_freemsk
;
226 * Allocate and initialize the freelist.
228 vpmd_free
= kmem_zalloc(vpm_nfreelist
* sizeof (struct vpmfree
),
230 for (i
= 0; i
< vpm_nfreelist
; i
++) {
232 vpmflp
= &vpmd_free
[i
];
234 * Set up initial queue pointers. They will get flipped
237 vpmflp
->vpm_allocq
= &vpmflp
->vpm_freeq
[VPMALLOCQ
];
238 vpmflp
->vpm_releq
= &vpmflp
->vpm_freeq
[VPMRELEQ
];
241 npages
= mmu_btop(vpm_cache_size
);
245 * Allocate and initialize the vpmap structs. We need to
246 * walk the array backwards as the prefetch happens in reverse
249 vpmd_vpmap
= kmem_alloc(sizeof (struct vpmap
) * npages
, KM_SLEEP
);
250 for (vpm
= &vpmd_vpmap
[npages
- 1]; vpm
>= vpmd_vpmap
; vpm
--) {
251 struct vpmfree
*vpmflp
;
252 union vpm_freeq
*releq
;
253 struct vpmap
*vpmapf
;
256 * Use prefetch as we have to walk thru a large number of
257 * these data structures. We just use the smap's prefetch
258 * routine as it does the same.
260 prefetch_smap_w((void *)vpm
);
266 mutex_init(&vpm
->vpm_mtx
, NULL
, MUTEX_DEFAULT
, NULL
);
267 vpm
->vpm_free_ndx
= VPMAP2VMF_NDX(vpm
);
269 vpmflp
= VPMAP2VMF(vpm
);
270 releq
= vpmflp
->vpm_releq
;
272 vpmapf
= releq
->vpmq_free
;
273 if (vpmapf
== NULL
) {
274 releq
->vpmq_free
= vpm
->vpm_next
= vpm
->vpm_prev
= vpm
;
276 vpm
->vpm_next
= vpmapf
;
277 vpm
->vpm_prev
= vpmapf
->vpm_prev
;
278 vpmapf
->vpm_prev
= vpm
;
279 vpm
->vpm_prev
->vpm_next
= vpm
;
280 releq
->vpmq_free
= vpm
->vpm_next
;
284 * Indicate that the vpmap is on the releq at start
286 vpm
->vpm_ndxflg
= VPMRELEQ
;
292 * unhooks vpm from the freelist if it is still on the freelist.
294 #define VPMAP_RMFREELIST(vpm) \
296 if (vpm->vpm_next != NULL) { \
297 union vpm_freeq *freeq; \
298 struct vpmfree *vpmflp; \
299 vpmflp = &vpmd_free[vpm->vpm_free_ndx]; \
300 freeq = &vpmflp->vpm_freeq[vpm->vpm_ndxflg]; \
301 mutex_enter(&freeq->vpmq_mtx); \
302 if (freeq->vpmq_free != vpm) { \
303 vpm->vpm_prev->vpm_next = vpm->vpm_next; \
304 vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
305 } else if (vpm == vpm->vpm_next) { \
306 freeq->vpmq_free = NULL; \
308 freeq->vpmq_free = vpm->vpm_next; \
309 vpm->vpm_prev->vpm_next = vpm->vpm_next; \
310 vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
312 mutex_exit(&freeq->vpmq_mtx); \
313 vpm->vpm_next = vpm->vpm_prev = NULL; \
318 get_freelndx(int mode
)
322 ndx
= vpmd_cpu
[CPU
->cpu_seqid
].vfree_ndx
& vpmd_freemsk
;
327 vpmd_cpu
[CPU
->cpu_seqid
].vfree_ndx
++;
335 * Find one vpmap structure from the free lists and use it for the newpage.
336 * The previous page it cached is dissociated and released. The page_t's
337 * p_vpmref is cleared only when the vpm it is pointing to is locked(or
338 * for AMD64 when the page is exclusively locked in page_unload. That is
339 * because the p_vpmref is treated as mapping).
341 * The page's p_vpmref is set when the page is
342 * locked(at least SHARED locked).
344 static struct vpmap
*
345 get_free_vpmap(page_t
*newpage
)
347 struct vpmfree
*vpmflp
;
349 struct vpmap
*vpm
, *first
;
350 union vpm_freeq
*allocq
, *releq
;
352 int end_ndx
, page_locked
= 0;
356 * get the freelist bin index.
358 free_ndx
= get_freelndx(vpm_cachemode
);
361 vpmflp
= &vpmd_free
[free_ndx
];
364 allocq
= vpmflp
->vpm_allocq
;
365 mutex_enter(&allocq
->vpmq_mtx
);
367 if ((vpm
= allocq
->vpmq_free
) == NULL
) {
371 * The alloc list is empty or this queue is being skipped;
372 * first see if the allocq toggled.
374 if (vpmflp
->vpm_allocq
!= allocq
) {
376 mutex_exit(&allocq
->vpmq_mtx
);
379 releq
= vpmflp
->vpm_releq
;
380 if (!mutex_tryenter(&releq
->vpmq_mtx
)) {
381 /* cannot get releq; a free vpmap may be there now */
382 mutex_exit(&allocq
->vpmq_mtx
);
385 * This loop could spin forever if this thread has
386 * higher priority than the thread that is holding
387 * releq->vpmq_mtx. In order to force the other thread
388 * to run, we'll lock/unlock the mutex which is safe
389 * since we just unlocked the allocq mutex.
391 mutex_enter(&releq
->vpmq_mtx
);
392 mutex_exit(&releq
->vpmq_mtx
);
395 if (releq
->vpmq_free
== NULL
) {
396 VPM_DEBUG(vpmd_emptyfreelist
);
398 * This freelist is empty.
399 * This should not happen unless clients
400 * are failing to release the vpmap after
401 * accessing the data. Before resorting
402 * to sleeping, try the next list of the same color.
404 free_ndx
= (free_ndx
+ 1) & vpmd_freemsk
;
405 if (free_ndx
!= end_ndx
) {
406 mutex_exit(&releq
->vpmq_mtx
);
407 mutex_exit(&allocq
->vpmq_mtx
);
408 vpmflp
= &vpmd_free
[free_ndx
];
412 * Tried all freelists.
413 * wait on this list and hope something gets freed.
416 mutex_exit(&vpmflp
->vpm_freeq
[1].vpmq_mtx
);
417 cv_wait(&vpmflp
->vpm_free_cv
,
418 &vpmflp
->vpm_freeq
[0].vpmq_mtx
);
420 mutex_exit(&vpmflp
->vpm_freeq
[0].vpmq_mtx
);
421 vpmflp
= &vpmd_free
[free_ndx
];
422 VPM_DEBUG(vpmd_nofreevpms
);
426 * Something on the rele queue; flip the alloc
427 * and rele queues and retry.
429 vpmflp
->vpm_allocq
= releq
;
430 vpmflp
->vpm_releq
= allocq
;
431 mutex_exit(&allocq
->vpmq_mtx
);
432 mutex_exit(&releq
->vpmq_mtx
);
445 * Fastpath the case we get the vpmap mutex
450 vmtx
= VPMAPMTX(vpm
);
451 if (!mutex_tryenter(vmtx
)) {
453 * Another thread is trying to reclaim this slot.
454 * Skip to the next queue or vpmap.
456 if ((vpm
= vpm
->vpm_next
) == first
) {
464 * Assign this vpm to the newpage.
466 pmtx
= PPMTX(newpage
);
471 * Check if some other thread already assigned a vpm to
474 if ((vpmref
= newpage
->p_vpmref
) == 0) {
475 newpage
->p_vpmref
= VPMID(vpm
);
478 VPM_DEBUG(vpmd_contend
);
486 * At this point, we've selected the vpm. Remove vpm
487 * from its freelist. If vpm is the first one in
488 * the freelist, update the head of the freelist.
491 ASSERT(first
== allocq
->vpmq_free
);
492 allocq
->vpmq_free
= vpm
->vpm_next
;
496 * If the head of the freelist still points to vpm,
497 * then there are no more free vpmaps in that list.
499 if (allocq
->vpmq_free
== vpm
)
503 allocq
->vpmq_free
= NULL
;
505 vpm
->vpm_prev
->vpm_next
= vpm
->vpm_next
;
506 vpm
->vpm_next
->vpm_prev
= vpm
->vpm_prev
;
508 mutex_exit(&allocq
->vpmq_mtx
);
509 vpm
->vpm_prev
= vpm
->vpm_next
= NULL
;
512 * Disassociate the previous page.
513 * p_vpmref is used as a mapping reference to the page.
515 if ((pp
= vpm
->vpm_pp
) != NULL
&&
516 vpm
->vpm_vp
== pp
->p_vnode
&&
517 vpm
->vpm_off
== pp
->p_offset
) {
520 if (page_trylock(pp
, SE_SHARED
)) {
522 * Now verify that it is the correct
523 * page. If not someone else stole it,
524 * so just unlock it and leave.
528 vpm
->vpm_vp
!= pp
->p_vnode
||
529 vpm
->vpm_off
!= pp
->p_offset
||
530 pp
->p_vpmref
!= VPMID(vpm
)) {
540 (void) page_release(pp
, 1);
544 * If the page cannot be locked, just
545 * clear the p_vpmref and go.
548 if (pp
->p_vpmref
== VPMID(vpm
)) {
552 VPM_DEBUG(vpmd_prevpagelocked
);
557 * Setup vpm to point to the new page.
559 vpm
->vpm_pp
= newpage
;
560 vpm
->vpm_vp
= newpage
->p_vnode
;
561 vpm
->vpm_off
= newpage
->p_offset
;
564 int steal
= !VPM_MTBF(steals
, steals_mtbf
);
566 * Page already has a vpm assigned just use that.
567 * Grab the vpm mutex and verify that it is still
568 * the correct one. The pp->p_vpmref should not change
569 * once we have the vpm mutex and the page lock.
571 mutex_exit(&allocq
->vpmq_mtx
);
573 vmtx
= VPMAPMTX(vpm
);
575 if ((steal
&& vpm
->vpm_refcnt
== 0) ||
576 vpm
->vpm_pp
!= newpage
) {
578 * The vpm got stolen, retry.
579 * clear the p_vpmref.
581 pmtx
= PPMTX(newpage
);
583 if (newpage
->p_vpmref
== vpmref
) {
584 newpage
->p_vpmref
= 0;
589 VPM_DEBUG(vpmd_steals
);
591 } else if (vpm
->vpm_refcnt
== 0) {
593 * Remove it from the free list if it
596 VPMAP_RMFREELIST(vpm
);
604 free_vpmap(struct vpmap
*vpm
)
606 struct vpmfree
*vpmflp
;
607 struct vpmap
*vpmfreelist
;
608 union vpm_freeq
*releq
;
610 ASSERT(MUTEX_HELD(VPMAPMTX(vpm
)));
612 if (vpm
->vpm_refcnt
!= 0) {
617 vpmflp
= &vpmd_free
[vpm
->vpm_free_ndx
];
619 * Add to the tail of the release queue
620 * Note that vpm_releq and vpm_allocq could toggle
621 * before we get the lock. This does not affect
622 * correctness as the 2 queues are only maintained
623 * to reduce lock pressure.
625 releq
= vpmflp
->vpm_releq
;
626 if (releq
== &vpmflp
->vpm_freeq
[0]) {
631 mutex_enter(&releq
->vpmq_mtx
);
632 vpmfreelist
= releq
->vpmq_free
;
633 if (vpmfreelist
== 0) {
636 releq
->vpmq_free
= vpm
->vpm_next
= vpm
->vpm_prev
= vpm
;
638 * Both queue mutexes are held to set vpm_want;
639 * snapshot the value before dropping releq mutex.
640 * If vpm_want appears after the releq mutex is dropped,
641 * then the vpmap just freed is already gone.
643 want
= vpmflp
->vpm_want
;
644 mutex_exit(&releq
->vpmq_mtx
);
646 * See if there was a waiter before dropping the releq mutex
647 * then recheck after obtaining vpm_freeq[0] mutex as
648 * another thread may have already signaled.
651 mutex_enter(&vpmflp
->vpm_freeq
[0].vpmq_mtx
);
652 if (vpmflp
->vpm_want
)
653 cv_signal(&vpmflp
->vpm_free_cv
);
654 mutex_exit(&vpmflp
->vpm_freeq
[0].vpmq_mtx
);
657 vpm
->vpm_next
= vpmfreelist
;
658 vpm
->vpm_prev
= vpmfreelist
->vpm_prev
;
659 vpmfreelist
->vpm_prev
= vpm
;
660 vpm
->vpm_prev
->vpm_next
= vpm
;
661 mutex_exit(&releq
->vpmq_mtx
);
666 * Get the vpmap for the page.
667 * The refcnt of this vpm is incremented.
669 static struct vpmap
*
670 get_vpmap(page_t
*pp
)
672 struct vpmap
*vpm
= NULL
;
677 ASSERT((pp
!= NULL
) && PAGE_LOCKED(pp
));
679 if (VPM_MTBF(contend
, contend_mtbf
) && (refid
= pp
->p_vpmref
) != 0) {
681 vmtx
= VPMAPMTX(vpm
);
684 * Since we have the page lock and the vpm mutex, the
685 * pp->p_vpmref cannot change.
687 if (vpm
->vpm_pp
!= pp
) {
691 * Clear the p_vpmref as it is incorrect.
692 * This can happen if the page was stolen.
693 * On x64 this should not happen as p_vpmref
694 * is treated as a mapping on the page. So
695 * if the page is stolen, the mapping would have
696 * been cleared in page_unload().
699 if (pp
->p_vpmref
== refid
)
705 } else if (vpm
->vpm_refcnt
== 0) {
707 * Got the vpm, remove it from the free
708 * list if it exists there.
710 VPMAP_RMFREELIST(vpm
);
715 * get_free_vpmap() returns with the vpmap mutex held.
717 vpm
= get_free_vpmap(pp
);
718 vmtx
= VPMAPMTX(vpm
);
719 vpmd_cpu
[CPU
->cpu_seqid
].vcpu
.vcpu_misses
++;
721 vpmd_cpu
[CPU
->cpu_seqid
].vcpu
.vcpu_hits
++;
730 /* END --- vpm cache ---- */
733 * The vnode page mapping(vpm) interface routines.
737 * Find or create the pages starting from baseoff for specified
752 uoff_t off
= baseoff
;
754 ASSERT(nseg
>= MINVMAPS
&& nseg
<= MAXVMAPS
);
756 for (i
= 0; len
> 0; len
-= PAGESIZE
, i
++) {
760 if ((pp
= page_lookup(&vp
->v_object
, off
, SE_SHARED
)) == NULL
) {
762 base
= segkpm_create_va(off
);
765 * the seg pointer passed in is just advisory. Just
766 * pass segkmap for now like segmap does with
767 * segmap_kpm enabled.
769 if ((pp
= page_create_va(&vp
->v_object
, off
, PAGESIZE
,
772 panic("segmap_pagecreate_vpm: "
773 "page_create failed");
783 * Get the vpm for this page_t.
785 if (vpm_cache_enable
) {
787 vml
[i
].vs_data
= (void *)&vpm
->vpm_pp
;
789 vml
[i
].vs_data
= (void *)pp
;
793 vml
[i
].vs_addr
= hat_kpm_mapin(pp
, 0);
794 vml
[i
].vs_len
= PAGESIZE
;
798 vml
[i
].vs_data
= NULL
;
799 vml
[i
].vs_addr
= NULL
;
805 * Returns vpm mappings of pages in the range [off, off+len], where
806 * len is rounded up to the PAGESIZE boundary. The list of pages and
807 * the page addresses are returned in the SGL vml (vmap_t) array passed in.
808 * The nseg is the number of vmap_t entries in the array.
810 * The segmap's SM_LOCKPROTO usage is not supported by these interfaces.
811 * For such cases, use the seg_map interfaces.
824 extern struct vnode
*common_specvp();
828 page_t
*pp
, *pplist
[MAXVMAPS
];
833 ASSERT(nseg
>= MINVMAPS
&& nseg
<= MAXVMAPS
);
834 baseoff
= off
& (offset_t
)PAGEMASK
;
835 vml
[0].vs_data
= NULL
;
836 vml
[0].vs_addr
= NULL
;
838 tlen
= P2ROUNDUP(off
+ len
, PAGESIZE
) - baseoff
;
840 * Restrict it to VPMMAXLEN.
842 if (tlen
> (VPMMAXPGS
* PAGESIZE
)) {
843 tlen
= VPMMAXPGS
* PAGESIZE
;
846 * Ensure length fits within the vml[] array. One element of
847 * the array is used to mark the end of the scatter/gather list
848 * of valid mappings by setting its vs_addr = NULL. Leave space
851 if (tlen
> ((nseg
- 1) * PAGESIZE
)) {
852 tlen
= ((nseg
- 1) * PAGESIZE
);
857 * If this is a block device we have to be sure to use the
858 * "common" block device vnode for the mapping.
860 if (vp
->v_type
== VBLK
)
861 vp
= common_specvp(vp
);
865 return (vpm_pagecreate(vp
, baseoff
, len
, vml
, nseg
, newpage
));
867 for (i
= 0; len
> 0; len
-= PAGESIZE
, i
++, pplist
[i
] = NULL
) {
869 pp
= page_lookup(&vp
->v_object
, baseoff
, SE_SHARED
);
872 * If we did not find the page or if this page was not
873 * in vpm cache(p_vpmref == 0), then let fop_getpage get
875 * We need to call fop_getpage so that filesystems can do some
876 * (un)necessary tracking for sequential access.
879 if (pp
== NULL
|| (vpm_cache_enable
&& pp
->p_vpmref
== 0) ||
880 (rw
== S_WRITE
&& hat_page_getattr(pp
, P_MOD
| P_REF
)
881 != (P_MOD
| P_REF
))) {
887 * If we did not find the desired set of pages,
888 * from the page cache, just call fop_getpage to get
891 for (j
= 0; j
< i
; j
++) {
892 page_unlock(pplist
[j
]);
896 baseoff
= off
& (offset_t
)PAGEMASK
;
898 * Pass a dummy address as it will be required
899 * by page_create_va(). We pass segkmap as the seg
900 * as some file systems(UFS) check it.
902 base
= segkpm_create_va(baseoff
);
904 error
= fop_getpage(vp
, baseoff
, tlen
, &prot
, pplist
,
905 tlen
, segkmap
, base
, rw
, CRED(), NULL
);
907 VPM_DEBUG(vpmd_getpagefailed
);
918 for (i
= 0; pplist
[i
] != NULL
; i
++) {
919 page_unlock(pplist
[i
]);
922 vml
[0].vs_addr
= NULL
;
923 vml
[0].vs_data
= NULL
;
928 * Get the vpm's for pages.
930 for (i
= 0; pplist
[i
] != NULL
; i
++) {
931 if (vpm_cache_enable
) {
932 vpm
= get_vpmap(pplist
[i
]);
933 vml
[i
].vs_data
= (void *)&(vpm
->vpm_pp
);
935 vml
[i
].vs_data
= (void *)pplist
[i
];
936 pplist
[i
]->p_vpmref
= 0;
939 vml
[i
].vs_addr
= hat_kpm_mapin(pplist
[i
], 0);
940 vml
[i
].vs_len
= PAGESIZE
;
943 vml
[i
].vs_data
= NULL
;
944 vml
[i
].vs_addr
= NULL
;
950 * Release the vpm mappings on the pages and unlock them.
953 vpm_unmap_pages(vmap_t vml
[], enum seg_rw rw
)
960 for (i
= 0; vml
[i
].vs_data
!= NULL
; i
++) {
961 ASSERT(IS_KPM_ADDR(vml
[i
].vs_addr
));
963 if (vpm_cache_enable
) {
964 pp
= *(((page_t
**)vml
[i
].vs_data
));
966 pp
= (page_t
*)vml
[i
].vs_data
;
970 * Mark page as being modified or referenced, because vpm pages
971 * would not cause faults where it would be set normally.
976 ASSERT(rw
== S_READ
);
980 if (vpm_cache_enable
) {
981 vpm
= (struct vpmap
*)((char *)vml
[i
].vs_data
982 - offsetof(struct vpmap
, vpm_pp
));
983 hat_kpm_mapout(pp
, 0, vml
[i
].vs_addr
);
988 if (--vpm
->vpm_refcnt
== 0) {
993 hat_kpm_mapout(pp
, 0, vml
[i
].vs_addr
);
994 (void) page_release(pp
, 1);
996 vml
[i
].vs_data
= NULL
;
997 vml
[i
].vs_addr
= NULL
;
1002 * Given the vp, off and the uio structure, this routine will do the
1003 * copy (uiomove). If the last page created is partially written,
1004 * the rest of the page is zeroed out. It also zeros the beginning of
1005 * the first page till the start offset if requested(zerostart).
1006 * If pages are to be fetched, it will call the filesystem's getpage
1007 * function (fop_getpage) to get them, otherwise they will be created if
1008 * not already present in the page cache.
1011 vpm_data_copy(struct vnode
*vp
,
1021 struct vmap vml
[MINVMAPS
];
1025 uiorw
= (rw
== S_WRITE
) ? UIO_WRITE
: UIO_READ
;
1027 * 'off' will be the offset where the I/O starts.
1028 * We get the pages starting at the (off & PAGEMASK)
1031 error
= vpm_map_pages(vp
, off
, (uint_t
)len
,
1032 fetchpage
, vml
, MINVMAPS
, &npages
, rw
);
1034 if (newpage
!= NULL
)
1037 int i
, pn
, slen
= len
;
1038 int pon
= off
& PAGEOFFSET
;
1041 * Clear from the beginning of the page to start offset
1044 if (!fetchpage
&& zerostart
) {
1045 (void) kzero(vml
[0].vs_addr
, (uint_t
)pon
);
1046 VPM_DEBUG(vpmd_zerostart
);
1049 for (i
= 0; !error
&& slen
> 0 &&
1050 vml
[i
].vs_addr
!= NULL
; i
++) {
1051 pn
= (int)MIN(slen
, (PAGESIZE
- pon
));
1052 error
= uiomove(vml
[i
].vs_addr
+ pon
,
1053 (long)pn
, uiorw
, uio
);
1059 * When new pages are created, zero out part of the
1060 * page we did not copy to.
1062 if (!fetchpage
&& npages
&&
1063 uio
->uio_loffset
< roundup(off
+ len
, PAGESIZE
)) {
1066 pon
= (uio
->uio_loffset
& PAGEOFFSET
);
1067 nzero
= PAGESIZE
- pon
;
1068 i
= (uio
->uio_loffset
- (off
& PAGEMASK
)) / PAGESIZE
;
1069 (void) kzero(vml
[i
].vs_addr
+ pon
, (uint_t
)nzero
);
1071 vpm_unmap_pages(vml
, rw
);
1077 * called to flush pages for the given vnode covering
1078 * [off, off+len] range.
1081 vpm_sync_pages(struct vnode
*vp
,
1086 extern struct vnode
*common_specvp();
1089 size_t psize
= roundup(len
, PAGESIZE
);
1092 * If this is a block device we have to be sure to use the
1093 * "common" block device vnode for the mapping.
1095 if (vp
->v_type
== VBLK
)
1096 vp
= common_specvp(vp
);
1098 if ((flags
& ~SM_DONTNEED
) != 0) {
1099 if (flags
& SM_ASYNC
)
1101 if (flags
& SM_INVAL
)
1103 if (flags
& SM_DESTROY
)
1104 bflags
|= (B_INVAL
|B_TRUNC
);
1105 if (flags
& SM_FREE
)
1107 if (flags
& SM_DONTNEED
)
1108 bflags
|= B_DONTNEED
;
1110 error
= fop_putpage(vp
, off
, psize
, bflags
, CRED(), NULL
);
1117 #else /* SEGKPM_SUPPORT */
1155 vpm_data_copy(struct vnode
*vp
,
1169 vpm_unmap_pages(vmap_t vml
[], enum seg_rw rw
)
1174 vpm_sync_pages(struct vnode
*vp
,
1181 #endif /* SEGKPM_SUPPORT */