/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#ifndef _VM_VPM_H
#define _VM_VPM_H

#ifdef __cplusplus
extern "C" {
#endif
/*
 * The vnode page mappings (VPM) interfaces.
 * "Commitment level - Consolidation private". They are subject
 * to change without notice. Use them at your own risk.
 *
 * At this stage these interfaces are provided only to utilize the
 * segkpm mappings. Therefore these interfaces have to be used under
 * the 'vpm_enable' check as an alternative to segmap interfaces where
 * applicable.
 *
 * The VPM interfaces provide temporary mappings to file pages. They
 * return the mappings in a scatter gather list (SGL).
 * The SGL elements are the structure 'vmap_t'.
 *
 *	typedef struct vmap {
 *		caddr_t	vs_addr;	/ public - mapped address /
 *		size_t	vs_len;		/ public - length of mapping /
 *		void	*vs_data;	/ opaque - private data /
 *	} vmap_t;
 *
 * An array of this structure has to be passed to the interface routines
 * along with the size (# of elements) of the SGL array. Depending on the
 * requested length and mapped chunk sizes (PAGESIZE here), the number of
 * valid mappings returned can be less than the actual size of the SGL array.
 * An element in the SGL will always have 'vs_addr' set to NULL; this
 * marks the end of the valid entries in the SGL.
 *
 * The vmap_t structure members are populated with the mapped address
 * in 'vs_addr' and the length of the mapping in 'vs_len'. Currently the
 * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
 * and the caller should not access or modify it.
 *
 * Using a scatter gather list to return the mappings and length makes it
 * possible to provide mappings of variable length. Mapping lengths up to
 * VPMMAXLEN are supported. The scatter gather list array size needs to
 * be a minimum of MINVMAPS elements.
 *
 * Interfaces:
 *
 *	int vpm_map_pages(struct vnode *vp, uoff_t off, size_t len,
 *			int fetchpage, vmap_t *vml, int vmlsz,
 *			int *newpagecreated, enum seg_rw rw);
 *
 * This function returns mappings to vnode pages.
 *
 * It takes a vnode, offset and length and returns mappings to the pages
 * covering the range [off, off + len) in the vmap_t SGL array 'vml'.
 * The length passed in should satisfy the following criterion:
 * '(off + len) <= ((off & PAGEMASK) + VPMMAXLEN)'
 * The mapped address returned in 'vs_addr' of the first vml[] entry
 * is at the beginning of the page containing 'off'.
 *
 * 'vmlsz' is the size (# of elements) of the 'vml' array.
 *
 * When the 'fetchpage' flag is set, the vnode (file) pages will be fetched
 * (calls fop_getpage) from the backing store (disk) if they are not found in
 * the system page cache. If 'fetchpage == 0', the vnode (file) pages for the
 * given offset will just be created if they are not already present in the
 * system page cache. The 'newpagecreated' flag is set on return if new pages
 * are created when 'fetchpage == 0' (i.e. when asked to just create new pages).
 *
 * The 'seg_rw rw' argument indicates the intended operation on these
 * mappings (S_WRITE or S_READ).
 *
 * Currently these interfaces only return segkpm mappings. The vnode pages
 * that are being accessed will be locked (at least SHARED locked) for the
 * duration these mappings are in use. After use, the unmap function,
 * vpm_unmap_pages(), has to be called and the same SGL array
 * needs to be passed to the unmap function.
 *
 *	void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
 *
 * This function unmaps the pages that were mapped by vpm_map_pages().
 * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
 *
 * Example:
 * To copy file data of vnode (file) 'vp' at offset 'off' to a kernel buffer
 * 'buf', the following code snippet shows how to use the above two
 * interfaces. Here the copy length runs up to the next MAXBSIZE boundary.
 * This code can be executed repeatedly, in a loop, to copy more than
 * MAXBSIZE bytes of data.
 *
 *	vmap_t	vml[MINVMAPS];
 *	int	err, i, newpage, len;
 *	int	pon;
 *
 *	pon = (off & PAGEOFFSET);
 *	len = MAXBSIZE - pon;
 *
 *	if (vpm_enable) {
 *		err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
 *				&newpage, S_WRITE);
 *
 *		if (err)
 *			return;
 *
 *		for (i = 0; vml[i].vs_addr != NULL; i++) {
 *			bcopy(buf, vml[i].vs_addr + pon,
 *			    PAGESIZE - pon);
 *			buf += (PAGESIZE - pon);
 *			pon = 0;
 *		}
 *
 *		if (newpage) {
 *			pon = (off & PAGEOFFSET);
 *			bzero(vml[i - 1].vs_addr + pon, PAGESIZE - pon);
 *		}
 *
 *		vpm_unmap_pages(vml, S_WRITE);
 *	}
 *
 *	int vpm_data_copy(struct vnode *vp, uoff_t off, size_t len,
 *			struct uio *uio, int fetchpage, int *newpagecreated,
 *			int zerostart, enum seg_rw rw);
 *
 * This function can be called if the need is to just transfer data to/from
 * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to
 * do the data transfer. It can be used in the context of read and write
 * system calls to transfer data between a user buffer, which is specified
 * in the uio structure, and the vnode pages. If the data needs to be
 * transferred between a kernel buffer and the pages, as in the above
 * example, a uio structure can be set up accordingly and passed. The 'rw'
 * parameter will determine the direction of the data transfer.
 *
 * The 'fetchpage' and 'newpagecreated' arguments are the same as explained
 * before. The 'zerostart' flag, when set, will zero-fill the start of the
 * first page up to the offset 'off', i.e. from 'off & PAGEMASK' to 'off'.
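 *
 * For instance (an illustrative sketch only, not code from any particular
 * file system; 'uiop', 'off' and 'n' are assumed to be the caller's uio
 * pointer, starting offset and transfer length, already limited so that the
 * request stays within one MAXBSIZE block), a write path that fully
 * overwrites the range could transfer the data with:
 *
 *	int error, newpage = 0;
 *
 *	if (vpm_enable) {
 *		/ fetchpage == 0: the pages are fully overwritten, so /
 *		/ there is no need to read them from the backing store /
 *		error = vpm_data_copy(vp, off, n, uiop, 0, &newpage,
 *				0, S_WRITE);
 *	}
 *
 * When 'vpm_enable' is not set, the same transfer would be done with the
 * traditional segmap_getmapflt()/uiomove()/segmap_release() sequence.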
 *
 *	int vpm_sync_pages(struct vnode *vp, uoff_t off,
 *			size_t len, uint_t flags);
 *
 * This function can be called to flush or sync the vnode (file) pages that
 * have been accessed. It will call fop_putpage().
 *
 * For the given vnode, off and len, the pages covering the range
 * [off, off + len) are flushed. Currently it uses the same flags that
 * are used with the segmap_release() interface. Refer to vm/seg_map.h.
 * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
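 *
 * Continuing the sketch above (same assumed 'vp', 'off', 'n' and 'error'
 * names; again only an illustration), a caller that wants the block it just
 * wrote to be pushed out asynchronously could follow the copy with:
 *
 *	if (vpm_enable && error == 0)
 *		error = vpm_sync_pages(vp, off, n, SM_ASYNC);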
 *
 */

/*
 * vpm cache related definitions.
 */
#define	VPMAP_MINCACHE		(64 * 1024 * 1024)
#define	VPMAP_MAXCACHE		(256L * 1024L * 1024L * 1024L)	/* 256G */

/*
 * vpm caching mode
 */
#define	VPMCACHE_LRU		0
#define	VPMCACHE_RANDOM		1

/*
 * Data structures to manage the cache of pages referenced by
 * the vpm interfaces. There is one vpmap struct per page in the cache.
 */
struct vpmap {
	kmutex_t	vpm_mtx;	/* protects non-list fields */
	struct vnode	*vpm_vp;	/* pointer to vnode of cached page */
	struct vpmap	*vpm_next;	/* free list pointers */
	struct vpmap	*vpm_prev;
	uoff_t		vpm_off;	/* offset of the page */
	page_t		*vpm_pp;	/* page pointer */
	ushort_t	vpm_refcnt;	/* number of active references */
	ushort_t	vpm_ndxflg;	/* indicates which queue */
	ushort_t	vpm_free_ndx;	/* freelist it belongs to */
};

/*
 * Multiple vpmap free lists are maintained so that allocations
 * scale with cpu count. To further reduce contention between
 * allocations and deallocations, each list is made up of two queues.
 */
#define	VPM_FREEQ_PAD	64

union vpm_freeq {
	struct {
		struct vpmap	*vpmsq_free;
		kmutex_t	vpmsq_mtx;
	} vpmfq;
	char vpmq_pad[VPM_FREEQ_PAD];
};

#define	vpmq_free	vpmfq.vpmsq_free
#define	vpmq_mtx	vpmfq.vpmsq_mtx

struct vpmfree {
	union vpm_freeq vpm_freeq[2];	/* alloc and release queues */
	union vpm_freeq *vpm_allocq;	/* current alloc queue */
	union vpm_freeq *vpm_releq;	/* current release queue */
	kcondvar_t	vpm_free_cv;
	ushort_t	vpm_want;
};

#define	VPMALLOCQ	0
#define	VPMRELEQ	1
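
/*
 * How the two queues are intended to be used (an illustrative sketch of the
 * idea only; see the vpm implementation for the real logic): allocations
 * take entries from the queue 'vpm_allocq' points at, while frees add
 * entries to the queue 'vpm_releq' points at, so the two paths usually
 * contend on different 'vpmq_mtx' locks. Only when the alloc queue runs
 * empty are the roles of the two queues swapped, roughly as follows:
 *
 *	struct vpmfree *vpmflp = ...;	/ one of the vpmap free lists /
 *	union vpm_freeq *allocq = vpmflp->vpm_allocq;
 *
 *	if (allocq->vpmq_free == NULL) {
 *		/ alloc queue exhausted - start allocating from the /
 *		/ other queue and let this one refill with frees /
 *		vpmflp->vpm_allocq = vpmflp->vpm_releq;
 *		vpmflp->vpm_releq = allocq;
 *	}
 */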

/*
 * VPM Interface definitions.
 */

/*
 * This structure is the scatter gather list element. The page
 * mappings will be returned in this structure. A pointer to an
 * array of this structure is passed to the interface routines.
 */
typedef struct vmap {
	caddr_t	vs_addr;	/* mapped address */
	size_t	vs_len;		/* length, currently fixed at PAGESIZE */
	void	*vs_data;	/* opaque - private data */
} vmap_t;

#define	VPM_FETCHPAGE	0x01	/* fault in pages */

/*
 * Max request length - needs to be a multiple of
 * 8192 (PAGESIZE on sparc) so it works properly on both
 * x86 and sparc systems. Max is set to 128K.
 */
#define	VPMMAXLEN	(128 * 1024)

/*
 * The minimum and maximum number of array elements in the scatter
 * gather list.
 */
#define	MINVMAPS	3	/* (MAXBSIZE/4096 + 1) min # of mappings */
#if defined(__sparc)
#define	VPMMAXPGS	(VPMMAXLEN / 8192)	/* max # of pages at a time */
#else
#define	VPMMAXPGS	(VPMMAXLEN / 4096)
#endif
#define	MAXVMAPS	(VPMMAXPGS + 1)		/* max # of elements in the */
						/* scatter gather list; the */
						/* +1 element marks the end */
						/* of the list of valid */
						/* mappings */
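
/*
 * Working the numbers above through (with the page sizes assumed there):
 * on x86, VPMMAXPGS is 128K / 4K == 32, so MAXVMAPS is 33; on sparc,
 * VPMMAXPGS is 128K / 8K == 16, so MAXVMAPS is 17. An SGL of MAXVMAPS
 * elements can therefore describe a full VPMMAXLEN request, with the last
 * element carrying only the terminating NULL 'vs_addr'. MINVMAPS == 3 is
 * enough for a MAXBSIZE (8K) request such as the copy example above: at
 * most two 4K pages (x86) or one 8K page (sparc), plus the terminator.
 */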

#ifdef _KERNEL

extern int	vpm_enable;

/*
 * vpm page mapping operations.
 */
extern void	vpm_init(void);
extern int	vpm_map_pages(struct vnode *, uoff_t, size_t, int,
		vmap_t *, int, int *, enum seg_rw);
extern void	vpm_unmap_pages(vmap_t *, enum seg_rw);
extern int	vpm_sync_pages(struct vnode *, uoff_t, size_t, uint_t);
extern int	vpm_data_copy(struct vnode *, uoff_t, size_t,
		struct uio *, int, int *, int, enum seg_rw);
#endif	/* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif	/* _VM_VPM_H */