6198 Let's EOL cachefs
[illumos-gate.git] / usr / src / uts / common / os / bp_map.c
blob0fd5a5d216214c054c400b7c235fe71517cadc33
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
31 #include <sys/mman.h>
32 #include <sys/buf.h>
33 #include <sys/vmem.h>
34 #include <sys/cmn_err.h>
35 #include <sys/debug.h>
36 #include <sys/machparam.h>
37 #include <vm/page.h>
38 #include <vm/seg_kmem.h>
39 #include <vm/seg_kpm.h>
/*
 * BP_FLUSH(addr, size): on sparc this calls flush_instr_mem() on the given
 * range; on every other platform it is a no-op.
 *
 * Both forms expand to a single expression with no trailing semicolon, so
 * "BP_FLUSH(a, s);" behaves as one statement and is safe as the body of an
 * if/else.  The arguments are not evaluated on non-sparc platforms.
 */
#ifdef __sparc
#include <sys/cpu_module.h>
#define	BP_FLUSH(addr, size)	flush_instr_mem((void *)(addr), (size))
#else
#define	BP_FLUSH(addr, size)	((void)0)
#endif
/*
 * When non-zero, bp_copy_common() always takes the bp_mapin()/bp_mapout()
 * path, even if kpm is enabled.
 */
int bp_force_copy = 0;

/*
 * Direction of a bp_copy_common() transfer, as interpreted by BP_COPY().
 */
typedef enum {
	BP_COPYIN = 0,		/* buf_t data -> driver buffer */
	BP_COPYOUT = 1		/* driver buffer -> buf_t data */
} bp_copydir_t;
53 static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
54 offset_t offset, size_t size);
56 static vmem_t *bp_map_arena;
57 static size_t bp_align;
58 static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC;
59 int bp_max_cache = 1 << 17; /* 128K default; tunable */
60 int bp_mapin_kpm_enable = 1; /* enable default; tunable */
62 static void *
63 bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
65 return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag));
68 void
69 bp_init(size_t align, uint_t devload_flags)
71 bp_align = MAX(align, PAGESIZE);
72 bp_devload_flags |= devload_flags;
74 if (bp_align <= bp_max_cache)
75 bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align,
76 bp_vmem_alloc, vmem_free, heap_arena,
77 MIN(8 * bp_align, bp_max_cache), VM_SLEEP);
81 * common routine so can be called with/without VM_SLEEP
83 void *
84 bp_mapin_common(struct buf *bp, int flag)
86 struct as *as;
87 pfn_t pfnum;
88 page_t *pp;
89 page_t **pplist;
90 caddr_t kaddr;
91 caddr_t addr;
92 uintptr_t off;
93 size_t size;
94 pgcnt_t npages;
95 int color;
97 /* return if already mapped in, no pageio/physio, or physio to kas */
98 if ((bp->b_flags & B_REMAPPED) ||
99 !(bp->b_flags & (B_PAGEIO | B_PHYS)) ||
100 (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) &&
101 ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas))))
102 return (bp->b_un.b_addr);
104 ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS));
106 addr = (caddr_t)bp->b_un.b_addr;
107 off = (uintptr_t)addr & PAGEOFFSET;
108 size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
109 npages = btop(size);
111 /* Fastpath single page IO to locked memory by using kpm. */
112 if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
113 kpm_enable && bp_mapin_kpm_enable) {
114 if (bp->b_flags & B_SHADOW)
115 pp = *bp->b_shadow;
116 else
117 pp = bp->b_pages;
118 kaddr = hat_kpm_mapin(pp, NULL);
119 bp->b_un.b_addr = kaddr + off;
120 bp->b_flags |= B_REMAPPED;
121 return (bp->b_un.b_addr);
125 * Allocate kernel virtual space for remapping.
127 color = bp_color(bp);
128 ASSERT(color < bp_align);
130 if (bp_map_arena != NULL) {
131 kaddr = (caddr_t)vmem_alloc(bp_map_arena,
132 P2ROUNDUP(color + size, bp_align), flag);
133 if (kaddr == NULL)
134 return (NULL);
135 kaddr += color;
136 } else {
137 kaddr = vmem_xalloc(heap_arena, size, bp_align, color,
138 0, NULL, NULL, flag);
139 if (kaddr == NULL)
140 return (NULL);
143 ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color);
146 * Map bp into the virtual space we just allocated.
148 if (bp->b_flags & B_PAGEIO) {
149 pp = bp->b_pages;
150 pplist = NULL;
151 } else if (bp->b_flags & B_SHADOW) {
152 pp = NULL;
153 pplist = bp->b_shadow;
154 } else {
155 pp = NULL;
156 pplist = NULL;
157 if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL)
158 as = &kas;
161 bp->b_flags |= B_REMAPPED;
162 bp->b_un.b_addr = kaddr + off;
164 while (npages-- != 0) {
165 if (pp) {
166 pfnum = pp->p_pagenum;
167 pp = pp->p_next;
168 } else if (pplist == NULL) {
169 pfnum = hat_getpfnum(as->a_hat,
170 (caddr_t)((uintptr_t)addr & MMU_PAGEMASK));
171 if (pfnum == PFN_INVALID)
172 panic("bp_mapin_common: hat_getpfnum for"
173 " addr %p failed\n", (void *)addr);
174 addr += PAGESIZE;
175 } else {
176 pfnum = (*pplist)->p_pagenum;
177 pplist++;
180 hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
181 bp_devload_flags, HAT_LOAD_LOCK);
183 kaddr += PAGESIZE;
185 return (bp->b_un.b_addr);
189 * Convert bp for pageio/physio to a kernel addressable location.
191 void
192 bp_mapin(struct buf *bp)
194 (void) bp_mapin_common(bp, VM_SLEEP);
198 * Release all the resources associated with a previous bp_mapin() call.
200 void
201 bp_mapout(struct buf *bp)
203 caddr_t addr;
204 uintptr_t off;
205 uintptr_t base;
206 uintptr_t color;
207 size_t size;
208 pgcnt_t npages;
209 page_t *pp;
211 if ((bp->b_flags & B_REMAPPED) == 0)
212 return;
214 addr = bp->b_un.b_addr;
215 off = (uintptr_t)addr & PAGEOFFSET;
216 size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
217 npages = btop(size);
219 bp->b_un.b_addr = (caddr_t)off; /* debugging aid */
221 if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
222 kpm_enable && bp_mapin_kpm_enable) {
223 if (bp->b_flags & B_SHADOW)
224 pp = *bp->b_shadow;
225 else
226 pp = bp->b_pages;
227 addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK);
228 hat_kpm_mapout(pp, NULL, addr);
229 bp->b_flags &= ~B_REMAPPED;
230 return;
233 base = (uintptr_t)addr & MMU_PAGEMASK;
234 BP_FLUSH(base, size);
235 hat_unload(kas.a_hat, (void *)base, size,
236 HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
237 if (bp_map_arena != NULL) {
238 color = P2PHASE(base, bp_align);
239 vmem_free(bp_map_arena, (void *)(base - color),
240 P2ROUNDUP(color + size, bp_align));
241 } else
242 vmem_free(heap_arena, (void *)base, size);
243 bp->b_flags &= ~B_REMAPPED;
247 * copy data from a KVA into a buf_t which may not be mapped in. offset
248 * is relative to the buf_t only.
251 bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size)
253 return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size));
257 * copy data from a buf_t which may not be mapped in, into a KVA.. offset
258 * is relative to the buf_t only.
261 bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size)
263 return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size));
/*
 * BP_COPY(dir, driverbuf, baddr, sz): bcopy() 'sz' bytes between the
 * driver buffer and the buf_t address 'baddr'.  BP_COPYIN copies from
 * 'baddr' to 'driverbuf'; any other direction copies from 'driverbuf' to
 * 'baddr'.
 *
 * Every parameter and the expansion as a whole are parenthesized so that
 * arbitrary expressions (e.g. a conditional passed as 'dir') keep their
 * meaning.  'driverbuf', 'baddr' and 'sz' are each evaluated exactly once.
 */
#define	BP_COPY(dir, driverbuf, baddr, sz)				\
	(((dir) == BP_COPYIN) ?						\
	bcopy((baddr), (driverbuf), (sz)) :				\
	bcopy((driverbuf), (baddr), (sz)))
271 static int
272 bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
273 offset_t offset, size_t size)
275 page_t **pplist;
276 uintptr_t poff;
277 uintptr_t voff;
278 struct as *as;
279 caddr_t kaddr;
280 caddr_t addr;
281 page_t *page;
282 size_t psize;
283 page_t *pp;
284 pfn_t pfn;
287 ASSERT((offset + size) <= bp->b_bcount);
289 /* if the buf_t already has a KVA, just do a bcopy */
290 if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) {
291 BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
292 return (0);
295 /* if we don't have kpm enabled, we need to do the slow path */
296 if (!kpm_enable || bp_force_copy) {
297 bp_mapin(bp);
298 BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
299 bp_mapout(bp);
300 return (0);
304 * kpm is enabled, and we need to map in the buf_t for the copy
307 /* setup pp, plist, and make sure 'as' is right */
308 if (bp->b_flags & B_PAGEIO) {
309 pp = bp->b_pages;
310 pplist = NULL;
311 } else if (bp->b_flags & B_SHADOW) {
312 pp = NULL;
313 pplist = bp->b_shadow;
314 } else {
315 pp = NULL;
316 pplist = NULL;
317 if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) {
318 as = &kas;
323 * locals for the address, the offset into the first page, and the
324 * size of the first page we are going to copy.
326 addr = (caddr_t)bp->b_un.b_addr;
327 poff = (uintptr_t)addr & PAGEOFFSET;
328 psize = MIN(PAGESIZE - poff, size);
331 * we always start with a 0 offset into the driverbuf provided. The
332 * offset passed in only applies to the buf_t.
334 voff = 0;
336 /* Loop until we've copied al the data */
337 while (size > 0) {
340 * for a pp or pplist, get the pfn, then go to the next page_t
341 * for the next time around the loop.
343 if (pp) {
344 page = pp;
345 pp = pp->p_next;
346 } else if (pplist != NULL) {
347 page = (*pplist);
348 pplist++;
351 * We have a user VA. If we are going to copy this page, (e.g.
352 * the offset into the buf_t where we start to copy is
353 * within this page), get the pfn. Don't waste the cycles
354 * getting the pfn if we're not copying this page.
356 } else if (offset < psize) {
357 pfn = hat_getpfnum(as->a_hat,
358 (caddr_t)((uintptr_t)addr & PAGEMASK));
359 if (pfn == PFN_INVALID) {
360 return (-1);
362 page = page_numtopp_nolock(pfn);
363 addr += psize - offset;
364 } else {
365 addr += psize;
369 * if we have an initial offset into the buf_t passed in,
370 * and it falls within the current page, account for it in
371 * the page size (how much we will copy) and the offset into the
372 * page (where we'll start copying from).
374 if ((offset > 0) && (offset < psize)) {
375 psize -= offset;
376 poff += offset;
377 offset = 0;
380 * if we have an initial offset into the buf_t passed in,
381 * and it's not within the current page, skip this page.
382 * We don't have to worry about the first page offset and size
383 * anymore. psize will normally be PAGESIZE now unless we are
384 * on the last page.
386 } else if (offset >= psize) {
387 offset -= psize;
388 psize = MIN(PAGESIZE, size);
389 poff = 0;
390 continue;
394 * get a kpm mapping to the page, them copy in/out of the
395 * page. update size left and offset into the driverbuf passed
396 * in for the next time around the loop.
398 kaddr = hat_kpm_mapin(page, NULL) + poff;
399 BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr,
400 psize);
401 hat_kpm_mapout(page, NULL, kaddr - poff);
403 size -= psize;
404 voff += psize;
406 poff = 0;
407 psize = MIN(PAGESIZE, size);
410 return (0);