/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
34 #include <sys/cmn_err.h>
35 #include <sys/debug.h>
36 #include <sys/machparam.h>
38 #include <vm/seg_kmem.h>
39 #include <vm/seg_kpm.h>
42 #include <sys/cpu_module.h>
43 #define BP_FLUSH(addr, size) flush_instr_mem((void *)addr, size);
45 #define BP_FLUSH(addr, size)
48 int bp_force_copy
= 0;
53 static int bp_copy_common(bp_copydir_t dir
, struct buf
*bp
, void *driverbuf
,
54 offset_t offset
, size_t size
);
56 static vmem_t
*bp_map_arena
;
57 static size_t bp_align
;
58 static uint_t bp_devload_flags
= PROT_READ
| PROT_WRITE
| HAT_NOSYNC
;
59 int bp_max_cache
= 1 << 17; /* 128K default; tunable */
60 int bp_mapin_kpm_enable
= 1; /* enable default; tunable */
63 bp_vmem_alloc(vmem_t
*vmp
, size_t size
, int vmflag
)
65 return (vmem_xalloc(vmp
, size
, bp_align
, 0, 0, NULL
, NULL
, vmflag
));
69 bp_init(size_t align
, uint_t devload_flags
)
71 bp_align
= MAX(align
, PAGESIZE
);
72 bp_devload_flags
|= devload_flags
;
74 if (bp_align
<= bp_max_cache
)
75 bp_map_arena
= vmem_create("bp_map", NULL
, 0, bp_align
,
76 bp_vmem_alloc
, vmem_free
, heap_arena
,
77 MIN(8 * bp_align
, bp_max_cache
), VM_SLEEP
);
81 * common routine so can be called with/without VM_SLEEP
84 bp_mapin_common(struct buf
*bp
, int flag
)
97 /* return if already mapped in, no pageio/physio, or physio to kas */
98 if ((bp
->b_flags
& B_REMAPPED
) ||
99 !(bp
->b_flags
& (B_PAGEIO
| B_PHYS
)) ||
100 (((bp
->b_flags
& (B_PAGEIO
| B_PHYS
)) == B_PHYS
) &&
101 ((bp
->b_proc
== NULL
) || (bp
->b_proc
->p_as
== &kas
))))
102 return (bp
->b_un
.b_addr
);
104 ASSERT((bp
->b_flags
& (B_PAGEIO
| B_PHYS
)) != (B_PAGEIO
| B_PHYS
));
106 addr
= (caddr_t
)bp
->b_un
.b_addr
;
107 off
= (uintptr_t)addr
& PAGEOFFSET
;
108 size
= P2ROUNDUP(bp
->b_bcount
+ off
, PAGESIZE
);
111 /* Fastpath single page IO to locked memory by using kpm. */
112 if ((bp
->b_flags
& (B_SHADOW
| B_PAGEIO
)) && (npages
== 1) &&
113 kpm_enable
&& bp_mapin_kpm_enable
) {
114 if (bp
->b_flags
& B_SHADOW
)
118 kaddr
= hat_kpm_mapin(pp
, NULL
);
119 bp
->b_un
.b_addr
= kaddr
+ off
;
120 bp
->b_flags
|= B_REMAPPED
;
121 return (bp
->b_un
.b_addr
);
125 * Allocate kernel virtual space for remapping.
127 color
= bp_color(bp
);
128 ASSERT(color
< bp_align
);
130 if (bp_map_arena
!= NULL
) {
131 kaddr
= (caddr_t
)vmem_alloc(bp_map_arena
,
132 P2ROUNDUP(color
+ size
, bp_align
), flag
);
137 kaddr
= vmem_xalloc(heap_arena
, size
, bp_align
, color
,
138 0, NULL
, NULL
, flag
);
143 ASSERT(P2PHASE((uintptr_t)kaddr
, bp_align
) == color
);
146 * Map bp into the virtual space we just allocated.
148 if (bp
->b_flags
& B_PAGEIO
) {
151 } else if (bp
->b_flags
& B_SHADOW
) {
153 pplist
= bp
->b_shadow
;
157 if (bp
->b_proc
== NULL
|| (as
= bp
->b_proc
->p_as
) == NULL
)
161 bp
->b_flags
|= B_REMAPPED
;
162 bp
->b_un
.b_addr
= kaddr
+ off
;
164 while (npages
-- != 0) {
166 pfnum
= pp
->p_pagenum
;
168 } else if (pplist
== NULL
) {
169 pfnum
= hat_getpfnum(as
->a_hat
,
170 (caddr_t
)((uintptr_t)addr
& MMU_PAGEMASK
));
171 if (pfnum
== PFN_INVALID
)
172 panic("bp_mapin_common: hat_getpfnum for"
173 " addr %p failed\n", (void *)addr
);
176 pfnum
= (*pplist
)->p_pagenum
;
180 hat_devload(kas
.a_hat
, kaddr
, PAGESIZE
, pfnum
,
181 bp_devload_flags
, HAT_LOAD_LOCK
);
185 return (bp
->b_un
.b_addr
);
189 * Convert bp for pageio/physio to a kernel addressable location.
192 bp_mapin(struct buf
*bp
)
194 (void) bp_mapin_common(bp
, VM_SLEEP
);
198 * Release all the resources associated with a previous bp_mapin() call.
201 bp_mapout(struct buf
*bp
)
211 if ((bp
->b_flags
& B_REMAPPED
) == 0)
214 addr
= bp
->b_un
.b_addr
;
215 off
= (uintptr_t)addr
& PAGEOFFSET
;
216 size
= P2ROUNDUP(bp
->b_bcount
+ off
, PAGESIZE
);
219 bp
->b_un
.b_addr
= (caddr_t
)off
; /* debugging aid */
221 if ((bp
->b_flags
& (B_SHADOW
| B_PAGEIO
)) && (npages
== 1) &&
222 kpm_enable
&& bp_mapin_kpm_enable
) {
223 if (bp
->b_flags
& B_SHADOW
)
227 addr
= (caddr_t
)((uintptr_t)addr
& MMU_PAGEMASK
);
228 hat_kpm_mapout(pp
, NULL
, addr
);
229 bp
->b_flags
&= ~B_REMAPPED
;
233 base
= (uintptr_t)addr
& MMU_PAGEMASK
;
234 BP_FLUSH(base
, size
);
235 hat_unload(kas
.a_hat
, (void *)base
, size
,
236 HAT_UNLOAD_NOSYNC
| HAT_UNLOAD_UNLOCK
);
237 if (bp_map_arena
!= NULL
) {
238 color
= P2PHASE(base
, bp_align
);
239 vmem_free(bp_map_arena
, (void *)(base
- color
),
240 P2ROUNDUP(color
+ size
, bp_align
));
242 vmem_free(heap_arena
, (void *)base
, size
);
243 bp
->b_flags
&= ~B_REMAPPED
;
247 * copy data from a KVA into a buf_t which may not be mapped in. offset
248 * is relative to the buf_t only.
251 bp_copyout(void *driverbuf
, struct buf
*bp
, offset_t offset
, size_t size
)
253 return (bp_copy_common(BP_COPYOUT
, bp
, driverbuf
, offset
, size
));
257 * copy data from a buf_t which may not be mapped in, into a KVA.. offset
258 * is relative to the buf_t only.
261 bp_copyin(struct buf
*bp
, void *driverbuf
, offset_t offset
, size_t size
)
263 return (bp_copy_common(BP_COPYIN
, bp
, driverbuf
, offset
, size
));
/*
 * BP_COPY(dir, driverbuf, baddr, sz): copy sz bytes in the direction
 * given by dir — BP_COPYIN copies baddr -> driverbuf, anything else
 * (BP_COPYOUT) copies driverbuf -> baddr.  Arguments follow the kernel
 * bcopy(src, dst, size) convention.
 */
#define	BP_COPY(dir, driverbuf, baddr, sz)	\
	(dir == BP_COPYIN) ? \
	bcopy(baddr, driverbuf, sz) : bcopy(driverbuf, baddr, sz)
272 bp_copy_common(bp_copydir_t dir
, struct buf
*bp
, void *driverbuf
,
273 offset_t offset
, size_t size
)
287 ASSERT((offset
+ size
) <= bp
->b_bcount
);
289 /* if the buf_t already has a KVA, just do a bcopy */
290 if (!(bp
->b_flags
& (B_PHYS
| B_PAGEIO
))) {
291 BP_COPY(dir
, driverbuf
, bp
->b_un
.b_addr
+ offset
, size
);
295 /* if we don't have kpm enabled, we need to do the slow path */
296 if (!kpm_enable
|| bp_force_copy
) {
298 BP_COPY(dir
, driverbuf
, bp
->b_un
.b_addr
+ offset
, size
);
304 * kpm is enabled, and we need to map in the buf_t for the copy
307 /* setup pp, plist, and make sure 'as' is right */
308 if (bp
->b_flags
& B_PAGEIO
) {
311 } else if (bp
->b_flags
& B_SHADOW
) {
313 pplist
= bp
->b_shadow
;
317 if (bp
->b_proc
== NULL
|| (as
= bp
->b_proc
->p_as
) == NULL
) {
323 * locals for the address, the offset into the first page, and the
324 * size of the first page we are going to copy.
326 addr
= (caddr_t
)bp
->b_un
.b_addr
;
327 poff
= (uintptr_t)addr
& PAGEOFFSET
;
328 psize
= MIN(PAGESIZE
- poff
, size
);
331 * we always start with a 0 offset into the driverbuf provided. The
332 * offset passed in only applies to the buf_t.
336 /* Loop until we've copied al the data */
340 * for a pp or pplist, get the pfn, then go to the next page_t
341 * for the next time around the loop.
346 } else if (pplist
!= NULL
) {
351 * We have a user VA. If we are going to copy this page, (e.g.
352 * the offset into the buf_t where we start to copy is
353 * within this page), get the pfn. Don't waste the cycles
354 * getting the pfn if we're not copying this page.
356 } else if (offset
< psize
) {
357 pfn
= hat_getpfnum(as
->a_hat
,
358 (caddr_t
)((uintptr_t)addr
& PAGEMASK
));
359 if (pfn
== PFN_INVALID
) {
362 page
= page_numtopp_nolock(pfn
);
363 addr
+= psize
- offset
;
369 * if we have an initial offset into the buf_t passed in,
370 * and it falls within the current page, account for it in
371 * the page size (how much we will copy) and the offset into the
372 * page (where we'll start copying from).
374 if ((offset
> 0) && (offset
< psize
)) {
380 * if we have an initial offset into the buf_t passed in,
381 * and it's not within the current page, skip this page.
382 * We don't have to worry about the first page offset and size
383 * anymore. psize will normally be PAGESIZE now unless we are
386 } else if (offset
>= psize
) {
388 psize
= MIN(PAGESIZE
, size
);
394 * get a kpm mapping to the page, them copy in/out of the
395 * page. update size left and offset into the driverbuf passed
396 * in for the next time around the loop.
398 kaddr
= hat_kpm_mapin(page
, NULL
) + poff
;
399 BP_COPY(dir
, (void *)((uintptr_t)driverbuf
+ voff
), kaddr
,
401 hat_kpm_mapout(page
, NULL
, kaddr
- poff
);
407 psize
= MIN(PAGESIZE
, size
);