4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
29 * PCI nexus DVMA relocation routines.
31 * These routines handle the interactions with the HAT layer to
32 * implement page relocation for page(s) which have active DMA handle
33 * bindings when DVMA is being used for those handles.
35 * The current modus operandi is as follows:
37 * Object binding: register the appropriate callback for each page
38 * of the kernel object while obtaining the PFN for the DVMA page.
40 * Object unbinding: unregister the callback for each page of the
 * kernel object.
 *
 * Relocation request:
44 * 1) Suspend the bus and sync the caches.
45 * 2) Remap the DVMA object using the new provided PFN.
46 * 3) Unsuspend the bus.
48 * The relocation code runs with CPUs captured (idling in xc_loop())
49 * so we can only acquire spinlocks at PIL >= 13 for synchronization
50 * within those codepaths.
52 #include <sys/types.h>
54 #include <sys/async.h>
55 #include <sys/sysmacros.h>
56 #include <sys/sunddi.h>
57 #include <sys/machsystm.h>
58 #include <sys/ddi_impldefs.h>
61 #include <sys/pci/pci_obj.h>
/*
 * pci_dvma_unregister_callbacks: call hat_delete_callback() for every
 * DVMA page of the DMA handle `mp', then clear DMAI_FLAGS_RELOC in
 * mp->dmai_flags.
 *
 * The virtual address walked is the handle's bound object address
 * (dmao_obj.virt_obj.v_addr).  The first call covers only
 * IOMMU_PAGE_SIZE - mp->dmai_roffset bytes; the remaining
 * mp->dmai_ndvmapages - 1 calls each cover a full IOMMU_PAGE_SIZE.
 * The cookie handed to each call is MP_HAT_CB_COOKIE(mp, <page number>).
 *
 * The ASSERTs require that the object lives in the kernel address space
 * (hat is kas.a_hat) and that it was not bound through a page list.
 * pci_p is not referenced in the lines visible here.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the braces, the declarations of hat_p, offset and i, the
 * statement guarded by the PCI_DMA_CANRELOC() test and the mask operand
 * of the vaddr rounding expression are not visible; restore them from
 * the original file before building.
 */
66 pci_dvma_unregister_callbacks(pci_t
*pci_p
, ddi_dma_impl_t
*mp
)
68 ddi_dma_obj_t
*dobj_p
= &mp
->dmai_object
;
69 struct as
*as_p
= dobj_p
->dmao_obj
.virt_obj
.v_as
;
70 page_t
**pplist
= dobj_p
->dmao_obj
.virt_obj
.v_priv
;
71 caddr_t vaddr
= dobj_p
->dmao_obj
.virt_obj
.v_addr
;
/* NOTE(review): the statement controlled by this test is missing here. */
76 if (!PCI_DMA_CANRELOC(mp
))
/* A NULL address space pointer is treated as the kernel's (kas). */
79 hat_p
= (as_p
== NULL
)? kas
.a_hat
: as_p
->a_hat
;
80 ASSERT(hat_p
== kas
.a_hat
);
81 ASSERT(pplist
== NULL
);
/* First page: partial length, cookie number 0. */
83 offset
= mp
->dmai_roffset
;
84 hat_delete_callback(vaddr
, IOMMU_PAGE_SIZE
- offset
, mp
, HAC_PAGELOCK
,
85 MP_HAT_CB_COOKIE(mp
, 0));
/* Advance vaddr by one IOMMU page; the right-hand mask operand is missing. */
86 vaddr
= (caddr_t
)(((uintptr_t)vaddr
+ IOMMU_PAGE_SIZE
) &
/* Remaining pages: one full-page callback each, cookie number i. */
88 for (i
= 1; i
< mp
->dmai_ndvmapages
; i
++) {
89 hat_delete_callback(vaddr
, IOMMU_PAGE_SIZE
, mp
, HAC_PAGELOCK
,
90 MP_HAT_CB_COOKIE(mp
, i
));
91 vaddr
+= IOMMU_PAGE_SIZE
;
93 mp
->dmai_flags
&= ~DMAI_FLAGS_RELOC
;
/*
 * pci_dvma_postrelocator: callback for handles bound through regular
 * DVMA.  Elsewhere in this file it is passed to hat_register_callback()
 * together with pci_common_prerelocator and pci_dma_relocerr.
 *
 * flags == HAT_POSTUNSUSPEND:
 *	Under pci_reloc_mutex, decrement pci_reloc_presuspend.  When it
 *	reaches zero, clear pci_reloc_thread and cv_broadcast()
 *	pci_reloc_cv.
 *
 * otherwise (asserted to be HAT_UNSUSPEND):
 *	1) Compute the page index of `va' within the bound object
 *	   (va - baseva, converted with IOMMU_BTOPR).
 *	2) Record newpfn in mp->dmai_pfnlst: stored directly in the
 *	   pointer field when the handle has exactly one DVMA page,
 *	   otherwise as element [index] of an iopfn_t array.
 *	3) Issue ddi_dma_mctl(DDI_DMA_REMAP) for one IOMMU page
 *	   (length = IOMMU_PTOB(1)) at that offset.
 *	4) Issue ddi_ctlops(DDI_CTLOPS_UNQUIESCE) on rdip.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the final parameter (newpfn is used but its declaration is not
 * visible), the declarations of index and offset, the braces, the
 * `else' between the two pfn-list cases, every return statement and
 * the right-hand side of the last comparison are missing.  Any update
 * of pci_reloc_suspend that follows the ASSERT on it is likewise not
 * visible.
 */
97 pci_dvma_postrelocator(caddr_t va
, uint_t len
, uint_t flags
, void *mpvoid
,
100 ddi_dma_impl_t
*mp
= (ddi_dma_impl_t
*)mpvoid
;
101 dev_info_t
*rdip
= mp
->dmai_rdip
;
102 ddi_dma_obj_t
*dobj_p
= &mp
->dmai_object
;
103 page_t
**pplist
= dobj_p
->dmao_obj
.virt_obj
.v_priv
;
104 caddr_t baseva
= dobj_p
->dmao_obj
.virt_obj
.v_addr
;
106 size_t length
= IOMMU_PTOB(1);
109 DEBUG0(DBG_RELOC
, rdip
, "postrelocator called\n");
/* Last step of a relocation: drop this thread's presuspend reference. */
111 if (flags
== HAT_POSTUNSUSPEND
) {
112 mutex_enter(&pci_reloc_mutex
);
113 ASSERT(pci_reloc_thread
== curthread
);
114 ASSERT(pci_reloc_presuspend
> 0);
115 if (--pci_reloc_presuspend
== 0) {
116 pci_reloc_thread
= NULL
;
117 cv_broadcast(&pci_reloc_cv
);
119 mutex_exit(&pci_reloc_mutex
);
123 ASSERT(flags
== HAT_UNSUSPEND
);
124 ASSERT(pci_reloc_suspend
> 0);
127 ASSERT(len
<= length
);
128 ASSERT(pplist
== NULL
); /* addr bind handle only */
129 ASSERT(dobj_p
->dmao_obj
.virt_obj
.v_as
== &kas
||
130 dobj_p
->dmao_obj
.virt_obj
.v_as
== NULL
);
131 ASSERT(PCI_DMA_ISDVMA(mp
));
132 ASSERT(pci_reloc_thread
== curthread
);
/* Locate the relocated page inside the bound object. */
134 offset
= va
- baseva
;
135 index
= IOMMU_BTOPR(offset
);
136 ASSERT(index
< mp
->dmai_ndvmapages
);
138 DEBUG3(DBG_RELOC
, rdip
, "index 0x%x, vaddr 0x%llx, baseva 0x%llx\n",
139 index
, (int64_t)va
, (int64_t)baseva
);
/* Single-page handles keep the pfn in the dmai_pfnlst field itself. */
141 if ((mp
)->dmai_ndvmapages
== 1) {
142 DEBUG2(DBG_RELOC
, rdip
, "pfn remap (1) 0x%x -> 0x%x\n",
143 mp
->dmai_pfnlst
, newpfn
);
144 mp
->dmai_pfnlst
= (void *)newpfn
;
/* Multi-page case; the `else' that introduces it is missing here. */
146 DEBUG3(DBG_RELOC
, rdip
, "pfn remap (%d) 0x%x -> 0x%x\n",
147 index
, ((iopfn_t
*)mp
->dmai_pfnlst
)[index
], newpfn
);
148 ((iopfn_t
*)mp
->dmai_pfnlst
)[index
] = (iopfn_t
)newpfn
;
/* NOTE(review): the statements taken when these two calls fail are missing. */
151 if (ddi_dma_mctl(rdip
, rdip
, (ddi_dma_handle_t
)mp
, DDI_DMA_REMAP
,
152 &offset
, &length
, NULL
, 0) != DDI_SUCCESS
)
154 if (ddi_ctlops(rdip
, rdip
, DDI_CTLOPS_UNQUIESCE
, NULL
, NULL
) !=
162 * Log a warning message if a callback is still registered on
163 * a page which is being freed. This is indicative of a driver
164 * bug -- DMA handles are bound, and the memory is being freed by
165 * the VM subsystem without an unbind call on the handle first.
/*
 * pci_dma_relocerr: error callback; elsewhere in this file it is the
 * routine passed to both hat_register_callback() calls.
 *
 * For errorcode == HAT_CB_ERR_LEAKED a message is issued through
 * cmn_err() at CE_PANIC when pci_dma_panic_on_leak is non-zero and at
 * CE_WARN otherwise.  len is not referenced in the visible lines.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the braces, the two arguments matching the 0x%p conversions in
 * the cmn_err() format, and the return statements for both the handled
 * and the unknown-error-code path are missing.
 */
168 pci_dma_relocerr(caddr_t va
, uint_t len
, uint_t errorcode
, void *mpvoid
)
/* Severity is chosen by the pci_dma_panic_on_leak tunable. */
170 int errlevel
= pci_dma_panic_on_leak
? CE_PANIC
: CE_WARN
;
171 if (errorcode
== HAT_CB_ERR_LEAKED
) {
172 cmn_err(errlevel
, "object 0x%p has a bound DMA handle 0x%p\n",
177 /* unknown error code, unhandled so panic */
182 * pci DVMA remap entry points
184 * Called in response to a DDI_DMA_REMAP DMA ctlops command.
185 * Remaps the region specified in the underlying IOMMU. Safe
186 * to assume that the bus was quiesced and ddi_dma_sync() was
187 * invoked by the caller before we got to this point.
/*
 * pci_dvma_remap: remap part of a DVMA binding in the IOMMU.
 *
 *	dip	nexus devinfo; its instance number selects the pci_t
 *		soft state
 *	rdip	not referenced in the visible lines (the debug message
 *		uses mp->dmai_rdip instead)
 *	mp	DMA handle whose mapping is being changed
 *	offset	byte offset into the binding, converted to a page index
 *		with IOMMU_BTOPR
 *	length	byte length, converted to a page count with IOMMU_BTOPR
 *
 * The work is delegated to iommu_remap_pages(); the visible code always
 * returns DDI_SUCCESS.  The ASSERT requires the PBM quiesce count to be
 * positive on entry.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the braces, the declarations of dvma_pg, idx and npgs, and the
 * original line between the idx and npgs assignments are missing, so
 * whether dvma_pg is adjusted by idx before use cannot be confirmed
 * from here.
 */
190 pci_dvma_remap(dev_info_t
*dip
, dev_info_t
*rdip
, ddi_dma_impl_t
*mp
,
191 off_t offset
, size_t length
)
193 pci_t
*pci_p
= get_pci_soft_state(ddi_get_instance(dip
));
194 iommu_t
*iommu_p
= pci_p
->pci_iommu_p
;
/* Convert the handle's DVMA address and the byte range to page units. */
199 dvma_pg
= IOMMU_BTOP(mp
->dmai_mapping
);
200 idx
= IOMMU_BTOPR(offset
);
202 npgs
= IOMMU_BTOPR(length
);
204 DEBUG3(DBG_RELOC
, mp
->dmai_rdip
,
205 "pci_dvma_remap: dvma_pg 0x%llx len 0x%llx idx 0x%x\n",
206 dvma_pg
, length
, idx
);
208 ASSERT(pci_p
->pci_pbm_p
->pbm_quiesce_count
> 0);
209 iommu_remap_pages(iommu_p
, mp
, dvma_pg
, npgs
, idx
);
211 return (DDI_SUCCESS
);
/*
 * pci_fdvma_remap: rewrite the IOMMU TSB entries of a fast-DVMA handle
 * for `npages' DVMA pages starting at page `dvma_pg', pointing them at
 * `newpfn'.
 *
 * The TSB slot used is pg_index + i, where pg_index is dvma_pg relative
 * to iommu_p->dvma_base_pg.  Each entry is MAKE_TTE_TEMPLATE(pfn, mp)
 * or'ed with IOMMU_PTOB(pfn) and is followed by IOMMU_PAGE_FLUSH() of
 * that DVMA page.  In the visible lines pfn is never changed inside the
 * loop, so every page in the range receives the same frame.
 *
 * If index + npages exceeds mp->dmai_ndvmapages, a message is issued at
 * CE_PANIC when pci_panic_on_fatal_errors is set and at CE_WARN
 * otherwise.  A CE_WARN message naming kvaddr is issued on the
 * "can't get page frame" path.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the braces, the declarations of i and tte, the tails of the
 * first DEBUG3() and first cmn_err() argument lists, the statement
 * executed after the limit warning, and the statement controlled by the
 * PFN_INVALID test (presumably a jump to the final warning -- confirm
 * against the original) are missing.
 */
215 pci_fdvma_remap(ddi_dma_impl_t
*mp
, caddr_t kvaddr
, dvma_addr_t dvma_pg
,
216 size_t npages
, size_t index
, pfn_t newpfn
)
218 fdvma_t
*fdvma_p
= (fdvma_t
*)mp
->dmai_fdvma
;
219 pci_t
*pci_p
= (pci_t
*)fdvma_p
->softsp
;
220 iommu_t
*iommu_p
= pci_p
->pci_iommu_p
;
221 dev_info_t
*dip
= pci_p
->pci_dip
;
222 iopfn_t pfn
= (iopfn_t
)newpfn
;
223 dvma_addr_t pg_index
= dvma_pg
- iommu_p
->dvma_base_pg
;
227 /* make sure we don't exceed reserved boundary */
228 DEBUG3(DBG_FAST_DVMA
, dip
, "fast remap index=%x: %p, npgs=%x", index
,
230 if (index
+ npages
> mp
->dmai_ndvmapages
) {
231 cmn_err(pci_panic_on_fatal_errors
? CE_PANIC
: CE_WARN
,
232 "%s%d: fdvma remap index(%lx)+pgs(%lx) exceeds limit\n",
233 ddi_driver_name(dip
), ddi_get_instance(dip
),
/* One TSB entry per page; kvaddr advances only for the warning message. */
238 for (i
= 0; i
< npages
; i
++, kvaddr
+= IOMMU_PAGE_SIZE
) {
239 DEBUG3(DBG_FAST_DVMA
, dip
, "remap dvma_pg %x -> pfn %x,"
240 " old tte 0x%llx\n", dvma_pg
+ i
, pfn
,
241 iommu_p
->iommu_tsb_vaddr
[pg_index
+ i
]);
/* NOTE(review): the statement controlled by this test is missing here. */
243 if (pfn
== PFN_INVALID
)
247 tte
= MAKE_TTE_TEMPLATE(pfn
, mp
);
249 /* XXX assumes iommu and mmu has same page size */
250 iommu_p
->iommu_tsb_vaddr
[pg_index
+ i
] = tte
| IOMMU_PTOB(pfn
);
251 IOMMU_PAGE_FLUSH(iommu_p
, (dvma_pg
+ i
));
255 cmn_err(CE_WARN
, "%s%d: fdvma remap can't get page frame for vaddr %p",
256 ddi_driver_name(dip
), ddi_get_instance(dip
), kvaddr
);
/*
 * pci_fdvma_prerelocator: check that `va' falls inside one of the
 * kernel virtual ranges currently recorded for a fast-DVMA handle.
 *
 * For each index i below mp->dmai_ndvmapages the range tested is
 * [kvbase[i], kvbase[i] + IOMMU_PTOB(pagecnt[i])).  Returns 0 as soon
 * as a containing range is found.  len and flags are not referenced in
 * the visible lines.  pci_common_prerelocator calls this routine for
 * handles with DMP_BYPASSNEXUS set.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the braces, the declaration of i and the value returned when no
 * range contains va are missing.
 */
260 pci_fdvma_prerelocator(caddr_t va
, uint_t len
, uint_t flags
, void *mpvoid
)
262 ddi_dma_impl_t
*mp
= (ddi_dma_impl_t
*)mpvoid
;
263 fdvma_t
*fdvma_p
= (fdvma_t
*)mp
->dmai_fdvma
;
264 caddr_t baseva
, endva
;
268 * It isn't safe to do relocation if all of the IOMMU
269 * mappings haven't yet been established at this index.
271 for (i
= 0; i
< mp
->dmai_ndvmapages
; i
++) {
272 baseva
= fdvma_p
->kvbase
[i
];
273 endva
= baseva
+ IOMMU_PTOB(fdvma_p
->pagecnt
[i
]);
274 if (va
>= baseva
&& va
< endva
)
275 return (0); /* found a valid index */
/*
 * pci_fdvma_postrelocator: callback for fast-DVMA handles.  Elsewhere
 * in this file it is passed to hat_register_callback() together with
 * pci_common_prerelocator and pci_dma_relocerr.
 *
 * flags == HAT_POSTUNSUSPEND:
 *	Under pci_reloc_mutex, decrement pci_reloc_presuspend.  When it
 *	reaches zero, clear pci_reloc_thread and cv_broadcast()
 *	pci_reloc_cv.
 *
 * otherwise (asserted to be HAT_UNSUSPEND, on a DMP_BYPASSNEXUS handle):
 *	Walk every index of the handle.  For each recorded range
 *	[kvbase[i], kvbase[i] + IOMMU_PTOB(pagecnt[i])) that contains
 *	va, compute the DVMA page and call pci_fdvma_remap() for
 *	IOMMU_BTOP(PAGESIZE) pages.  Then issue
 *	ddi_ctlops(DDI_CTLOPS_UNQUIESCE) on rdip.
 *
 * NOTE(review): unlike pci_dvma_postrelocator, no
 * ASSERT(pci_reloc_presuspend > 0) precedes the decrement here.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the final parameter, the declarations of i, index, dvma_pg,
 * baseva and endva, the braces, the trailing arguments of the
 * pci_fdvma_remap() call, every return statement and the right-hand
 * side of the last comparison are missing.
 */
281 pci_fdvma_postrelocator(caddr_t va
, uint_t len
, uint_t flags
, void *mpvoid
,
284 ddi_dma_impl_t
*mp
= (ddi_dma_impl_t
*)mpvoid
;
285 dev_info_t
*rdip
= mp
->dmai_rdip
;
286 fdvma_t
*fdvma_p
= (fdvma_t
*)mp
->dmai_fdvma
;
289 size_t length
= PAGESIZE
;
292 DEBUG0(DBG_RELOC
, rdip
, "fdvma postrelocator called\n");
/* Last step of a relocation: drop this thread's presuspend reference. */
294 if (flags
== HAT_POSTUNSUSPEND
) {
295 mutex_enter(&pci_reloc_mutex
);
296 ASSERT(pci_reloc_thread
== curthread
);
297 if (--pci_reloc_presuspend
== 0) {
298 pci_reloc_thread
= NULL
;
299 cv_broadcast(&pci_reloc_cv
);
301 mutex_exit(&pci_reloc_mutex
);
307 ASSERT(flags
== HAT_UNSUSPEND
);
308 ASSERT(len
<= length
);
309 ASSERT((mp
->dmai_rflags
& DMP_BYPASSNEXUS
) != 0);
312 * This virtual page can have multiple cookies that refer
313 * to it within the same handle. We must walk the whole
314 * table for this DMA handle finding all the cookies, and
315 * update all of them. Sigh.
317 for (i
= 0; i
< mp
->dmai_ndvmapages
; i
++) {
321 baseva
= fdvma_p
->kvbase
[i
];
322 endva
= baseva
+ IOMMU_PTOB(fdvma_p
->pagecnt
[i
]);
324 if (va
>= baseva
&& va
< endva
) {
/* Page index within the handle: range start plus pages into the range. */
325 index
= i
+ IOMMU_BTOP(va
- baseva
);
326 ASSERT(index
< mp
->dmai_ndvmapages
);
328 DEBUG4(DBG_RELOC
, rdip
, "mp %p: index 0x%x, "
329 " vaddr 0x%llx, baseva 0x%llx\n", mp
, index
,
330 (int64_t)va
, (int64_t)baseva
);
332 dvma_pg
= IOMMU_BTOP(mp
->dmai_mapping
) + index
;
333 pci_fdvma_remap(mp
, va
, dvma_pg
, IOMMU_BTOP(length
),
/* NOTE(review): the statement taken when this call fails is missing. */
338 if (ddi_ctlops(rdip
, rdip
, DDI_CTLOPS_UNQUIESCE
, NULL
, NULL
) !=
/*
 * pci_fdvma_unregister_callbacks: call hat_delete_callback() for each
 * page of the fast-DVMA range recorded at `index'.
 *
 * The range starts at fdvma_p->kvbase[index] and spans
 * fdvma_p->pagecnt[index] IOMMU pages; page i uses the cookie stored in
 * fdvma_p->cbcookie[index + i].  The loop condition also tests
 * pci_dvma_remap_enabled, so no callback is deleted while that flag is
 * zero.  pci_p is not referenced in the visible lines.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the braces and the declaration of i are missing.
 */
346 pci_fdvma_unregister_callbacks(pci_t
*pci_p
, fdvma_t
*fdvma_p
,
347 ddi_dma_impl_t
*mp
, uint_t index
)
349 size_t npgs
= fdvma_p
->pagecnt
[index
];
350 caddr_t kva
= fdvma_p
->kvbase
[index
];
353 ASSERT(index
+ npgs
<= mp
->dmai_ndvmapages
);
356 for (i
= 0; i
< npgs
&& pci_dvma_remap_enabled
;
357 i
++, kva
+= IOMMU_PAGE_SIZE
)
358 hat_delete_callback(kva
, IOMMU_PAGE_SIZE
, mp
, HAC_PAGELOCK
,
359 fdvma_p
->cbcookie
[index
+ i
]);
/*
 * pci_common_prerelocator: pre-relocation callback shared by regular
 * and fast DVMA handles (it is the routine passed to both
 * hat_register_callback() calls elsewhere in this file).
 *
 * flags == HAT_PRESUSPEND:
 *	1) Screen the handle: rdip must carry the
 *	   "dvma-remap-supported" property, the handle must satisfy
 *	   PCI_DMA_ISMAPPED(), and it must either be a DMP_BYPASSNEXUS
 *	   handle (further checked by pci_fdvma_prerelocator()) or
 *	   satisfy PCI_DMA_ISDVMA().
 *	2) Under pci_reloc_mutex, unless the current thread already
 *	   owns it, cv_wait() on pci_reloc_cv until pci_reloc_thread is
 *	   NULL, then claim it for curthread.
 *	3) Increment pci_reloc_presuspend.
 *
 * otherwise (asserted to be HAT_SUSPEND):
 *	Issue ddi_ctlops(DDI_CTLOPS_QUIESCE) on rdip, then
 *	ddi_dma_sync(h, 0, 0, DDI_DMA_SYNC_FORKERNEL).
 *
 * NOTE(review): in the visible lines the pci_reloc_presuspend increment
 * follows the mutex_exit(), whereas the postrelocators decrement it
 * with pci_reloc_mutex held -- presumably safe because only the thread
 * recorded in pci_reloc_thread reaches the increment; confirm.
 *
 * NOTE(review): this listing is an incomplete extraction.  The return
 * type, the braces, the declaration of ret, every return statement
 * (including the value returned for each failed screening test and for
 * failed quiesce/sync calls), the handling of ret after the
 * pci_fdvma_prerelocator() call, and any update of pci_reloc_suspend
 * are missing.
 */
363 pci_common_prerelocator(caddr_t va
, uint_t len
, uint_t flags
, void *mpvoid
)
/* mp and h are two views of the same opaque callback argument. */
365 ddi_dma_impl_t
*mp
= (ddi_dma_impl_t
*)mpvoid
;
366 ddi_dma_handle_t h
= (ddi_dma_handle_t
)mpvoid
;
367 dev_info_t
*rdip
= mp
->dmai_rdip
;
370 DEBUG0(DBG_RELOC
, rdip
, "prerelocator called\n");
372 if (flags
== HAT_PRESUSPEND
) {
/* NOTE(review): the statements controlled by the next tests are missing. */
373 if (!ddi_prop_exists(DDI_DEV_T_ANY
, rdip
, DDI_PROP_NOTPROM
,
374 "dvma-remap-supported"))
376 if (!PCI_DMA_ISMAPPED(mp
))
379 if (mp
->dmai_rflags
& DMP_BYPASSNEXUS
) {
380 ret
= pci_fdvma_prerelocator(va
, len
, flags
, mpvoid
);
383 } else if (!PCI_DMA_ISDVMA(mp
))
387 * Acquire the exclusive right to relocate a PCI DMA page,
388 * since we later have to pause CPUs which could otherwise
389 * lead to all sorts of synchronization headaches.
391 mutex_enter(&pci_reloc_mutex
);
/* A thread that already owns the relocation right does not wait again. */
392 if (pci_reloc_thread
!= curthread
) {
393 while (pci_reloc_thread
!= NULL
) {
394 cv_wait(&pci_reloc_cv
, &pci_reloc_mutex
);
396 pci_reloc_thread
= curthread
;
397 ASSERT(pci_reloc_suspend
== 0);
399 mutex_exit(&pci_reloc_mutex
);
401 ASSERT(pci_reloc_thread
== curthread
);
402 pci_reloc_presuspend
++;
407 ASSERT(flags
== HAT_SUSPEND
);
408 ASSERT(PCI_DMA_CANRELOC(mp
));
409 ASSERT(pci_reloc_thread
== curthread
);
/* Quiesce the device, then sync the handle for kernel access. */
412 if (ddi_ctlops(rdip
, rdip
, DDI_CTLOPS_QUIESCE
, NULL
, NULL
) !=
415 if (ddi_dma_sync(h
, 0, 0, DDI_DMA_SYNC_FORKERNEL
) != DDI_SUCCESS
)
422 * Register two callback types: one for normal DVMA and the
423 * other for fast DVMA, since each method has a different way
424 * of tracking the PFNs behind a handle.
429 int key
= pci_reloc_getkey();
431 mutex_init(&pci_reloc_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
432 cv_init(&pci_reloc_cv
, NULL
, CV_DEFAULT
, NULL
);
433 pci_dvma_cbid
= hat_register_callback(
434 key
+ ('D'<<24 | 'V'<<16 | 'M'<<8 | 'A'),
435 pci_common_prerelocator
, pci_dvma_postrelocator
,
436 pci_dma_relocerr
, 1);
437 pci_fast_dvma_cbid
= hat_register_callback(
438 key
+ ('F'<<24 | 'D'<<16 | 'M'<<8 | 'A'),
439 pci_common_prerelocator
,
440 pci_fdvma_postrelocator
, pci_dma_relocerr
, 1);
446 cv_destroy(&pci_reloc_cv
);
447 mutex_destroy(&pci_reloc_mutex
);