/*
 * DMA helper functions
 *
 * Copyright (c) 2009,2020 Red Hat
 *
 * This work is licensed under the terms of the GNU General Public License
 * (GNU GPL), version 2 or later.
 */
#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "sysemu/dma.h"
#include "trace/trace-root.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/cpu-timers.h"
#include "qemu/range.h"

/* #define DEBUG_IOMMU */
MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
                           uint8_t c, dma_addr_t len, MemTxAttrs attrs)
{
    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);

    return address_space_set(as, addr, c, len, attrs);
}
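/*
 * Usage sketch (illustrative only): a device model could clear a 4 KiB
 * guest buffer through the system address space like this; the 'addr'
 * value is an assumption for the example.
 *
 *     MemTxResult r = dma_memory_set(&address_space_memory, addr, 0, 4096,
 *                                    MEMTXATTRS_UNSPECIFIED);
 *     if (r != MEMTX_OK) {
 *         ... report a DMA error to the guest ...
 *     }
 */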
void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
                      AddressSpace *as)
{
    qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
    qsg->nsg = 0;
    qsg->nalloc = alloc_hint;
    qsg->size = 0;
    qsg->as = as;
    qsg->dev = dev;
    object_ref(OBJECT(dev));
}
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
    if (qsg->nsg == qsg->nalloc) {
        qsg->nalloc = 2 * qsg->nalloc + 1;
        qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
    }
    qsg->sg[qsg->nsg].base = base;
    qsg->sg[qsg->nsg].len = len;
    qsg->size += len;
    ++qsg->nsg;
}
void qemu_sglist_destroy(QEMUSGList *qsg)
{
    object_unref(OBJECT(qsg->dev));
    g_free(qsg->sg);
    memset(qsg, 0, sizeof(*qsg));
}
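/*
 * Typical QEMUSGList lifecycle (illustrative sketch; the device state 's',
 * its address space 's->dma_as' and the descriptor addresses/lengths are
 * assumptions for the example):
 *
 *     QEMUSGList qsg;
 *
 *     qemu_sglist_init(&qsg, DEVICE(s), 2, &s->dma_as);
 *     qemu_sglist_add(&qsg, desc0_addr, desc0_len);
 *     qemu_sglist_add(&qsg, desc1_addr, desc1_len);
 *     ... submit via dma_blk_read()/dma_blk_write() or dma_buf_*() ...
 *     qemu_sglist_destroy(&qsg);
 */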
typedef struct {
    BlockAIOCB common;
    AioContext *ctx;
    BlockAIOCB *acb;
    QEMUSGList *sg;
    uint32_t align;
    uint64_t offset;
    DMADirection dir;
    int sg_cur_index;
    dma_addr_t sg_cur_byte;
    QEMUIOVector iov;
    QEMUBH *bh;
    DMAIOFunc *io_func;
    void *io_func_opaque;
} DMAAIOCB;

static void dma_blk_cb(void *opaque, int ret);
static void reschedule_dma(void *opaque)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;

    assert(!dbs->acb && dbs->bh);
    qemu_bh_delete(dbs->bh);
    dbs->bh = NULL;
    dma_blk_cb(dbs, 0);
}
static void dma_blk_unmap(DMAAIOCB *dbs)
{
    int i;

    for (i = 0; i < dbs->iov.niov; ++i) {
        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
                         dbs->iov.iov[i].iov_len, dbs->dir,
                         dbs->iov.iov[i].iov_len);
    }
    qemu_iovec_reset(&dbs->iov);
}
static void dma_complete(DMAAIOCB *dbs, int ret)
{
    trace_dma_complete(dbs, ret, dbs->common.cb);

    assert(!dbs->acb && !dbs->bh);
    dma_blk_unmap(dbs);
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, ret);
    }
    qemu_iovec_destroy(&dbs->iov);
    qemu_aio_unref(dbs);
}
static void dma_blk_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    AioContext *ctx = dbs->ctx;
    dma_addr_t cur_addr, cur_len;
    void *mem;

    trace_dma_blk_cb(dbs, ret);

    aio_context_acquire(ctx);
    dbs->acb = NULL;
    dbs->offset += dbs->iov.size;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        goto out;
    }
    dma_blk_unmap(dbs);

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
                             MEMTXATTRS_UNSPECIFIED);
        /*
         * Make reads deterministic in icount mode. Windows sometimes issues
         * disk read requests with overlapping SGs. It leads
         * to non-determinism, because resulting buffer contents may be mixed
         * from several sectors. This code splits all SGs into several
         * groups. SGs in every group do not overlap.
         */
        if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
            int i;
            for (i = 0 ; i < dbs->iov.niov ; ++i) {
                if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
                                   dbs->iov.iov[i].iov_len, (intptr_t)mem,
                                   cur_len)) {
                    dma_memory_unmap(dbs->sg->as, mem, cur_len,
                                     dbs->dir, cur_len);
                    mem = NULL;
                    break;
                }
            }
        }
        if (!mem)
            break;
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
        dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
        cpu_register_map_client(dbs->bh);
        goto out;
    }

    if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
        qemu_iovec_discard_back(&dbs->iov,
                                QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
    }

    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
                            dma_blk_cb, dbs, dbs->io_func_opaque);
    assert(dbs->acb);
out:
    aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    assert(!(dbs->acb && dbs->bh));
    if (dbs->acb) {
        /* This will invoke dma_blk_cb. */
        blk_aio_cancel_async(dbs->acb);
        return;
    }

    if (dbs->bh) {
        cpu_unregister_map_client(dbs->bh);
        qemu_bh_delete(dbs->bh);
        dbs->bh = NULL;
    }
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, -ECANCELED);
    }
}
static AioContext *dma_get_aio_context(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    return dbs->ctx;
}
static const AIOCBInfo dma_aiocb_info = {
    .aiocb_size         = sizeof(DMAAIOCB),
    .cancel_async       = dma_aio_cancel,
    .get_aio_context    = dma_get_aio_context,
};
BlockAIOCB *dma_blk_io(AioContext *ctx,
    QEMUSGList *sg, uint64_t offset, uint32_t align,
    DMAIOFunc *io_func, void *io_func_opaque,
    BlockCompletionFunc *cb,
    void *opaque, DMADirection dir)
{
    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);

    trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));

    dbs->acb = NULL;
    dbs->sg = sg;
    dbs->ctx = ctx;
    dbs->offset = offset;
    dbs->align = align;
    dbs->sg_cur_index = 0;
    dbs->sg_cur_byte = 0;
    dbs->dir = dir;
    dbs->io_func = io_func;
    dbs->io_func_opaque = io_func_opaque;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
    dma_blk_cb(dbs, 0);
    return &dbs->common;
}
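/*
 * A DMAIOFunc bridges the mapped QEMUIOVector to the actual I/O backend;
 * the wrappers below forward to blk_aio_preadv()/blk_aio_pwritev(). A
 * custom DMAIOFunc passed to dma_blk_io() follows the same shape
 * (illustrative sketch; MyBackend and my_backend_aio_rw() are hypothetical):
 *
 *     static BlockAIOCB *my_io_func(int64_t offset, QEMUIOVector *iov,
 *                                   BlockCompletionFunc *cb, void *cb_opaque,
 *                                   void *opaque)
 *     {
 *         MyBackend *be = opaque;
 *         return my_backend_aio_rw(be, offset, iov, cb, cb_opaque);
 *     }
 */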
static
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
                                 BlockCompletionFunc *cb, void *cb_opaque,
                                 void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
}
BlockAIOCB *dma_blk_read(BlockBackend *blk,
                         QEMUSGList *sg, uint64_t offset, uint32_t align,
                         void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_read_io_func, blk, cb, opaque,
                      DMA_DIRECTION_FROM_DEVICE);
}
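/*
 * Illustrative use from a device model (sketch; 's', 'lba' and the
 * completion callback name are assumptions for the example):
 *
 *     dma_acct_start(s->blk, &s->acct, &s->qsg, BLOCK_ACCT_READ);
 *     s->aiocb = dma_blk_read(s->blk, &s->qsg, lba << BDRV_SECTOR_BITS,
 *                             BDRV_SECTOR_SIZE, my_dma_read_complete, s);
 */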
static
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
                                  BlockCompletionFunc *cb, void *cb_opaque,
                                  void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
}
BlockAIOCB *dma_blk_write(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_write_io_func, blk, cb, opaque,
                      DMA_DIRECTION_TO_DEVICE);
}
static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
                              QEMUSGList *sg, DMADirection dir,
                              MemTxAttrs attrs)
{
    uint8_t *ptr = buf;
    dma_addr_t xresidual;
    int sg_cur_index;
    MemTxResult res = MEMTX_OK;

    xresidual = sg->size;
    sg_cur_index = 0;
    len = MIN(len, xresidual);
    while (len > 0) {
        ScatterGatherEntry entry = sg->sg[sg_cur_index++];
        dma_addr_t xfer = MIN(len, entry.len);
        res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
        ptr += xfer;
        len -= xfer;
        xresidual -= xfer;
    }

    if (residual) {
        *residual = xresidual;
    }
    return res;
}
MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
                         QEMUSGList *sg, MemTxAttrs attrs)
{
    return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
}
MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
                          QEMUSGList *sg, MemTxAttrs attrs)
{
    return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
}
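/*
 * dma_buf_read()/dma_buf_write() copy a bounce buffer through a QEMUSGList
 * synchronously, which suits small control structures: DMA_DIRECTION_FROM_DEVICE
 * stores the local buffer into the guest SG list, DMA_DIRECTION_TO_DEVICE
 * fills the local buffer from guest memory. Illustrative sketch ('s' and
 * 'reply' are assumptions for the example):
 *
 *     dma_addr_t residual;
 *     MemTxResult r = dma_buf_read(&reply, sizeof(reply), &residual,
 *                                  &s->qsg, MEMTXATTRS_UNSPECIFIED);
 *     if (r != MEMTX_OK || residual) {
 *         ... the guest SG list was unreachable or too short ...
 *     }
 */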
void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                    QEMUSGList *sg, enum BlockAcctType type)
{
    block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
}
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
    uint64_t alignment_mask, size_mask;

    if (max_addr_bits != 64) {
        max_mask = (1ULL << max_addr_bits) - 1;
    }

    alignment_mask = start ? (start & -start) - 1 : max_mask;
    alignment_mask = MIN(alignment_mask, max_mask);
    size_mask = MIN(addr_mask, max_mask);

    if (alignment_mask <= size_mask) {
        /* Increase the alignment of start */
        return alignment_mask;
    } else {
        /* Find the largest page mask from size */
        if (addr_mask == UINT64_MAX) {
            return UINT64_MAX;
        }
        return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
    }
}