4 * Copyright (c) 2009,2020 Red Hat
6 * This work is licensed under the terms of the GNU General Public License
7 * (GNU GPL), version 2 or later.
10 #include "qemu/osdep.h"
11 #include "sysemu/block-backend.h"
12 #include "sysemu/dma.h"
13 #include "trace/trace-root.h"
14 #include "qemu/thread.h"
15 #include "qemu/main-loop.h"
16 #include "sysemu/cpu-timers.h"
17 #include "qemu/range.h"
19 /* #define DEBUG_IOMMU */
/*
 * dma_memory_set: fill a range of @as with the byte @c.
 *
 * @as:   address space handed to dma_barrier() and address_space_write()
 * @addr: start address passed to address_space_write()
 * @c:    fill byte, replicated into a local FILLBUF_SIZE-byte buffer
 * @len:  number of bytes requested; one write is capped at FILLBUF_SIZE
 *
 * Results of address_space_write() are OR-ed into a MemTxResult that
 * starts out as MEMTX_OK.
 *
 * NOTE(review): the trailing write arguments, the advance of addr/len
 * and the return statement are not visible here; presumably the
 * accumulated result is returned -- confirm.
 */
21 MemTxResult
dma_memory_set(AddressSpace
*as
, dma_addr_t addr
,
22 uint8_t c
, dma_addr_t len
)
24 dma_barrier(as
, DMA_DIRECTION_FROM_DEVICE
);
26 #define FILLBUF_SIZE 512
27 uint8_t fillbuf
[FILLBUF_SIZE
];
29 MemTxResult error
= MEMTX_OK
;
31 memset(fillbuf
, c
, FILLBUF_SIZE
);
/* Never write more than one buffer's worth at a time. */
33 l
= len
< FILLBUF_SIZE
? len
: FILLBUF_SIZE
;
34 error
|= address_space_write(as
, addr
, MEMTXATTRS_UNSPECIFIED
,
/*
 * qemu_sglist_init: prepare @qsg for use.
 *
 * @qsg:        list to initialise
 * @dev:        device on which a reference is taken with object_ref()
 * @alloc_hint: number of ScatterGatherEntry slots to allocate up front;
 *              also stored as the initial capacity (nalloc)
 *
 * NOTE(review): the rest of the parameter list and the initialisation
 * of the other QEMUSGList fields are not visible here -- confirm.
 */
43 void qemu_sglist_init(QEMUSGList
*qsg
, DeviceState
*dev
, int alloc_hint
,
46 qsg
->sg
= g_malloc(alloc_hint
* sizeof(ScatterGatherEntry
));
48 qsg
->nalloc
= alloc_hint
;
52 object_ref(OBJECT(dev
));
/*
 * qemu_sglist_add: store a (@base, @len) entry at index qsg->nsg.
 *
 * @qsg:  list receiving the entry
 * @base: value written to the entry's base field
 * @len:  value written to the entry's len field
 *
 * When the array is full (nsg == nalloc) the capacity grows to
 * 2 * nalloc + 1 via g_realloc() before the entry is written.
 *
 * NOTE(review): the update of qsg->nsg (and of any running total) is
 * not visible here -- confirm against the full definition.
 */
55 void qemu_sglist_add(QEMUSGList
*qsg
, dma_addr_t base
, dma_addr_t len
)
57 if (qsg
->nsg
== qsg
->nalloc
) {
/* 2n + 1 so that a capacity of zero still grows. */
58 qsg
->nalloc
= 2 * qsg
->nalloc
+ 1;
59 qsg
->sg
= g_realloc(qsg
->sg
, qsg
->nalloc
* sizeof(ScatterGatherEntry
));
61 qsg
->sg
[qsg
->nsg
].base
= base
;
62 qsg
->sg
[qsg
->nsg
].len
= len
;
/*
 * qemu_sglist_destroy: release @qsg.
 *
 * Drops the reference on qsg->dev and zeroes the whole structure.
 *
 * NOTE(review): freeing of qsg->sg is not visible here; since the
 * memset() clears the pointer, confirm it is freed beforehand.
 */
67 void qemu_sglist_destroy(QEMUSGList
*qsg
)
69 object_unref(OBJECT(qsg
->dev
));
71 memset(qsg
, 0, sizeof(*qsg
));
/*
 * Byte offset into the current scatter/gather entry: dma_blk_cb() adds
 * it to the entry's base and subtracts it from the entry's len.
 */
83 dma_addr_t sg_cur_byte
;
/* Forward declaration; the definition appears further down in this file. */
90 static void dma_blk_cb(void *opaque
, int ret
);
/*
 * reschedule_dma: bottom-half handler that dma_blk_cb() installs through
 * aio_bh_new().
 *
 * @opaque: the DMAAIOCB that was passed to aio_bh_new()
 *
 * Asserts that dbs->acb is unset while dbs->bh is set, then deletes
 * the bottom half.
 *
 * NOTE(review): whatever follows the qemu_bh_delete() is not visible
 * here -- presumably the transfer is restarted; confirm.
 */
92 static void reschedule_dma(void *opaque
)
94 DMAAIOCB
*dbs
= (DMAAIOCB
*)opaque
;
96 assert(!dbs
->acb
&& dbs
->bh
);
97 qemu_bh_delete(dbs
->bh
);
/*
 * dma_blk_unmap: undo the mappings recorded in dbs->iov.
 *
 * @dbs: request whose iovec entries are unmapped
 *
 * Calls dma_memory_unmap() once for each of the dbs->iov.niov entries,
 * passing iov_len both as the length and as the final argument, then
 * resets the iovec with qemu_iovec_reset().
 */
102 static void dma_blk_unmap(DMAAIOCB
*dbs
)
106 for (i
= 0; i
< dbs
->iov
.niov
; ++i
) {
107 dma_memory_unmap(dbs
->sg
->as
, dbs
->iov
.iov
[i
].iov_base
,
108 dbs
->iov
.iov
[i
].iov_len
, dbs
->dir
,
109 dbs
->iov
.iov
[i
].iov_len
);
111 qemu_iovec_reset(&dbs
->iov
);
/*
 * dma_complete: finish the request described by @dbs.
 *
 * @dbs: request being completed; neither acb nor bh may be set
 *       (asserted)
 * @ret: status forwarded to the caller's callback
 *
 * Emits the trace event, invokes dbs->common.cb (when non-NULL) with
 * dbs->common.opaque and @ret, and destroys the iovec.
 *
 * NOTE(review): only these steps are visible here; confirm whether the
 * mappings are released and the AIOCB is unreferenced as well.
 */
114 static void dma_complete(DMAAIOCB
*dbs
, int ret
)
116 trace_dma_complete(dbs
, ret
, dbs
->common
.cb
);
118 assert(!dbs
->acb
&& !dbs
->bh
);
120 if (dbs
->common
.cb
) {
121 dbs
->common
.cb(dbs
->common
.opaque
, ret
);
123 qemu_iovec_destroy(&dbs
->iov
);
/*
 * dma_blk_cb: per-chunk driver of a scatter/gather block transfer.
 *
 * @opaque: the DMAAIOCB
 * @ret:    status of the previous step; a negative value leads to
 *          dma_complete() being called with that status
 *
 * Visible flow:
 *  - advance dbs->offset by the size of the current iovec;
 *  - call dma_complete() when every SG entry has been consumed or
 *    @ret is negative;
 *  - while SG entries remain, map the rest of the current entry with
 *    dma_memory_map() and append the mapping to dbs->iov, tracking
 *    progress in sg_cur_index/sg_cur_byte;
 *  - if the iovec ended up empty, create a bottom half that runs
 *    reschedule_dma() and register it with cpu_register_map_client();
 *  - submit the iovec through dbs->io_func() with this function as
 *    the callback, holding the AioContext around the call.
 *
 * NOTE(review): several statements (returns, breaks, closing braces)
 * are not visible here; the flow above is derived from the visible
 * calls only -- confirm.
 */
127 static void dma_blk_cb(void *opaque
, int ret
)
129 DMAAIOCB
*dbs
= (DMAAIOCB
*)opaque
;
130 dma_addr_t cur_addr
, cur_len
;
133 trace_dma_blk_cb(dbs
, ret
);
136 dbs
->offset
+= dbs
->iov
.size
;
138 if (dbs
->sg_cur_index
== dbs
->sg
->nsg
|| ret
< 0) {
139 dma_complete(dbs
, ret
);
144 while (dbs
->sg_cur_index
< dbs
->sg
->nsg
) {
/* Resume inside the current entry at the byte offset reached so far. */
145 cur_addr
= dbs
->sg
->sg
[dbs
->sg_cur_index
].base
+ dbs
->sg_cur_byte
;
146 cur_len
= dbs
->sg
->sg
[dbs
->sg_cur_index
].len
- dbs
->sg_cur_byte
;
/* cur_len is passed by address, so the call may adjust it. */
147 mem
= dma_memory_map(dbs
->sg
->as
, cur_addr
, &cur_len
, dbs
->dir
);
149 * Make reads deterministic in icount mode. Windows sometimes issues
150 * disk read requests with overlapping SGs. It leads
151 * to non-determinism, because resulting buffer contents may be mixed
152 * from several sectors. This code splits all SGs into several
153 * groups. SGs in every group do not overlap.
155 if (mem
&& icount_enabled() && dbs
->dir
== DMA_DIRECTION_FROM_DEVICE
) {
157 for (i
= 0 ; i
< dbs
->iov
.niov
; ++i
) {
158 if (ranges_overlap((intptr_t)dbs
->iov
.iov
[i
].iov_base
,
159 dbs
->iov
.iov
[i
].iov_len
, (intptr_t)mem
,
161 dma_memory_unmap(dbs
->sg
->as
, mem
, cur_len
,
170 qemu_iovec_add(&dbs
->iov
, mem
, cur_len
);
171 dbs
->sg_cur_byte
+= cur_len
;
/* Entry fully consumed: restart the byte offset for the next one. */
172 if (dbs
->sg_cur_byte
== dbs
->sg
->sg
[dbs
->sg_cur_index
].len
) {
173 dbs
->sg_cur_byte
= 0;
178 if (dbs
->iov
.size
== 0) {
179 trace_dma_map_wait(dbs
);
180 dbs
->bh
= aio_bh_new(dbs
->ctx
, reschedule_dma
, dbs
);
181 cpu_register_map_client(dbs
->bh
);
/*
 * NOTE(review): the amount handed to qemu_iovec_discard_back() is the
 * aligned-down size itself, not the remainder above it. Presumably the
 * helper takes the number of bytes to drop from the tail -- confirm
 * this really trims the iovec to an aligned length.
 */
185 if (!QEMU_IS_ALIGNED(dbs
->iov
.size
, dbs
->align
)) {
186 qemu_iovec_discard_back(&dbs
->iov
,
187 QEMU_ALIGN_DOWN(dbs
->iov
.size
, dbs
->align
));
190 aio_context_acquire(dbs
->ctx
);
191 dbs
->acb
= dbs
->io_func(dbs
->offset
, &dbs
->iov
,
192 dma_blk_cb
, dbs
, dbs
->io_func_opaque
);
193 aio_context_release(dbs
->ctx
);
/*
 * dma_aio_cancel: cancel_async hook for DMAAIOCB (see dma_aiocb_info).
 *
 * @acb: the embedded 'common' member of a DMAAIOCB
 *
 * At most one of dbs->acb and dbs->bh may be set (asserted). The
 * visible calls cancel dbs->acb with blk_aio_cancel_async(),
 * unregister and delete the bottom half, and invoke the caller's
 * callback (when non-NULL) with -ECANCELED.
 *
 * NOTE(review): the conditions selecting between these steps are not
 * visible here -- confirm.
 */
197 static void dma_aio_cancel(BlockAIOCB
*acb
)
199 DMAAIOCB
*dbs
= container_of(acb
, DMAAIOCB
, common
);
201 trace_dma_aio_cancel(dbs
);
203 assert(!(dbs
->acb
&& dbs
->bh
));
205 /* This will invoke dma_blk_cb. */
206 blk_aio_cancel_async(dbs
->acb
);
211 cpu_unregister_map_client(dbs
->bh
);
212 qemu_bh_delete(dbs
->bh
);
215 if (dbs
->common
.cb
) {
216 dbs
->common
.cb(dbs
->common
.opaque
, -ECANCELED
);
/*
 * dma_get_aio_context: get_aio_context hook for DMAAIOCB
 * (see dma_aiocb_info).
 *
 * @acb: the embedded 'common' member of a DMAAIOCB
 *
 * Recovers the DMAAIOCB from @acb with container_of().
 *
 * NOTE(review): the return statement is not visible here; presumably
 * dbs->ctx is returned -- confirm.
 */
220 static AioContext
*dma_get_aio_context(BlockAIOCB
*acb
)
222 DMAAIOCB
*dbs
= container_of(acb
, DMAAIOCB
, common
);
/*
 * AIOCBInfo describing DMAAIOCB: the allocation size plus the cancel
 * and AioContext lookup hooks. Passed to qemu_aio_get() in dma_blk_io().
 */
227 static const AIOCBInfo dma_aiocb_info
= {
228 .aiocb_size
= sizeof(DMAAIOCB
),
229 .cancel_async
= dma_aio_cancel
,
230 .get_aio_context
= dma_get_aio_context
,
/*
 * dma_blk_io: set up a DMAAIOCB for a scatter/gather transfer.
 *
 * @ctx:            AioContext for the request (presumably stored in
 *                  dbs->ctx -- the assignment is not visible here)
 * @sg:             scatter/gather list; sg->nsg sizes the iovec
 * @offset:         starting offset, stored in dbs->offset
 * @align:          alignment value (presumably stored in dbs->align --
 *                  the assignment is not visible here)
 * @io_func:        function used to submit an iovec, stored in
 *                  dbs->io_func
 * @io_func_opaque: opaque pointer stored alongside @io_func
 * @cb:             completion callback given to qemu_aio_get()
 * @opaque:         argument for @cb, given to qemu_aio_get()
 * @dir:            transfer direction; traced as a boolean that is
 *                  true for DMA_DIRECTION_TO_DEVICE
 *
 * The AIOCB is obtained from qemu_aio_get() with dma_aiocb_info and a
 * NULL second argument; the SG cursor (sg_cur_index, sg_cur_byte)
 * starts at zero.
 *
 * NOTE(review): the remaining field assignments, the kick-off of the
 * transfer and the return statement are not visible here -- confirm.
 */
233 BlockAIOCB
*dma_blk_io(AioContext
*ctx
,
234 QEMUSGList
*sg
, uint64_t offset
, uint32_t align
,
235 DMAIOFunc
*io_func
, void *io_func_opaque
,
236 BlockCompletionFunc
*cb
,
237 void *opaque
, DMADirection dir
)
239 DMAAIOCB
*dbs
= qemu_aio_get(&dma_aiocb_info
, NULL
, cb
, opaque
);
241 trace_dma_blk_io(dbs
, io_func_opaque
, offset
, (dir
== DMA_DIRECTION_TO_DEVICE
));
246 dbs
->offset
= offset
;
248 dbs
->sg_cur_index
= 0;
249 dbs
->sg_cur_byte
= 0;
251 dbs
->io_func
= io_func
;
252 dbs
->io_func_opaque
= io_func_opaque
;
254 qemu_iovec_init(&dbs
->iov
, sg
->nsg
);
/*
 * dma_blk_read_io_func: I/O function that dma_blk_read() hands to
 * dma_blk_io().
 *
 * Treats its opaque pointer as the BlockBackend and forwards @offset,
 * @iov, @cb and @cb_opaque to blk_aio_preadv(), with 0 as the fourth
 * argument.
 *
 * Returns: the BlockAIOCB returned by blk_aio_preadv().
 *
 * NOTE(review): the end of the parameter list is not visible here.
 */
261 BlockAIOCB
*dma_blk_read_io_func(int64_t offset
, QEMUIOVector
*iov
,
262 BlockCompletionFunc
*cb
, void *cb_opaque
,
265 BlockBackend
*blk
= opaque
;
266 return blk_aio_preadv(blk
, offset
, iov
, 0, cb
, cb_opaque
);
/*
 * dma_blk_read: start a scatter/gather read from @blk.
 *
 * @blk:    block backend; supplies the AioContext and is passed on as
 *          the I/O function's opaque pointer
 * @sg:     scatter/gather list forwarded to dma_blk_io()
 * @offset: starting offset forwarded to dma_blk_io()
 * @align:  alignment value forwarded to dma_blk_io()
 * @cb:     completion callback
 * @opaque: argument for @cb
 *
 * Thin wrapper around dma_blk_io() using dma_blk_read_io_func and
 * DMA_DIRECTION_FROM_DEVICE.
 *
 * Returns: whatever dma_blk_io() returns.
 */
269 BlockAIOCB
*dma_blk_read(BlockBackend
*blk
,
270 QEMUSGList
*sg
, uint64_t offset
, uint32_t align
,
271 void (*cb
)(void *opaque
, int ret
), void *opaque
)
273 return dma_blk_io(blk_get_aio_context(blk
), sg
, offset
, align
,
274 dma_blk_read_io_func
, blk
, cb
, opaque
,
275 DMA_DIRECTION_FROM_DEVICE
);
/*
 * dma_blk_write_io_func: I/O function that dma_blk_write() hands to
 * dma_blk_io().
 *
 * Treats its opaque pointer as the BlockBackend and forwards @offset,
 * @iov, @cb and @cb_opaque to blk_aio_pwritev(), with 0 as the fourth
 * argument.
 *
 * Returns: the BlockAIOCB returned by blk_aio_pwritev().
 *
 * NOTE(review): the end of the parameter list is not visible here.
 */
279 BlockAIOCB
*dma_blk_write_io_func(int64_t offset
, QEMUIOVector
*iov
,
280 BlockCompletionFunc
*cb
, void *cb_opaque
,
283 BlockBackend
*blk
= opaque
;
284 return blk_aio_pwritev(blk
, offset
, iov
, 0, cb
, cb_opaque
);
/*
 * dma_blk_write: start a scatter/gather write to @blk.
 *
 * @blk:    block backend; supplies the AioContext and is passed on as
 *          the I/O function's opaque pointer
 * @sg:     scatter/gather list forwarded to dma_blk_io()
 * @offset: starting offset forwarded to dma_blk_io()
 * @align:  alignment value forwarded to dma_blk_io()
 * @cb:     completion callback
 * @opaque: argument for @cb
 *
 * Thin wrapper around dma_blk_io() using dma_blk_write_io_func and
 * DMA_DIRECTION_TO_DEVICE.
 *
 * Returns: whatever dma_blk_io() returns.
 */
287 BlockAIOCB
*dma_blk_write(BlockBackend
*blk
,
288 QEMUSGList
*sg
, uint64_t offset
, uint32_t align
,
289 void (*cb
)(void *opaque
, int ret
), void *opaque
)
291 return dma_blk_io(blk_get_aio_context(blk
), sg
, offset
, align
,
292 dma_blk_write_io_func
, blk
, cb
, opaque
,
293 DMA_DIRECTION_TO_DEVICE
);
/*
 * dma_buf_rw: shared helper behind dma_buf_read() and dma_buf_write().
 *
 * @ptr: host buffer handed to dma_memory_rw()
 * @len: requested byte count; clamped to resid before use
 * @sg:  scatter/gather list whose entries give the base address and
 *       the per-entry length limit
 *
 * For an entry, at most MIN(len, entry.len) bytes are transferred with
 * dma_memory_rw() in the direction dir.
 *
 * NOTE(review): the end of the parameter list, the definition of
 * resid, the loop bookkeeping and the returned value are not visible
 * here -- confirm. Note also that xfer is int32_t while entry.len is
 * assigned from a dma_addr_t in qemu_sglist_add(); check the MIN()
 * result cannot be truncated.
 */
297 static uint64_t dma_buf_rw(uint8_t *ptr
, int32_t len
, QEMUSGList
*sg
,
305 len
= MIN(len
, resid
);
/* Copy the entry by value and step the cursor to the next one. */
307 ScatterGatherEntry entry
= sg
->sg
[sg_cur_index
++];
308 int32_t xfer
= MIN(len
, entry
.len
);
309 dma_memory_rw(sg
->as
, entry
.base
, ptr
, xfer
, dir
);
/*
 * dma_buf_read: run dma_buf_rw() with DMA_DIRECTION_FROM_DEVICE.
 *
 * @ptr: host buffer
 * @len: byte count
 * @sg:  scatter/gather list
 *
 * Returns: the value returned by dma_buf_rw().
 */
318 uint64_t dma_buf_read(uint8_t *ptr
, int32_t len
, QEMUSGList
*sg
)
320 return dma_buf_rw(ptr
, len
, sg
, DMA_DIRECTION_FROM_DEVICE
);
/*
 * dma_buf_write: run dma_buf_rw() with DMA_DIRECTION_TO_DEVICE.
 *
 * @ptr: host buffer
 * @len: byte count
 * @sg:  scatter/gather list
 *
 * Returns: the value returned by dma_buf_rw().
 */
323 uint64_t dma_buf_write(uint8_t *ptr
, int32_t len
, QEMUSGList
*sg
)
325 return dma_buf_rw(ptr
, len
, sg
, DMA_DIRECTION_TO_DEVICE
);
/*
 * dma_acct_start: begin block accounting for a scatter/gather request.
 *
 * @blk:    block backend whose stats object is used
 * @cookie: accounting cookie passed through to block_acct_start()
 * @sg:     scatter/gather list; sg->size is the byte count reported
 * @type:   accounting type passed through to block_acct_start()
 */
328 void dma_acct_start(BlockBackend
*blk
, BlockAcctCookie
*cookie
,
329 QEMUSGList
*sg
, enum BlockAcctType type
)
331 block_acct_start(blk_get_stats(blk
), cookie
, sg
->size
, type
);
/*
 * dma_aligned_pow2_mask: pick a mask of the form 2^n - 1 for the range
 * that begins at @start.
 *
 * @start:         first address of the range
 * @end:           end of the range; end - start is used directly as a
 *                 mask, so @end looks inclusive -- confirm
 * @max_addr_bits: address width; every mask is capped at
 *                 2^max_addr_bits - 1 (64 keeps the cap at UINT64_MAX)
 *
 * The alignment mask is (lowest set bit of @start) - 1, or the cap
 * when @start is 0. If it does not exceed the capped size mask it is
 * returned. Otherwise the result is derived from the size as
 * 2^k - 1 with k = 63 - clz64(addr_mask + 1); assuming clz64() counts
 * leading zero bits, 2^k is the largest power of two not above
 * addr_mask + 1.
 *
 * NOTE(review): the body of the addr_mask == UINT64_MAX branch is not
 * visible here (addr_mask + 1 would wrap to 0 in that case) -- confirm
 * what it returns.
 */
334 uint64_t dma_aligned_pow2_mask(uint64_t start
, uint64_t end
, int max_addr_bits
)
336 uint64_t max_mask
= UINT64_MAX
, addr_mask
= end
- start
;
337 uint64_t alignment_mask
, size_mask
;
/* A shift by 64 would be undefined, so 64 keeps the UINT64_MAX default. */
339 if (max_addr_bits
!= 64) {
340 max_mask
= (1ULL << max_addr_bits
) - 1;
/* start & -start isolates the lowest set bit of start. */
343 alignment_mask
= start
? (start
& -start
) - 1 : max_mask
;
344 alignment_mask
= MIN(alignment_mask
, max_mask
);
345 size_mask
= MIN(addr_mask
, max_mask
);
347 if (alignment_mask
<= size_mask
) {
348 /* Increase the alignment of start */
349 return alignment_mask
;
351 /* Find the largest page mask from size */
352 if (addr_mask
== UINT64_MAX
) {
355 return (1ULL << (63 - clz64(addr_mask
+ 1))) - 1;