1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
5 * Copyright Red Hat, Inc.
8 * Stefan Hajnoczi <stefanha@redhat.com>
11 #include "qemu/osdep.h"
13 #include "block/block_int.h"
14 #include "exec/memory.h"
15 #include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
16 #include "qapi/error.h"
17 #include "qemu/error-report.h"
18 #include "qapi/qmp/qdict.h"
19 #include "qemu/module.h"
20 #include "sysemu/block-backend.h"
21 #include "exec/memory.h" /* for ram_block_discard_disable() */
23 #include "block/block-io.h"
/*
 * Keep the QEMU BlockDriver names identical to the libblkio driver names.
 * Using macros instead of typing out the string literals avoids typos.
 */
#define DRIVER_IO_URING "io_uring"
#define DRIVER_NVME_IO_URING "nvme-io_uring"
#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
36 * Allocated bounce buffers are kept in a list sorted by buffer address.
38 typedef struct BlkioBounceBuf
{
39 QLIST_ENTRY(BlkioBounceBuf
) next
;
41 /* The bounce buffer */
47 * libblkio is not thread-safe so this lock protects ->blkio and
52 struct blkioq
*blkioq
; /* make this multi-queue in the future... */
56 * Polling fetches the next completion into this field.
58 * No lock is necessary since only one thread calls aio_poll() and invokes
59 * fd and poll handlers.
61 struct blkio_completion poll_completion
;
64 * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
66 * Lock ordering: ->bounce_lock before ->blkio_lock.
70 /* Bounce buffer pool */
71 struct blkio_mem_region bounce_pool
;
73 /* Sorted list of allocated bounce buffers */
74 QLIST_HEAD(, BlkioBounceBuf
) bounce_bufs
;
76 /* Queue for coroutines waiting for bounce buffer space */
77 CoQueue bounce_available
;
79 /* The value of the "mem-region-alignment" property */
80 size_t mem_region_alignment
;
82 /* Can we skip adding/deleting blkio_mem_regions? */
83 bool needs_mem_regions
;
85 /* Are file descriptors necessary for blkio_mem_regions? */
86 bool needs_mem_region_fd
;
88 /* Are madvise(MADV_DONTNEED)-style operations unavailable? */
89 bool may_pin_mem_regions
;
92 /* Called with s->bounce_lock held */
93 static int blkio_resize_bounce_pool(BDRVBlkioState
*s
, int64_t bytes
)
95 /* There can be no allocated bounce buffers during resize */
96 assert(QLIST_EMPTY(&s
->bounce_bufs
));
98 /* Pad size to reduce frequency of resize calls */
101 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
104 if (s
->bounce_pool
.addr
) {
105 blkio_unmap_mem_region(s
->blkio
, &s
->bounce_pool
);
106 blkio_free_mem_region(s
->blkio
, &s
->bounce_pool
);
107 memset(&s
->bounce_pool
, 0, sizeof(s
->bounce_pool
));
110 /* Automatically freed when s->blkio is destroyed */
111 ret
= blkio_alloc_mem_region(s
->blkio
, &s
->bounce_pool
, bytes
);
116 ret
= blkio_map_mem_region(s
->blkio
, &s
->bounce_pool
);
118 blkio_free_mem_region(s
->blkio
, &s
->bounce_pool
);
119 memset(&s
->bounce_pool
, 0, sizeof(s
->bounce_pool
));
127 /* Called with s->bounce_lock held */
129 blkio_do_alloc_bounce_buffer(BDRVBlkioState
*s
, BlkioBounceBuf
*bounce
,
132 void *addr
= s
->bounce_pool
.addr
;
133 BlkioBounceBuf
*cur
= NULL
;
134 BlkioBounceBuf
*prev
= NULL
;
138 * This is just a linear search over the holes between requests. An
139 * efficient allocator would be nice.
141 QLIST_FOREACH(cur
, &s
->bounce_bufs
, next
) {
142 space
= cur
->buf
.iov_base
- addr
;
143 if (bytes
<= space
) {
144 QLIST_INSERT_BEFORE(cur
, bounce
, next
);
145 bounce
->buf
.iov_base
= addr
;
146 bounce
->buf
.iov_len
= bytes
;
150 addr
= cur
->buf
.iov_base
+ cur
->buf
.iov_len
;
154 /* Is there space after the last request? */
155 space
= s
->bounce_pool
.addr
+ s
->bounce_pool
.len
- addr
;
160 QLIST_INSERT_AFTER(prev
, bounce
, next
);
162 QLIST_INSERT_HEAD(&s
->bounce_bufs
, bounce
, next
);
164 bounce
->buf
.iov_base
= addr
;
165 bounce
->buf
.iov_len
= bytes
;
169 static int coroutine_fn
170 blkio_alloc_bounce_buffer(BDRVBlkioState
*s
, BlkioBounceBuf
*bounce
,
174 * Ensure fairness: first time around we join the back of the queue,
175 * subsequently we join the front so we don't lose our place.
177 CoQueueWaitFlags wait_flags
= 0;
179 QEMU_LOCK_GUARD(&s
->bounce_lock
);
181 /* Ensure fairness: don't even try if other requests are already waiting */
182 if (!qemu_co_queue_empty(&s
->bounce_available
)) {
183 qemu_co_queue_wait_flags(&s
->bounce_available
, &s
->bounce_lock
,
185 wait_flags
= CO_QUEUE_WAIT_FRONT
;
189 if (blkio_do_alloc_bounce_buffer(s
, bounce
, bytes
)) {
190 /* Kick the next queued request since there may be space */
191 qemu_co_queue_next(&s
->bounce_available
);
196 * If there are no in-flight requests then the pool was simply too
199 if (QLIST_EMPTY(&s
->bounce_bufs
)) {
203 ret
= blkio_resize_bounce_pool(s
, bytes
);
205 /* Kick the next queued request since that may fail too */
206 qemu_co_queue_next(&s
->bounce_available
);
210 ok
= blkio_do_alloc_bounce_buffer(s
, bounce
, bytes
);
211 assert(ok
); /* must have space this time */
215 qemu_co_queue_wait_flags(&s
->bounce_available
, &s
->bounce_lock
,
217 wait_flags
= CO_QUEUE_WAIT_FRONT
;
221 static void coroutine_fn
blkio_free_bounce_buffer(BDRVBlkioState
*s
,
222 BlkioBounceBuf
*bounce
)
224 QEMU_LOCK_GUARD(&s
->bounce_lock
);
226 QLIST_REMOVE(bounce
, next
);
228 /* Wake up waiting coroutines since space may now be available */
229 qemu_co_queue_next(&s
->bounce_available
);
232 /* For async to .bdrv_co_*() conversion */
234 Coroutine
*coroutine
;
238 static void blkio_completion_fd_read(void *opaque
)
240 BlockDriverState
*bs
= opaque
;
241 BDRVBlkioState
*s
= bs
->opaque
;
245 /* Polling may have already fetched a completion */
246 if (s
->poll_completion
.user_data
!= NULL
) {
247 BlkioCoData
*cod
= s
->poll_completion
.user_data
;
248 cod
->ret
= s
->poll_completion
.ret
;
250 /* Clear it in case aio_co_wake() enters a nested event loop */
251 s
->poll_completion
.user_data
= NULL
;
253 aio_co_wake(cod
->coroutine
);
256 /* Reset completion fd status */
257 ret
= read(s
->completion_fd
, &val
, sizeof(val
));
259 /* Ignore errors, there's nothing we can do */
263 * Reading one completion at a time makes nested event loop re-entrancy
264 * simple. Change this loop to get multiple completions in one go if it
265 * becomes a performance bottleneck.
268 struct blkio_completion completion
;
270 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
271 ret
= blkioq_do_io(s
->blkioq
, &completion
, 0, 1, NULL
);
277 BlkioCoData
*cod
= completion
.user_data
;
278 cod
->ret
= completion
.ret
;
279 aio_co_wake(cod
->coroutine
);
283 static bool blkio_completion_fd_poll(void *opaque
)
285 BlockDriverState
*bs
= opaque
;
286 BDRVBlkioState
*s
= bs
->opaque
;
289 /* Just in case we already fetched a completion */
290 if (s
->poll_completion
.user_data
!= NULL
) {
294 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
295 ret
= blkioq_do_io(s
->blkioq
, &s
->poll_completion
, 0, 1, NULL
);
/* Poll-ready callback: process completions exactly like the fd handler. */
static void blkio_completion_fd_poll_ready(void *opaque)
{
    blkio_completion_fd_read(opaque);
}
305 static void blkio_attach_aio_context(BlockDriverState
*bs
,
306 AioContext
*new_context
)
308 BDRVBlkioState
*s
= bs
->opaque
;
310 aio_set_fd_handler(new_context
, s
->completion_fd
,
311 blkio_completion_fd_read
, NULL
,
312 blkio_completion_fd_poll
,
313 blkio_completion_fd_poll_ready
, bs
);
316 static void blkio_detach_aio_context(BlockDriverState
*bs
)
318 BDRVBlkioState
*s
= bs
->opaque
;
320 aio_set_fd_handler(bdrv_get_aio_context(bs
), s
->completion_fd
, NULL
, NULL
,
325 * Called by blk_io_unplug() or immediately if not plugged. Called without
328 static void blkio_unplug_fn(void *opaque
)
330 BDRVBlkioState
*s
= opaque
;
332 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
333 blkioq_do_io(s
->blkioq
, NULL
, 0, 0, NULL
);
338 * Schedule I/O submission after enqueuing a new request. Called without
341 static void blkio_submit_io(BlockDriverState
*bs
)
343 BDRVBlkioState
*s
= bs
->opaque
;
345 blk_io_plug_call(blkio_unplug_fn
, s
);
348 static int coroutine_fn
349 blkio_co_pdiscard(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
)
351 BDRVBlkioState
*s
= bs
->opaque
;
353 .coroutine
= qemu_coroutine_self(),
356 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
357 blkioq_discard(s
->blkioq
, offset
, bytes
, &cod
, 0);
361 qemu_coroutine_yield();
365 static int coroutine_fn
366 blkio_co_preadv(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
,
367 QEMUIOVector
*qiov
, BdrvRequestFlags flags
)
370 .coroutine
= qemu_coroutine_self(),
372 BDRVBlkioState
*s
= bs
->opaque
;
373 bool use_bounce_buffer
=
374 s
->needs_mem_regions
&& !(flags
& BDRV_REQ_REGISTERED_BUF
);
375 BlkioBounceBuf bounce
;
376 struct iovec
*iov
= qiov
->iov
;
377 int iovcnt
= qiov
->niov
;
379 if (use_bounce_buffer
) {
380 int ret
= blkio_alloc_bounce_buffer(s
, &bounce
, bytes
);
389 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
390 blkioq_readv(s
->blkioq
, offset
, iov
, iovcnt
, &cod
, 0);
394 qemu_coroutine_yield();
396 if (use_bounce_buffer
) {
398 qemu_iovec_from_buf(qiov
, 0,
403 blkio_free_bounce_buffer(s
, &bounce
);
409 static int coroutine_fn
blkio_co_pwritev(BlockDriverState
*bs
, int64_t offset
,
410 int64_t bytes
, QEMUIOVector
*qiov
, BdrvRequestFlags flags
)
412 uint32_t blkio_flags
= (flags
& BDRV_REQ_FUA
) ? BLKIO_REQ_FUA
: 0;
414 .coroutine
= qemu_coroutine_self(),
416 BDRVBlkioState
*s
= bs
->opaque
;
417 bool use_bounce_buffer
=
418 s
->needs_mem_regions
&& !(flags
& BDRV_REQ_REGISTERED_BUF
);
419 BlkioBounceBuf bounce
;
420 struct iovec
*iov
= qiov
->iov
;
421 int iovcnt
= qiov
->niov
;
423 if (use_bounce_buffer
) {
424 int ret
= blkio_alloc_bounce_buffer(s
, &bounce
, bytes
);
429 qemu_iovec_to_buf(qiov
, 0, bounce
.buf
.iov_base
, bytes
);
434 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
435 blkioq_writev(s
->blkioq
, offset
, iov
, iovcnt
, &cod
, blkio_flags
);
439 qemu_coroutine_yield();
441 if (use_bounce_buffer
) {
442 blkio_free_bounce_buffer(s
, &bounce
);
448 static int coroutine_fn
blkio_co_flush(BlockDriverState
*bs
)
450 BDRVBlkioState
*s
= bs
->opaque
;
452 .coroutine
= qemu_coroutine_self(),
455 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
456 blkioq_flush(s
->blkioq
, &cod
, 0);
460 qemu_coroutine_yield();
464 static int coroutine_fn
blkio_co_pwrite_zeroes(BlockDriverState
*bs
,
465 int64_t offset
, int64_t bytes
, BdrvRequestFlags flags
)
467 BDRVBlkioState
*s
= bs
->opaque
;
469 .coroutine
= qemu_coroutine_self(),
471 uint32_t blkio_flags
= 0;
473 if (flags
& BDRV_REQ_FUA
) {
474 blkio_flags
|= BLKIO_REQ_FUA
;
476 if (!(flags
& BDRV_REQ_MAY_UNMAP
)) {
477 blkio_flags
|= BLKIO_REQ_NO_UNMAP
;
479 if (flags
& BDRV_REQ_NO_FALLBACK
) {
480 blkio_flags
|= BLKIO_REQ_NO_FALLBACK
;
483 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
484 blkioq_write_zeroes(s
->blkioq
, offset
, bytes
, &cod
, blkio_flags
);
488 qemu_coroutine_yield();
/* Outcome of translating a host buffer into a blkio_mem_region */
typedef enum {
    BMRR_OK,    /* region filled in successfully */
    BMRR_SKIP,  /* buffer cannot be registered; silently ignore it */
    BMRR_FAIL,  /* hard error; errp has been set */
} BlkioMemRegionResult;
499 * Produce a struct blkio_mem_region for a given address and size.
501 * This function produces identical results when called multiple times with the
502 * same arguments. This property is necessary because blkio_unmap_mem_region()
503 * must receive the same struct blkio_mem_region field values that were passed
504 * to blkio_map_mem_region().
506 static BlkioMemRegionResult
507 blkio_mem_region_from_host(BlockDriverState
*bs
,
508 void *host
, size_t size
,
509 struct blkio_mem_region
*region
,
512 BDRVBlkioState
*s
= bs
->opaque
;
514 ram_addr_t fd_offset
= 0;
516 if (((uintptr_t)host
| size
) % s
->mem_region_alignment
) {
517 error_setg(errp
, "unaligned buf %p with size %zu", host
, size
);
521 /* Attempt to find the fd for the underlying memory */
522 if (s
->needs_mem_region_fd
) {
528 * bdrv_register_buf() is called with the BQL held so mr lives at least
529 * until this function returns.
531 ram_block
= qemu_ram_block_from_host(host
, false, &fd_offset
);
533 fd
= qemu_ram_get_fd(ram_block
);
537 * Ideally every RAMBlock would have an fd. pc-bios and other
538 * things don't. Luckily they are usually not I/O buffers and we
539 * can just ignore them.
544 /* Make sure the fd covers the entire range */
545 end_block
= qemu_ram_block_from_host(host
+ size
- 1, false, &offset
);
546 if (ram_block
!= end_block
) {
547 error_setg(errp
, "registered buffer at %p with size %zu extends "
548 "beyond RAMBlock", host
, size
);
553 *region
= (struct blkio_mem_region
){
557 .fd_offset
= fd_offset
,
562 static bool blkio_register_buf(BlockDriverState
*bs
, void *host
, size_t size
,
565 BDRVBlkioState
*s
= bs
->opaque
;
566 struct blkio_mem_region region
;
567 BlkioMemRegionResult region_result
;
571 * Mapping memory regions conflicts with RAM discard (virtio-mem) when
572 * there is pinning, so only do it when necessary.
574 if (!s
->needs_mem_regions
&& s
->may_pin_mem_regions
) {
578 region_result
= blkio_mem_region_from_host(bs
, host
, size
, ®ion
, errp
);
579 if (region_result
== BMRR_SKIP
) {
581 } else if (region_result
!= BMRR_OK
) {
585 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
586 ret
= blkio_map_mem_region(s
->blkio
, ®ion
);
590 error_setg(errp
, "Failed to add blkio mem region %p with size %zu: %s",
591 host
, size
, blkio_get_error_msg());
597 static void blkio_unregister_buf(BlockDriverState
*bs
, void *host
, size_t size
)
599 BDRVBlkioState
*s
= bs
->opaque
;
600 struct blkio_mem_region region
;
602 /* See blkio_register_buf() */
603 if (!s
->needs_mem_regions
&& s
->may_pin_mem_regions
) {
607 if (blkio_mem_region_from_host(bs
, host
, size
, ®ion
, NULL
) != BMRR_OK
) {
611 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
612 blkio_unmap_mem_region(s
->blkio
, ®ion
);
616 static int blkio_io_uring_open(BlockDriverState
*bs
, QDict
*options
, int flags
,
619 const char *filename
= qdict_get_str(options
, "filename");
620 BDRVBlkioState
*s
= bs
->opaque
;
623 ret
= blkio_set_str(s
->blkio
, "path", filename
);
624 qdict_del(options
, "filename");
626 error_setg_errno(errp
, -ret
, "failed to set path: %s",
627 blkio_get_error_msg());
631 if (flags
& BDRV_O_NOCACHE
) {
632 ret
= blkio_set_bool(s
->blkio
, "direct", true);
634 error_setg_errno(errp
, -ret
, "failed to set direct: %s",
635 blkio_get_error_msg());
643 static int blkio_nvme_io_uring(BlockDriverState
*bs
, QDict
*options
, int flags
,
646 const char *path
= qdict_get_try_str(options
, "path");
647 BDRVBlkioState
*s
= bs
->opaque
;
651 error_setg(errp
, "missing 'path' option");
655 ret
= blkio_set_str(s
->blkio
, "path", path
);
656 qdict_del(options
, "path");
658 error_setg_errno(errp
, -ret
, "failed to set path: %s",
659 blkio_get_error_msg());
663 if (!(flags
& BDRV_O_NOCACHE
)) {
664 error_setg(errp
, "cache.direct=off is not supported");
671 static int blkio_virtio_blk_common_open(BlockDriverState
*bs
,
672 QDict
*options
, int flags
, Error
**errp
)
674 const char *path
= qdict_get_try_str(options
, "path");
675 BDRVBlkioState
*s
= bs
->opaque
;
676 bool fd_supported
= false;
680 error_setg(errp
, "missing 'path' option");
684 if (!(flags
& BDRV_O_NOCACHE
)) {
685 error_setg(errp
, "cache.direct=off is not supported");
689 if (blkio_get_int(s
->blkio
, "fd", &fd
) == 0) {
694 * If the libblkio driver supports fd passing, let's always use qemu_open()
695 * to open the `path`, so we can handle fd passing from the management
696 * layer through the "/dev/fdset/N" special path.
701 if (flags
& BDRV_O_RDWR
) {
704 open_flags
= O_RDONLY
;
707 fd
= qemu_open(path
, open_flags
, errp
);
712 ret
= blkio_set_int(s
->blkio
, "fd", fd
);
714 error_setg_errno(errp
, -ret
, "failed to set fd: %s",
715 blkio_get_error_msg());
720 ret
= blkio_set_str(s
->blkio
, "path", path
);
722 error_setg_errno(errp
, -ret
, "failed to set path: %s",
723 blkio_get_error_msg());
728 qdict_del(options
, "path");
733 static int blkio_file_open(BlockDriverState
*bs
, QDict
*options
, int flags
,
736 const char *blkio_driver
= bs
->drv
->protocol_name
;
737 BDRVBlkioState
*s
= bs
->opaque
;
740 ret
= blkio_create(blkio_driver
, &s
->blkio
);
742 error_setg_errno(errp
, -ret
, "blkio_create failed: %s",
743 blkio_get_error_msg());
747 if (strcmp(blkio_driver
, DRIVER_IO_URING
) == 0) {
748 ret
= blkio_io_uring_open(bs
, options
, flags
, errp
);
749 } else if (strcmp(blkio_driver
, DRIVER_NVME_IO_URING
) == 0) {
750 ret
= blkio_nvme_io_uring(bs
, options
, flags
, errp
);
751 } else if (strcmp(blkio_driver
, DRIVER_VIRTIO_BLK_VFIO_PCI
) == 0) {
752 ret
= blkio_virtio_blk_common_open(bs
, options
, flags
, errp
);
753 } else if (strcmp(blkio_driver
, DRIVER_VIRTIO_BLK_VHOST_USER
) == 0) {
754 ret
= blkio_virtio_blk_common_open(bs
, options
, flags
, errp
);
755 } else if (strcmp(blkio_driver
, DRIVER_VIRTIO_BLK_VHOST_VDPA
) == 0) {
756 ret
= blkio_virtio_blk_common_open(bs
, options
, flags
, errp
);
758 g_assert_not_reached();
761 blkio_destroy(&s
->blkio
);
765 if (!(flags
& BDRV_O_RDWR
)) {
766 ret
= blkio_set_bool(s
->blkio
, "read-only", true);
768 error_setg_errno(errp
, -ret
, "failed to set read-only: %s",
769 blkio_get_error_msg());
770 blkio_destroy(&s
->blkio
);
775 ret
= blkio_connect(s
->blkio
);
777 error_setg_errno(errp
, -ret
, "blkio_connect failed: %s",
778 blkio_get_error_msg());
779 blkio_destroy(&s
->blkio
);
783 ret
= blkio_get_bool(s
->blkio
,
785 &s
->needs_mem_regions
);
787 error_setg_errno(errp
, -ret
,
788 "failed to get needs-mem-regions: %s",
789 blkio_get_error_msg());
790 blkio_destroy(&s
->blkio
);
794 ret
= blkio_get_bool(s
->blkio
,
795 "needs-mem-region-fd",
796 &s
->needs_mem_region_fd
);
798 error_setg_errno(errp
, -ret
,
799 "failed to get needs-mem-region-fd: %s",
800 blkio_get_error_msg());
801 blkio_destroy(&s
->blkio
);
805 ret
= blkio_get_uint64(s
->blkio
,
806 "mem-region-alignment",
807 &s
->mem_region_alignment
);
809 error_setg_errno(errp
, -ret
,
810 "failed to get mem-region-alignment: %s",
811 blkio_get_error_msg());
812 blkio_destroy(&s
->blkio
);
816 ret
= blkio_get_bool(s
->blkio
,
817 "may-pin-mem-regions",
818 &s
->may_pin_mem_regions
);
820 /* Be conservative (assume pinning) if the property is not supported */
821 s
->may_pin_mem_regions
= s
->needs_mem_regions
;
825 * Notify if libblkio drivers pin memory and prevent features like
826 * virtio-mem from working.
828 if (s
->may_pin_mem_regions
) {
829 ret
= ram_block_discard_disable(true);
831 error_setg_errno(errp
, -ret
, "ram_block_discard_disable() failed");
832 blkio_destroy(&s
->blkio
);
837 ret
= blkio_start(s
->blkio
);
839 error_setg_errno(errp
, -ret
, "blkio_start failed: %s",
840 blkio_get_error_msg());
841 blkio_destroy(&s
->blkio
);
842 if (s
->may_pin_mem_regions
) {
843 ram_block_discard_disable(false);
848 bs
->supported_write_flags
= BDRV_REQ_FUA
| BDRV_REQ_REGISTERED_BUF
;
849 bs
->supported_zero_flags
= BDRV_REQ_FUA
| BDRV_REQ_MAY_UNMAP
|
850 BDRV_REQ_NO_FALLBACK
;
852 qemu_mutex_init(&s
->blkio_lock
);
853 qemu_co_mutex_init(&s
->bounce_lock
);
854 qemu_co_queue_init(&s
->bounce_available
);
855 QLIST_INIT(&s
->bounce_bufs
);
856 s
->blkioq
= blkio_get_queue(s
->blkio
, 0);
857 s
->completion_fd
= blkioq_get_completion_fd(s
->blkioq
);
859 blkio_attach_aio_context(bs
, bdrv_get_aio_context(bs
));
863 static void blkio_close(BlockDriverState
*bs
)
865 BDRVBlkioState
*s
= bs
->opaque
;
867 /* There is no destroy() API for s->bounce_lock */
869 qemu_mutex_destroy(&s
->blkio_lock
);
870 blkio_detach_aio_context(bs
);
871 blkio_destroy(&s
->blkio
);
873 if (s
->may_pin_mem_regions
) {
874 ram_block_discard_disable(false);
878 static int64_t coroutine_fn
blkio_co_getlength(BlockDriverState
*bs
)
880 BDRVBlkioState
*s
= bs
->opaque
;
884 WITH_QEMU_LOCK_GUARD(&s
->blkio_lock
) {
885 ret
= blkio_get_uint64(s
->blkio
, "capacity", &capacity
);
894 static int coroutine_fn
blkio_truncate(BlockDriverState
*bs
, int64_t offset
,
895 bool exact
, PreallocMode prealloc
,
896 BdrvRequestFlags flags
, Error
**errp
)
898 int64_t current_length
;
900 if (prealloc
!= PREALLOC_MODE_OFF
) {
901 error_setg(errp
, "Unsupported preallocation mode '%s'",
902 PreallocMode_str(prealloc
));
906 current_length
= blkio_co_getlength(bs
);
908 if (offset
> current_length
) {
909 error_setg(errp
, "Cannot grow device");
911 } else if (exact
&& offset
!= current_length
) {
912 error_setg(errp
, "Cannot resize device");
919 static int coroutine_fn
920 blkio_co_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
925 static void blkio_refresh_limits(BlockDriverState
*bs
, Error
**errp
)
927 BDRVBlkioState
*s
= bs
->opaque
;
928 QEMU_LOCK_GUARD(&s
->blkio_lock
);
932 ret
= blkio_get_int(s
->blkio
, "request-alignment", &value
);
934 error_setg_errno(errp
, -ret
, "failed to get \"request-alignment\": %s",
935 blkio_get_error_msg());
938 bs
->bl
.request_alignment
= value
;
939 if (bs
->bl
.request_alignment
< 1 ||
940 bs
->bl
.request_alignment
>= INT_MAX
||
941 !is_power_of_2(bs
->bl
.request_alignment
)) {
942 error_setg(errp
, "invalid \"request-alignment\" value %" PRIu32
", "
943 "must be a power of 2 less than INT_MAX",
944 bs
->bl
.request_alignment
);
948 ret
= blkio_get_int(s
->blkio
, "optimal-io-size", &value
);
950 error_setg_errno(errp
, -ret
, "failed to get \"optimal-io-size\": %s",
951 blkio_get_error_msg());
954 bs
->bl
.opt_transfer
= value
;
955 if (bs
->bl
.opt_transfer
> INT_MAX
||
956 (bs
->bl
.opt_transfer
% bs
->bl
.request_alignment
)) {
957 error_setg(errp
, "invalid \"optimal-io-size\" value %" PRIu32
", must "
958 "be a multiple of %" PRIu32
, bs
->bl
.opt_transfer
,
959 bs
->bl
.request_alignment
);
963 ret
= blkio_get_int(s
->blkio
, "max-transfer", &value
);
965 error_setg_errno(errp
, -ret
, "failed to get \"max-transfer\": %s",
966 blkio_get_error_msg());
969 bs
->bl
.max_transfer
= value
;
970 if ((bs
->bl
.max_transfer
% bs
->bl
.request_alignment
) ||
971 (bs
->bl
.opt_transfer
&& (bs
->bl
.max_transfer
% bs
->bl
.opt_transfer
))) {
972 error_setg(errp
, "invalid \"max-transfer\" value %" PRIu32
", must be "
973 "a multiple of %" PRIu32
" and %" PRIu32
" (if non-zero)",
974 bs
->bl
.max_transfer
, bs
->bl
.request_alignment
,
975 bs
->bl
.opt_transfer
);
979 ret
= blkio_get_int(s
->blkio
, "buf-alignment", &value
);
981 error_setg_errno(errp
, -ret
, "failed to get \"buf-alignment\": %s",
982 blkio_get_error_msg());
986 error_setg(errp
, "invalid \"buf-alignment\" value %d, must be "
990 bs
->bl
.min_mem_alignment
= value
;
992 ret
= blkio_get_int(s
->blkio
, "optimal-buf-alignment", &value
);
994 error_setg_errno(errp
, -ret
,
995 "failed to get \"optimal-buf-alignment\": %s",
996 blkio_get_error_msg());
1000 error_setg(errp
, "invalid \"optimal-buf-alignment\" value %d, "
1001 "must be positive", value
);
1004 bs
->bl
.opt_mem_alignment
= value
;
1006 ret
= blkio_get_int(s
->blkio
, "max-segments", &value
);
1008 error_setg_errno(errp
, -ret
, "failed to get \"max-segments\": %s",
1009 blkio_get_error_msg());
1013 error_setg(errp
, "invalid \"max-segments\" value %d, must be positive",
1017 bs
->bl
.max_iov
= value
;
/*
 * Missing libblkio APIs:
 * - block_status
 * - co_invalidate_cache
 *
 * Out of scope?
 * - create
 * - truncate
 */
#define BLKIO_DRIVER(name, ...) \
    { \
        .format_name             = name, \
        .protocol_name           = name, \
        .instance_size           = sizeof(BDRVBlkioState), \
        .bdrv_file_open          = blkio_file_open, \
        .bdrv_close              = blkio_close, \
        .bdrv_co_getlength       = blkio_co_getlength, \
        .bdrv_co_truncate        = blkio_truncate, \
        .bdrv_co_get_info        = blkio_co_get_info, \
        .bdrv_attach_aio_context = blkio_attach_aio_context, \
        .bdrv_detach_aio_context = blkio_detach_aio_context, \
        .bdrv_co_pdiscard        = blkio_co_pdiscard, \
        .bdrv_co_preadv          = blkio_co_preadv, \
        .bdrv_co_pwritev         = blkio_co_pwritev, \
        .bdrv_co_flush_to_disk   = blkio_co_flush, \
        .bdrv_co_pwrite_zeroes   = blkio_co_pwrite_zeroes, \
        .bdrv_refresh_limits     = blkio_refresh_limits, \
        .bdrv_register_buf       = blkio_register_buf, \
        .bdrv_unregister_buf     = blkio_unregister_buf, \
        __VA_ARGS__ \
    }
1054 static BlockDriver bdrv_io_uring
= BLKIO_DRIVER(
1056 .bdrv_needs_filename
= true,
1059 static BlockDriver bdrv_nvme_io_uring
= BLKIO_DRIVER(
1060 DRIVER_NVME_IO_URING
,
1063 static BlockDriver bdrv_virtio_blk_vfio_pci
= BLKIO_DRIVER(
1064 DRIVER_VIRTIO_BLK_VFIO_PCI
1067 static BlockDriver bdrv_virtio_blk_vhost_user
= BLKIO_DRIVER(
1068 DRIVER_VIRTIO_BLK_VHOST_USER
1071 static BlockDriver bdrv_virtio_blk_vhost_vdpa
= BLKIO_DRIVER(
1072 DRIVER_VIRTIO_BLK_VHOST_VDPA
1075 static void bdrv_blkio_init(void)
1077 bdrv_register(&bdrv_io_uring
);
1078 bdrv_register(&bdrv_nvme_io_uring
);
1079 bdrv_register(&bdrv_virtio_blk_vfio_pci
);
1080 bdrv_register(&bdrv_virtio_blk_vhost_user
);
1081 bdrv_register(&bdrv_virtio_blk_vhost_vdpa
);
1084 block_init(bdrv_blkio_init
);