2 * Export QEMU block device via VDUSE
4 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
7 * Xie Yongji <xieyongji@bytedance.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
13 #include <sys/eventfd.h>
15 #include "qemu/osdep.h"
16 #include "qapi/error.h"
17 #include "block/export.h"
18 #include "qemu/error-report.h"
19 #include "util/block-helpers.h"
20 #include "subprojects/libvduse/libvduse.h"
21 #include "virtio-blk-handler.h"
23 #include "standard-headers/linux/virtio_blk.h"
/*
 * Fallback values used by vduse_blk_exp_create() for the number of
 * virtqueues and the size of each queue when the export options do not
 * set num-queues / queue-size.
 */
25 #define VDUSE_DEFAULT_NUM_QUEUE 1
26 #define VDUSE_DEFAULT_QUEUE_SIZE 256
/*
 * Per-export state of a vduse-blk export.
 *
 * handler:  virtio-blk handler state; filled in by vduse_blk_exp_create()
 *           and passed to virtio_blk_process_req().
 * inflight: request counter; decremented in vduse_blk_inflight_dec() and
 *           waited on (until it reaches 0) in vduse_blk_detach_ctx().
 *
 * NOTE(review): other members used in this file (export, dev, num_queues)
 * and the closing of the typedef are not visible in this excerpt.
 */
28 typedef struct VduseBlkExport
{
30 VirtioBlkHandler handler
;
33 unsigned int inflight
;
/*
 * Per-request state, obtained from vduse_queue_pop(vq, sizeof(VduseBlkReq))
 * in vduse_blk_vq_handler().
 *
 * elem: the virtqueue element of the request.
 *
 * NOTE(review): the vq member dereferenced elsewhere (req->vq) and the
 * closing of the typedef are not visible in this excerpt.
 */
36 typedef struct VduseBlkReq
{
37 VduseVirtqElement elem
;
/*
 * Called by vduse_blk_vq_handler() right before a request coroutine is
 * entered.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * increments vblk_exp->inflight — confirm against the full file.
 */
41 static void vduse_blk_inflight_inc(VduseBlkExport
*vblk_exp
)
/*
 * Drops one in-flight request: pre-decrements vblk_exp->inflight and takes
 * a branch when the counter reaches zero.
 * NOTE(review): the body of that branch is not visible in this excerpt.
 */
46 static void vduse_blk_inflight_dec(VduseBlkExport
*vblk_exp
)
48 if (--vblk_exp
->inflight
== 0) {
/*
 * Completes a request: hands req->elem back to req->vq through
 * vduse_queue_push() together with in_len, then calls vduse_queue_notify()
 * on that queue.
 * NOTE(review): the end of the function is not visible in this excerpt, so
 * whether req is released here cannot be confirmed.
 */
53 static void vduse_blk_req_complete(VduseBlkReq
*req
, size_t in_len
)
55 vduse_queue_push(req
->vq
, &req
->elem
, in_len
);
56 vduse_queue_notify(req
->vq
);
/*
 * Coroutine entry point for one virtio-blk request.
 *
 * opaque is the VduseBlkReq created in vduse_blk_vq_handler(). The export
 * is looked up from the request's virtqueue (queue -> device -> private
 * data), the element's in/out scatter-gather lists are passed to
 * virtio_blk_process_req(), and the result is stored in in_len. The request
 * is then completed with vduse_blk_req_complete() and the export's inflight
 * counter is dropped with vduse_blk_inflight_dec().
 *
 * NOTE(review): the declaration of in_len and the lines between the
 * virtio_blk_process_req() call and the completion are not visible in this
 * excerpt (possibly error handling) — confirm against the full file.
 */
61 static void coroutine_fn
vduse_blk_virtio_process_req(void *opaque
)
63 VduseBlkReq
*req
= opaque
;
64 VduseVirtq
*vq
= req
->vq
;
65 VduseDev
*dev
= vduse_queue_get_dev(vq
);
66 VduseBlkExport
*vblk_exp
= vduse_dev_get_priv(dev
);
67 VirtioBlkHandler
*handler
= &vblk_exp
->handler
;
68 VduseVirtqElement
*elem
= &req
->elem
;
69 struct iovec
*in_iov
= elem
->in_sg
;
70 struct iovec
*out_iov
= elem
->out_sg
;
71 unsigned in_num
= elem
->in_num
;
72 unsigned out_num
= elem
->out_num
;
75 in_len
= virtio_blk_process_req(handler
, in_iov
,
76 out_iov
, in_num
, out_num
);
82 vduse_blk_req_complete(req
, in_len
);
83 vduse_blk_inflight_dec(vblk_exp
);
/*
 * Handles pending work on a virtqueue: pops a VduseBlkReq-sized element
 * with vduse_queue_pop(), creates a coroutine that runs
 * vduse_blk_virtio_process_req() on it, calls vduse_blk_inflight_inc() and
 * enters the coroutine.
 *
 * NOTE(review): the declarations of req and co, and any loop or NULL check
 * around the pop, are not visible in this excerpt.
 */
86 static void vduse_blk_vq_handler(VduseDev
*dev
, VduseVirtq
*vq
)
88 VduseBlkExport
*vblk_exp
= vduse_dev_get_priv(dev
);
93 req
= vduse_queue_pop(vq
, sizeof(VduseBlkReq
));
100 qemu_coroutine_create(vduse_blk_virtio_process_req
, req
);
/*
 * Account for the request before entering the coroutine; the matching
 * vduse_blk_inflight_dec() is issued from inside the coroutine.
 */
102 vduse_blk_inflight_inc(vblk_exp
);
103 qemu_coroutine_enter(co
);
/*
 * fd handler for a virtqueue fd; installed with aio_set_fd_handler() in
 * vduse_blk_enable_queue() and vduse_blk_attach_ctx(), with the VduseVirtq
 * as opaque.
 *
 * Reads the eventfd into kick_data, reports an error if eventfd_read()
 * returns -1, and dispatches to vduse_blk_vq_handler().
 *
 * NOTE(review): the declaration of kick_data and the rest of the error
 * branch (e.g. an early return) are not visible in this excerpt.
 */
107 static void on_vduse_vq_kick(void *opaque
)
109 VduseVirtq
*vq
= opaque
;
110 VduseDev
*dev
= vduse_queue_get_dev(vq
);
111 int fd
= vduse_queue_get_fd(vq
);
114 if (eventfd_read(fd
, &kick_data
) == -1) {
115 error_report("failed to read data from eventfd");
119 vduse_blk_vq_handler(dev
, vq
);
122 static void vduse_blk_enable_queue(VduseDev
*dev
, VduseVirtq
*vq
)
124 VduseBlkExport
*vblk_exp
= vduse_dev_get_priv(dev
);
126 aio_set_fd_handler(vblk_exp
->export
.ctx
, vduse_queue_get_fd(vq
),
127 true, on_vduse_vq_kick
, NULL
, NULL
, NULL
, vq
);
130 static void vduse_blk_disable_queue(VduseDev
*dev
, VduseVirtq
*vq
)
132 VduseBlkExport
*vblk_exp
= vduse_dev_get_priv(dev
);
134 aio_set_fd_handler(vblk_exp
->export
.ctx
, vduse_queue_get_fd(vq
),
135 true, NULL
, NULL
, NULL
, NULL
, NULL
);
138 static const VduseOps vduse_blk_ops
= {
139 .enable_queue
= vduse_blk_enable_queue
,
140 .disable_queue
= vduse_blk_disable_queue
,
143 static void on_vduse_dev_kick(void *opaque
)
145 VduseDev
*dev
= opaque
;
147 vduse_dev_handler(dev
);
/*
 * Installs the export's fd handlers: on_vduse_dev_kick() for the device fd
 * and on_vduse_vq_kick() for the fd of each of the num_queues virtqueues.
 *
 * NOTE(review): in the lines visible here the handlers are registered on
 * vblk_exp->export.ctx; the ctx parameter itself is not referenced. Some
 * original lines (declaration of i, the opaque argument of the first call,
 * lines between fetching a queue's fd and registering it) are not visible
 * in this excerpt.
 */
150 static void vduse_blk_attach_ctx(VduseBlkExport
*vblk_exp
, AioContext
*ctx
)
154 aio_set_fd_handler(vblk_exp
->export
.ctx
, vduse_dev_get_fd(vblk_exp
->dev
),
155 true, on_vduse_dev_kick
, NULL
, NULL
, NULL
,
158 for (i
= 0; i
< vblk_exp
->num_queues
; i
++) {
159 VduseVirtq
*vq
= vduse_dev_get_queue(vblk_exp
->dev
, i
);
160 int fd
= vduse_queue_get_fd(vq
);
165 aio_set_fd_handler(vblk_exp
->export
.ctx
, fd
, true,
166 on_vduse_vq_kick
, NULL
, NULL
, NULL
, vq
);
/*
 * Counterpart of vduse_blk_attach_ctx(): re-registers every virtqueue fd
 * and then the device fd with all callbacks set to NULL, and finally waits
 * with AIO_WAIT_WHILE() until the export's inflight counter is 0.
 *
 * NOTE(review): the declaration of i and the lines between fetching a
 * queue's fd and clearing its handler are not visible in this excerpt.
 */
170 static void vduse_blk_detach_ctx(VduseBlkExport
*vblk_exp
)
174 for (i
= 0; i
< vblk_exp
->num_queues
; i
++) {
175 VduseVirtq
*vq
= vduse_dev_get_queue(vblk_exp
->dev
, i
);
176 int fd
= vduse_queue_get_fd(vq
);
181 aio_set_fd_handler(vblk_exp
->export
.ctx
, fd
,
182 true, NULL
, NULL
, NULL
, NULL
, NULL
);
184 aio_set_fd_handler(vblk_exp
->export
.ctx
, vduse_dev_get_fd(vblk_exp
->dev
),
185 true, NULL
, NULL
, NULL
, NULL
, NULL
);
/* No new requests can arrive now; wait for the ones already running. */
187 AIO_WAIT_WHILE(vblk_exp
->export
.ctx
, vblk_exp
->inflight
> 0);
191 static void blk_aio_attached(AioContext
*ctx
, void *opaque
)
193 VduseBlkExport
*vblk_exp
= opaque
;
195 vblk_exp
->export
.ctx
= ctx
;
196 vduse_blk_attach_ctx(vblk_exp
, ctx
);
199 static void blk_aio_detach(void *opaque
)
201 VduseBlkExport
*vblk_exp
= opaque
;
203 vduse_blk_detach_ctx(vblk_exp
);
204 vblk_exp
->export
.ctx
= NULL
;
/*
 * BlockDevOps.resize_cb callback (see vduse_block_ops); opaque is the
 * BlockExport.
 *
 * Recomputes the device capacity as
 * blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS, converted with
 * cpu_to_le64(), and pushes just the capacity field of
 * struct virtio_blk_config (offset and size of that member) to the device
 * with vduse_dev_update_config().
 *
 * NOTE(review): the line holding the assignment target of the cpu_to_le64()
 * expression is not visible in this excerpt; presumably config.capacity.
 */
207 static void vduse_blk_resize(void *opaque
)
209 BlockExport
*exp
= opaque
;
210 VduseBlkExport
*vblk_exp
= container_of(exp
, VduseBlkExport
, export
);
211 struct virtio_blk_config config
;
214 cpu_to_le64(blk_getlength(exp
->blk
) >> VIRTIO_BLK_SECTOR_BITS
);
215 vduse_dev_update_config(vblk_exp
->dev
, sizeof(config
.capacity
),
216 offsetof(struct virtio_blk_config
, capacity
),
217 (char *)&config
.capacity
);
220 static const BlockDevOps vduse_block_ops
= {
221 .resize_cb
= vduse_blk_resize
,
/*
 * BlockExportDriver.create callback for the vduse-blk export
 * (see blk_exp_vduse_blk).
 *
 * Steps visible here:
 *  - read and validate the optional num-queues, queue-size and
 *    logical-block-size options, starting from the VDUSE_DEFAULT_* values
 *    and VIRTIO_BLK_SECTOR_SIZE;
 *  - fill in the export's VirtioBlkHandler;
 *  - build the virtio_blk_config and the feature bit mask;
 *  - create the VDUSE device, set up each queue, and register the device
 *    fd handler, the AioContext notifier and the BlockDevOps.
 *
 * Failures are reported through errp (error_setg()/error_propagate()).
 *
 * NOTE(review): the rest of the parameter list, the declarations of i and
 * features, the return statements and several continuation lines are not
 * visible in this excerpt.
 */
224 static int vduse_blk_exp_create(BlockExport
*exp
, BlockExportOptions
*opts
,
227 VduseBlkExport
*vblk_exp
= container_of(exp
, VduseBlkExport
, export
);
228 BlockExportOptionsVduseBlk
*vblk_opts
= &opts
->u
.vduse_blk
;
229 uint64_t logical_block_size
= VIRTIO_BLK_SECTOR_SIZE
;
230 uint16_t num_queues
= VDUSE_DEFAULT_NUM_QUEUE
;
231 uint16_t queue_size
= VDUSE_DEFAULT_QUEUE_SIZE
;
232 Error
*local_err
= NULL
;
233 struct virtio_blk_config config
= { 0 };
237 if (vblk_opts
->has_num_queues
) {
238 num_queues
= vblk_opts
->num_queues
;
239 if (num_queues
== 0) {
240 error_setg(errp
, "num-queues must be greater than 0");
/* queue-size must be > 2, a power of two and at most VIRTQUEUE_MAX_SIZE. */
245 if (vblk_opts
->has_queue_size
) {
246 queue_size
= vblk_opts
->queue_size
;
247 if (queue_size
<= 2 || !is_power_of_2(queue_size
) ||
248 queue_size
> VIRTQUEUE_MAX_SIZE
) {
249 error_setg(errp
, "queue-size is invalid");
254 if (vblk_opts
->has_logical_block_size
) {
255 logical_block_size
= vblk_opts
->logical_block_size
;
256 check_block_size(exp
->id
, "logical-block-size", logical_block_size
,
259 error_propagate(errp
, local_err
);
/* Options accepted: record them in the export and its request handler. */
263 vblk_exp
->num_queues
= num_queues
;
264 vblk_exp
->handler
.blk
= exp
->blk
;
265 vblk_exp
->handler
.serial
= exp
->id
;
266 vblk_exp
->handler
.logical_block_size
= logical_block_size
;
267 vblk_exp
->handler
.writable
= opts
->writable
;
/* Device config space; every field is stored little-endian. */
270 cpu_to_le64(blk_getlength(exp
->blk
) >> VIRTIO_BLK_SECTOR_BITS
);
271 config
.seg_max
= cpu_to_le32(queue_size
- 2);
272 config
.min_io_size
= cpu_to_le16(1);
273 config
.opt_io_size
= cpu_to_le32(1);
274 config
.num_queues
= cpu_to_le16(num_queues
);
275 config
.blk_size
= cpu_to_le32(logical_block_size
);
276 config
.max_discard_sectors
= cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS
);
277 config
.max_discard_seg
= cpu_to_le32(1);
278 config
.discard_sector_alignment
=
279 cpu_to_le32(logical_block_size
>> VIRTIO_BLK_SECTOR_BITS
);
280 config
.max_write_zeroes_sectors
=
281 cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS
);
282 config
.max_write_zeroes_seg
= cpu_to_le32(1);
/* Feature bits that are always offered, on top of the generic VDUSE ones. */
284 features
= vduse_get_virtio_features() |
285 (1ULL << VIRTIO_BLK_F_SEG_MAX
) |
286 (1ULL << VIRTIO_BLK_F_TOPOLOGY
) |
287 (1ULL << VIRTIO_BLK_F_BLK_SIZE
) |
288 (1ULL << VIRTIO_BLK_F_FLUSH
) |
289 (1ULL << VIRTIO_BLK_F_DISCARD
) |
290 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES
);
/* MQ only with more than one queue; RO when the export is not writable. */
292 if (num_queues
> 1) {
293 features
|= 1ULL << VIRTIO_BLK_F_MQ
;
295 if (!opts
->writable
) {
296 features
|= 1ULL << VIRTIO_BLK_F_RO
;
299 vblk_exp
->dev
= vduse_dev_create(exp
->id
, VIRTIO_ID_BLOCK
, 0,
300 features
, num_queues
,
301 sizeof(struct virtio_blk_config
),
302 (char *)&config
, &vduse_blk_ops
,
304 if (!vblk_exp
->dev
) {
305 error_setg(errp
, "failed to create vduse device");
309 for (i
= 0; i
< num_queues
; i
++) {
310 vduse_dev_setup_queue(vblk_exp
->dev
, i
, queue_size
);
/* Hook the device fd into the export's AioContext. */
313 aio_set_fd_handler(exp
->ctx
, vduse_dev_get_fd(vblk_exp
->dev
), true,
314 on_vduse_dev_kick
, NULL
, NULL
, NULL
, vblk_exp
->dev
);
316 blk_add_aio_context_notifier(exp
->blk
, blk_aio_attached
, blk_aio_detach
,
319 blk_set_dev_ops(exp
->blk
, &vduse_block_ops
, exp
);
/*
 * BlockExportDriver.delete callback (see blk_exp_vduse_blk).
 *
 * Undoes the registrations made in vduse_blk_exp_create(): removes the
 * AioContext notifier, clears the BlockDevOps and destroys the VDUSE
 * device.
 *
 * NOTE(review): the final argument line of
 * blk_remove_aio_context_notifier() is not visible in this excerpt.
 */
324 static void vduse_blk_exp_delete(BlockExport
*exp
)
326 VduseBlkExport
*vblk_exp
= container_of(exp
, VduseBlkExport
, export
);
328 blk_remove_aio_context_notifier(exp
->blk
, blk_aio_attached
, blk_aio_detach
,
330 blk_set_dev_ops(exp
->blk
, NULL
, NULL
);
331 vduse_dev_destroy(vblk_exp
->dev
);
334 static void vduse_blk_exp_request_shutdown(BlockExport
*exp
)
336 VduseBlkExport
*vblk_exp
= container_of(exp
, VduseBlkExport
, export
);
338 aio_context_acquire(vblk_exp
->export
.ctx
);
339 vduse_blk_detach_ctx(vblk_exp
);
340 aio_context_acquire(vblk_exp
->export
.ctx
);
343 const BlockExportDriver blk_exp_vduse_blk
= {
344 .type
= BLOCK_EXPORT_TYPE_VDUSE_BLK
,
345 .instance_size
= sizeof(VduseBlkExport
),
346 .create
= vduse_blk_exp_create
,
347 .delete = vduse_blk_exp_delete
,
348 .request_shutdown
= vduse_blk_exp_request_shutdown
,