2 * Export QEMU block device via VDUSE
4 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
7 * Xie Yongji <xieyongji@bytedance.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
14 #include <sys/eventfd.h>
16 #include "qapi/error.h"
17 #include "block/export.h"
18 #include "qemu/error-report.h"
19 #include "util/block-helpers.h"
20 #include "subprojects/libvduse/libvduse.h"
21 #include "virtio-blk-handler.h"
23 #include "standard-headers/linux/virtio_blk.h"
/*
 * Defaults for the export: vduse_blk_exp_create() starts from these values
 * and replaces them only when the corresponding option is supplied.
 */
25 #define VDUSE_DEFAULT_NUM_QUEUE 1
26 #define VDUSE_DEFAULT_QUEUE_SIZE 256
/*
 * State of one VDUSE block export.
 *
 * NOTE(review): only some members are visible in this listing; code in this
 * file also accesses ->export, ->dev, ->num_queues and ->recon_file.
 */
28 typedef struct VduseBlkExport
{
/* Handler state passed to virtio_blk_process_req() for each request */
30 VirtioBlkHandler handler
;
/* Request counter; vduse_blk_detach_ctx() waits until it is 0 */
34 unsigned int inflight
;
/*
 * One request taken from a virtqueue; allocated by vduse_queue_pop() with
 * sizeof(VduseBlkReq).
 *
 * NOTE(review): code in this file also uses req->vq; that member is not
 * visible in this listing.
 */
37 typedef struct VduseBlkReq
{
/* Virtqueue element; its in_sg/out_sg arrays carry the request buffers */
38 VduseVirtqElement elem
;
/*
 * Called by vduse_blk_vq_handler() right before it enters the request
 * coroutine.
 *
 * NOTE(review): the body is not visible in this listing; presumably it
 * increments vblk_exp->inflight -- confirm.
 */
42 static void vduse_blk_inflight_inc(VduseBlkExport
*vblk_exp
)
/*
 * Drop one in-flight request from @vblk_exp.
 *
 * NOTE(review): what happens when the counter reaches zero is not visible
 * in this listing.
 */
47 static void vduse_blk_inflight_dec(VduseBlkExport
*vblk_exp
)
/* Pre-decrement: the branch is taken by the request that brings it to 0 */
49 if (--vblk_exp
->inflight
== 0) {
/*
 * Hand a finished request back to the driver: push its element onto the
 * request's virtqueue, forwarding @in_len, and notify that queue.
 *
 * NOTE(review): the end of the body is not visible in this listing.
 */
54 static void vduse_blk_req_complete(VduseBlkReq
*req
, size_t in_len
)
56 vduse_queue_push(req
->vq
, &req
->elem
, in_len
);
57 vduse_queue_notify(req
->vq
);
/*
 * Coroutine entry point that processes one request.
 *
 * @opaque is the VduseBlkReq.  The device, export and handler are derived
 * from the request's virtqueue; the element's in/out iovecs are passed to
 * virtio_blk_process_req(), after which the request is completed and the
 * export's in-flight counter is dropped.
 *
 * NOTE(review): the lines between the virtio_blk_process_req() call and the
 * completion are not visible in this listing -- confirm how a failing
 * request is handled there.
 */
62 static void coroutine_fn
vduse_blk_virtio_process_req(void *opaque
)
64 VduseBlkReq
*req
= opaque
;
65 VduseVirtq
*vq
= req
->vq
;
66 VduseDev
*dev
= vduse_queue_get_dev(vq
);
67 VduseBlkExport
*vblk_exp
= vduse_dev_get_priv(dev
);
68 VirtioBlkHandler
*handler
= &vblk_exp
->handler
;
69 VduseVirtqElement
*elem
= &req
->elem
;
70 struct iovec
*in_iov
= elem
->in_sg
;
71 struct iovec
*out_iov
= elem
->out_sg
;
72 unsigned in_num
= elem
->in_num
;
73 unsigned out_num
= elem
->out_num
;
/* The result is later forwarded to vduse_blk_req_complete() as in_len */
76 in_len
= virtio_blk_process_req(handler
, in_iov
,
77 out_iov
, in_num
, out_num
);
83 vduse_blk_req_complete(req
, in_len
);
84 vduse_blk_inflight_dec(vblk_exp
);
/*
 * Service @vq of @dev: pop a VduseBlkReq-sized element, create a coroutine
 * running vduse_blk_virtio_process_req() on it, count it as in flight and
 * enter the coroutine.
 *
 * NOTE(review): the surrounding loop and the handling of an empty queue are
 * not visible in this listing.
 */
87 static void vduse_blk_vq_handler(VduseDev
*dev
, VduseVirtq
*vq
)
89 VduseBlkExport
*vblk_exp
= vduse_dev_get_priv(dev
);
94 req
= vduse_queue_pop(vq
, sizeof(VduseBlkReq
));
101 qemu_coroutine_create(vduse_blk_virtio_process_req
, req
);
/* Count the request before the coroutine runs */
103 vduse_blk_inflight_inc(vblk_exp
);
104 qemu_coroutine_enter(co
);
/*
 * fd handler for a virtqueue's kick eventfd; @opaque is the VduseVirtq.
 *
 * Reads the eventfd, reports an error if the read fails, and processes
 * the queue via vduse_blk_vq_handler().
 *
 * NOTE(review): the statements following error_report() are not visible in
 * this listing -- confirm whether the handler bails out on a failed read.
 */
108 static void on_vduse_vq_kick(void *opaque
)
110 VduseVirtq
*vq
= opaque
;
111 VduseDev
*dev
= vduse_queue_get_dev(vq
);
112 int fd
= vduse_queue_get_fd(vq
);
115 if (eventfd_read(fd
, &kick_data
) == -1) {
116 error_report("failed to read data from eventfd");
120 vduse_blk_vq_handler(dev
, vq
);
123 static void vduse_blk_enable_queue(VduseDev
*dev
, VduseVirtq
*vq
)
125 VduseBlkExport
*vblk_exp
= vduse_dev_get_priv(dev
);
127 aio_set_fd_handler(vblk_exp
->export
.ctx
, vduse_queue_get_fd(vq
),
128 true, on_vduse_vq_kick
, NULL
, NULL
, NULL
, vq
);
129 /* Make sure we don't miss any kick afer reconnecting */
130 eventfd_write(vduse_queue_get_fd(vq
), 1);
133 static void vduse_blk_disable_queue(VduseDev
*dev
, VduseVirtq
*vq
)
135 VduseBlkExport
*vblk_exp
= vduse_dev_get_priv(dev
);
137 aio_set_fd_handler(vblk_exp
->export
.ctx
, vduse_queue_get_fd(vq
),
138 true, NULL
, NULL
, NULL
, NULL
, NULL
);
141 static const VduseOps vduse_blk_ops
= {
142 .enable_queue
= vduse_blk_enable_queue
,
143 .disable_queue
= vduse_blk_disable_queue
,
146 static void on_vduse_dev_kick(void *opaque
)
148 VduseDev
*dev
= opaque
;
150 vduse_dev_handler(dev
);
/*
 * Register the export's fd handlers: on_vduse_dev_kick() for the device fd
 * and on_vduse_vq_kick() for the kick fd of each of the num_queues queues.
 *
 * Note that the handlers are installed in vblk_exp->export.ctx; the @ctx
 * parameter is not used by any of the visible statements.
 *
 * NOTE(review): some lines of the body (the final argument of the first
 * call and the statements between reading fd and registering it) are not
 * visible in this listing.
 */
153 static void vduse_blk_attach_ctx(VduseBlkExport
*vblk_exp
, AioContext
*ctx
)
157 aio_set_fd_handler(vblk_exp
->export
.ctx
, vduse_dev_get_fd(vblk_exp
->dev
),
158 true, on_vduse_dev_kick
, NULL
, NULL
, NULL
,
161 for (i
= 0; i
< vblk_exp
->num_queues
; i
++) {
162 VduseVirtq
*vq
= vduse_dev_get_queue(vblk_exp
->dev
, i
);
163 int fd
= vduse_queue_get_fd(vq
);
/* The queue itself is the opaque argument of its kick handler */
168 aio_set_fd_handler(vblk_exp
->export
.ctx
, fd
, true,
169 on_vduse_vq_kick
, NULL
, NULL
, NULL
, vq
);
/*
 * Undo vduse_blk_attach_ctx(): clear the fd handlers of every queue and of
 * the device fd in vblk_exp->export.ctx, then wait until no request is in
 * flight.
 *
 * NOTE(review): the statements between reading a queue's fd and clearing
 * its handler are not visible in this listing.
 */
173 static void vduse_blk_detach_ctx(VduseBlkExport
*vblk_exp
)
177 for (i
= 0; i
< vblk_exp
->num_queues
; i
++) {
178 VduseVirtq
*vq
= vduse_dev_get_queue(vblk_exp
->dev
, i
);
179 int fd
= vduse_queue_get_fd(vq
);
184 aio_set_fd_handler(vblk_exp
->export
.ctx
, fd
,
185 true, NULL
, NULL
, NULL
, NULL
, NULL
);
187 aio_set_fd_handler(vblk_exp
->export
.ctx
, vduse_dev_get_fd(vblk_exp
->dev
),
188 true, NULL
, NULL
, NULL
, NULL
, NULL
);
/* No new requests can arrive now; drain the ones already started */
190 AIO_WAIT_WHILE(vblk_exp
->export
.ctx
, vblk_exp
->inflight
> 0);
194 static void blk_aio_attached(AioContext
*ctx
, void *opaque
)
196 VduseBlkExport
*vblk_exp
= opaque
;
198 vblk_exp
->export
.ctx
= ctx
;
199 vduse_blk_attach_ctx(vblk_exp
, ctx
);
202 static void blk_aio_detach(void *opaque
)
204 VduseBlkExport
*vblk_exp
= opaque
;
206 vduse_blk_detach_ctx(vblk_exp
);
207 vblk_exp
->export
.ctx
= NULL
;
/*
 * BlockDevOps.resize_cb: publish the backend's current size to the device.
 *
 * @opaque is the BlockExport.  Only the capacity field of the virtio-blk
 * config space is rewritten (offset and size of that single field are
 * passed to vduse_dev_update_config()).
 *
 * NOTE(review): the line holding the assignment target of the capacity
 * expression is not visible in this listing (presumably config.capacity).
 * The blk_getlength() result is shifted without a visible error check --
 * confirm a negative return cannot reach this point.
 */
210 static void vduse_blk_resize(void *opaque
)
212 BlockExport
*exp
= opaque
;
213 VduseBlkExport
*vblk_exp
= container_of(exp
, VduseBlkExport
, export
);
214 struct virtio_blk_config config
;
/* Length shifted right by VIRTIO_BLK_SECTOR_BITS, as little-endian 64-bit */
217 cpu_to_le64(blk_getlength(exp
->blk
) >> VIRTIO_BLK_SECTOR_BITS
);
218 vduse_dev_update_config(vblk_exp
->dev
, sizeof(config
.capacity
),
219 offsetof(struct virtio_blk_config
, capacity
),
220 (char *)&config
.capacity
);
223 static const BlockDevOps vduse_block_ops
= {
224 .resize_cb
= vduse_blk_resize
,
/*
 * BlockExportDriver.create callback: set up a VDUSE block export.
 *
 * Visible steps: validate the optional num-queues, queue-size and
 * logical-block-size options; fill in the handler state; build the
 * virtio-blk config space and feature bits; create the VDUSE device; set
 * its reconnect log file; set up the queues; register the device fd
 * handler, the AioContext notifiers and the block device ops.
 *
 * Errors are reported through errp.
 *
 * NOTE(review): this listing omits many lines (remainder of the signature,
 * return statements, gotos and cleanup labels), so the exact error-path
 * control flow cannot be read from here.
 */
227 static int vduse_blk_exp_create(BlockExport
*exp
, BlockExportOptions
*opts
,
230 VduseBlkExport
*vblk_exp
= container_of(exp
, VduseBlkExport
, export
);
231 BlockExportOptionsVduseBlk
*vblk_opts
= &opts
->u
.vduse_blk
;
232 uint64_t logical_block_size
= VIRTIO_BLK_SECTOR_SIZE
;
233 uint16_t num_queues
= VDUSE_DEFAULT_NUM_QUEUE
;
234 uint16_t queue_size
= VDUSE_DEFAULT_QUEUE_SIZE
;
235 Error
*local_err
= NULL
;
236 struct virtio_blk_config config
= { 0 };
/* Optional num-queues override; zero is rejected */
240 if (vblk_opts
->has_num_queues
) {
241 num_queues
= vblk_opts
->num_queues
;
242 if (num_queues
== 0) {
243 error_setg(errp
, "num-queues must be greater than 0");
/*
 * Optional queue-size override: must be greater than 2, a power of two and
 * no larger than VIRTQUEUE_MAX_SIZE
 */
248 if (vblk_opts
->has_queue_size
) {
249 queue_size
= vblk_opts
->queue_size
;
250 if (queue_size
<= 2 || !is_power_of_2(queue_size
) ||
251 queue_size
> VIRTQUEUE_MAX_SIZE
) {
252 error_setg(errp
, "queue-size is invalid");
/* Optional logical-block-size override, validated by check_block_size() */
257 if (vblk_opts
->has_logical_block_size
) {
258 logical_block_size
= vblk_opts
->logical_block_size
;
259 check_block_size(exp
->id
, "logical-block-size", logical_block_size
,
262 error_propagate(errp
, local_err
);
/* Export and request-handler state; serial is a heap copy ("" if unset) */
266 vblk_exp
->num_queues
= num_queues
;
267 vblk_exp
->handler
.blk
= exp
->blk
;
268 vblk_exp
->handler
.serial
= g_strdup(vblk_opts
->serial
?: "");
269 vblk_exp
->handler
.logical_block_size
= logical_block_size
;
270 vblk_exp
->handler
.writable
= opts
->writable
;
/* virtio-blk config space; every field is stored little-endian */
273 cpu_to_le64(blk_getlength(exp
->blk
) >> VIRTIO_BLK_SECTOR_BITS
);
274 config
.seg_max
= cpu_to_le32(queue_size
- 2);
275 config
.min_io_size
= cpu_to_le16(1);
276 config
.opt_io_size
= cpu_to_le32(1);
277 config
.num_queues
= cpu_to_le16(num_queues
);
278 config
.blk_size
= cpu_to_le32(logical_block_size
);
279 config
.max_discard_sectors
= cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS
);
280 config
.max_discard_seg
= cpu_to_le32(1);
281 config
.discard_sector_alignment
=
282 cpu_to_le32(logical_block_size
>> VIRTIO_BLK_SECTOR_BITS
);
283 config
.max_write_zeroes_sectors
=
284 cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS
);
285 config
.max_write_zeroes_seg
= cpu_to_le32(1);
/* Feature bits always offered, on top of what libvduse provides */
287 features
= vduse_get_virtio_features() |
288 (1ULL << VIRTIO_BLK_F_SEG_MAX
) |
289 (1ULL << VIRTIO_BLK_F_TOPOLOGY
) |
290 (1ULL << VIRTIO_BLK_F_BLK_SIZE
) |
291 (1ULL << VIRTIO_BLK_F_FLUSH
) |
292 (1ULL << VIRTIO_BLK_F_DISCARD
) |
293 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES
);
/* Multiqueue is offered only with more than one queue */
295 if (num_queues
> 1) {
296 features
|= 1ULL << VIRTIO_BLK_F_MQ
;
/* A non-writable export is advertised as read-only */
298 if (!opts
->writable
) {
299 features
|= 1ULL << VIRTIO_BLK_F_RO
;
/* Create the VDUSE device with the config space built above */
302 vblk_exp
->dev
= vduse_dev_create(vblk_opts
->name
, VIRTIO_ID_BLOCK
, 0,
303 features
, num_queues
,
304 sizeof(struct virtio_blk_config
),
305 (char *)&config
, &vduse_blk_ops
,
307 if (!vblk_exp
->dev
) {
308 error_setg(errp
, "failed to create vduse device");
/* Reconnect log lives in the temp directory, named after the device */
313 vblk_exp
->recon_file
= g_strdup_printf("%s/vduse-blk-%s",
314 g_get_tmp_dir(), vblk_opts
->name
);
315 if (vduse_set_reconnect_log_file(vblk_exp
->dev
, vblk_exp
->recon_file
)) {
316 error_setg(errp
, "failed to set reconnect log file");
/* Every queue gets the same size */
321 for (i
= 0; i
< num_queues
; i
++) {
322 vduse_dev_setup_queue(vblk_exp
->dev
, i
, queue_size
);
/* Start handling device-fd events in the export's AioContext */
325 aio_set_fd_handler(exp
->ctx
, vduse_dev_get_fd(vblk_exp
->dev
), true,
326 on_vduse_dev_kick
, NULL
, NULL
, NULL
, vblk_exp
->dev
);
328 blk_add_aio_context_notifier(exp
->blk
, blk_aio_attached
, blk_aio_detach
,
331 blk_set_dev_ops(exp
->blk
, &vduse_block_ops
, exp
);
/* Cleanup statements; the labels separating them are not visible here */
335 vduse_dev_destroy(vblk_exp
->dev
);
336 g_free(vblk_exp
->recon_file
);
338 g_free(vblk_exp
->handler
.serial
);
/*
 * BlockExportDriver.delete callback: undo vduse_blk_exp_create().
 *
 * Removes the AioContext notifiers, clears the block device ops, destroys
 * the VDUSE device, unlinks the reconnect log file and frees the strings
 * owned by the export.
 *
 * NOTE(review): the result of vduse_dev_destroy() is stored in ret, but the
 * lines that test it (around the unlink() call) are not visible in this
 * listing.
 */
342 static void vduse_blk_exp_delete(BlockExport
*exp
)
344 VduseBlkExport
*vblk_exp
= container_of(exp
, VduseBlkExport
, export
);
347 blk_remove_aio_context_notifier(exp
->blk
, blk_aio_attached
, blk_aio_detach
,
349 blk_set_dev_ops(exp
->blk
, NULL
, NULL
);
350 ret
= vduse_dev_destroy(vblk_exp
->dev
);
352 unlink(vblk_exp
->recon_file
);
354 g_free(vblk_exp
->recon_file
);
355 g_free(vblk_exp
->handler
.serial
);
358 static void vduse_blk_exp_request_shutdown(BlockExport
*exp
)
360 VduseBlkExport
*vblk_exp
= container_of(exp
, VduseBlkExport
, export
);
362 aio_context_acquire(vblk_exp
->export
.ctx
);
363 vduse_blk_detach_ctx(vblk_exp
);
364 aio_context_acquire(vblk_exp
->export
.ctx
);
/*
 * Driver description for the vduse-blk export type: instance size and the
 * create/delete/request_shutdown callbacks defined in this file.
 *
 * NOTE(review): the end of this initializer is not visible in this listing.
 */
367 const BlockExportDriver blk_exp_vduse_blk
= {
368 .type
= BLOCK_EXPORT_TYPE_VDUSE_BLK
,
369 .instance_size
= sizeof(VduseBlkExport
),
370 .create
= vduse_blk_exp_create
,
371 .delete = vduse_blk_exp_delete
,
372 .request_shutdown
= vduse_blk_exp_request_shutdown
,