vduse-blk: Add vduse-blk resize support
[qemu.git] / block / export / vduse-blk.c
blobcab190423437fc0ded7b57a98dd7ac750ecdf741
/*
 * Export QEMU block device via VDUSE
 *
 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author:
 *   Xie Yongji <xieyongji@bytedance.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
13 #include <sys/eventfd.h>
15 #include "qemu/osdep.h"
16 #include "qapi/error.h"
17 #include "block/export.h"
18 #include "qemu/error-report.h"
19 #include "util/block-helpers.h"
20 #include "subprojects/libvduse/libvduse.h"
21 #include "virtio-blk-handler.h"
23 #include "standard-headers/linux/virtio_blk.h"
/* Number of virtqueues used when the "num-queues" option is not given. */
#define VDUSE_DEFAULT_NUM_QUEUE 1
/* Size of each virtqueue used when the "queue-size" option is not given. */
#define VDUSE_DEFAULT_QUEUE_SIZE 256
28 typedef struct VduseBlkExport {
29 BlockExport export;
30 VirtioBlkHandler handler;
31 VduseDev *dev;
32 uint16_t num_queues;
33 unsigned int inflight;
34 } VduseBlkExport;
36 typedef struct VduseBlkReq {
37 VduseVirtqElement elem;
38 VduseVirtq *vq;
39 } VduseBlkReq;
41 static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
43 vblk_exp->inflight++;
46 static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
48 if (--vblk_exp->inflight == 0) {
49 aio_wait_kick();
53 static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
55 vduse_queue_push(req->vq, &req->elem, in_len);
56 vduse_queue_notify(req->vq);
58 free(req);
61 static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
63 VduseBlkReq *req = opaque;
64 VduseVirtq *vq = req->vq;
65 VduseDev *dev = vduse_queue_get_dev(vq);
66 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
67 VirtioBlkHandler *handler = &vblk_exp->handler;
68 VduseVirtqElement *elem = &req->elem;
69 struct iovec *in_iov = elem->in_sg;
70 struct iovec *out_iov = elem->out_sg;
71 unsigned in_num = elem->in_num;
72 unsigned out_num = elem->out_num;
73 int in_len;
75 in_len = virtio_blk_process_req(handler, in_iov,
76 out_iov, in_num, out_num);
77 if (in_len < 0) {
78 free(req);
79 return;
82 vduse_blk_req_complete(req, in_len);
83 vduse_blk_inflight_dec(vblk_exp);
86 static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
88 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
90 while (1) {
91 VduseBlkReq *req;
93 req = vduse_queue_pop(vq, sizeof(VduseBlkReq));
94 if (!req) {
95 break;
97 req->vq = vq;
99 Coroutine *co =
100 qemu_coroutine_create(vduse_blk_virtio_process_req, req);
102 vduse_blk_inflight_inc(vblk_exp);
103 qemu_coroutine_enter(co);
107 static void on_vduse_vq_kick(void *opaque)
109 VduseVirtq *vq = opaque;
110 VduseDev *dev = vduse_queue_get_dev(vq);
111 int fd = vduse_queue_get_fd(vq);
112 eventfd_t kick_data;
114 if (eventfd_read(fd, &kick_data) == -1) {
115 error_report("failed to read data from eventfd");
116 return;
119 vduse_blk_vq_handler(dev, vq);
122 static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
124 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
126 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
127 true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
130 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
132 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
134 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
135 true, NULL, NULL, NULL, NULL, NULL);
138 static const VduseOps vduse_blk_ops = {
139 .enable_queue = vduse_blk_enable_queue,
140 .disable_queue = vduse_blk_disable_queue,
143 static void on_vduse_dev_kick(void *opaque)
145 VduseDev *dev = opaque;
147 vduse_dev_handler(dev);
150 static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
152 int i;
154 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
155 true, on_vduse_dev_kick, NULL, NULL, NULL,
156 vblk_exp->dev);
158 for (i = 0; i < vblk_exp->num_queues; i++) {
159 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
160 int fd = vduse_queue_get_fd(vq);
162 if (fd < 0) {
163 continue;
165 aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
166 on_vduse_vq_kick, NULL, NULL, NULL, vq);
170 static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
172 int i;
174 for (i = 0; i < vblk_exp->num_queues; i++) {
175 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
176 int fd = vduse_queue_get_fd(vq);
178 if (fd < 0) {
179 continue;
181 aio_set_fd_handler(vblk_exp->export.ctx, fd,
182 true, NULL, NULL, NULL, NULL, NULL);
184 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
185 true, NULL, NULL, NULL, NULL, NULL);
187 AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
191 static void blk_aio_attached(AioContext *ctx, void *opaque)
193 VduseBlkExport *vblk_exp = opaque;
195 vblk_exp->export.ctx = ctx;
196 vduse_blk_attach_ctx(vblk_exp, ctx);
199 static void blk_aio_detach(void *opaque)
201 VduseBlkExport *vblk_exp = opaque;
203 vduse_blk_detach_ctx(vblk_exp);
204 vblk_exp->export.ctx = NULL;
207 static void vduse_blk_resize(void *opaque)
209 BlockExport *exp = opaque;
210 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
211 struct virtio_blk_config config;
213 config.capacity =
214 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
215 vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
216 offsetof(struct virtio_blk_config, capacity),
217 (char *)&config.capacity);
220 static const BlockDevOps vduse_block_ops = {
221 .resize_cb = vduse_blk_resize,
224 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
225 Error **errp)
227 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
228 BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
229 uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
230 uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
231 uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
232 Error *local_err = NULL;
233 struct virtio_blk_config config = { 0 };
234 uint64_t features;
235 int i;
237 if (vblk_opts->has_num_queues) {
238 num_queues = vblk_opts->num_queues;
239 if (num_queues == 0) {
240 error_setg(errp, "num-queues must be greater than 0");
241 return -EINVAL;
245 if (vblk_opts->has_queue_size) {
246 queue_size = vblk_opts->queue_size;
247 if (queue_size <= 2 || !is_power_of_2(queue_size) ||
248 queue_size > VIRTQUEUE_MAX_SIZE) {
249 error_setg(errp, "queue-size is invalid");
250 return -EINVAL;
254 if (vblk_opts->has_logical_block_size) {
255 logical_block_size = vblk_opts->logical_block_size;
256 check_block_size(exp->id, "logical-block-size", logical_block_size,
257 &local_err);
258 if (local_err) {
259 error_propagate(errp, local_err);
260 return -EINVAL;
263 vblk_exp->num_queues = num_queues;
264 vblk_exp->handler.blk = exp->blk;
265 vblk_exp->handler.serial = exp->id;
266 vblk_exp->handler.logical_block_size = logical_block_size;
267 vblk_exp->handler.writable = opts->writable;
269 config.capacity =
270 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
271 config.seg_max = cpu_to_le32(queue_size - 2);
272 config.min_io_size = cpu_to_le16(1);
273 config.opt_io_size = cpu_to_le32(1);
274 config.num_queues = cpu_to_le16(num_queues);
275 config.blk_size = cpu_to_le32(logical_block_size);
276 config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
277 config.max_discard_seg = cpu_to_le32(1);
278 config.discard_sector_alignment =
279 cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
280 config.max_write_zeroes_sectors =
281 cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
282 config.max_write_zeroes_seg = cpu_to_le32(1);
284 features = vduse_get_virtio_features() |
285 (1ULL << VIRTIO_BLK_F_SEG_MAX) |
286 (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
287 (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
288 (1ULL << VIRTIO_BLK_F_FLUSH) |
289 (1ULL << VIRTIO_BLK_F_DISCARD) |
290 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
292 if (num_queues > 1) {
293 features |= 1ULL << VIRTIO_BLK_F_MQ;
295 if (!opts->writable) {
296 features |= 1ULL << VIRTIO_BLK_F_RO;
299 vblk_exp->dev = vduse_dev_create(exp->id, VIRTIO_ID_BLOCK, 0,
300 features, num_queues,
301 sizeof(struct virtio_blk_config),
302 (char *)&config, &vduse_blk_ops,
303 vblk_exp);
304 if (!vblk_exp->dev) {
305 error_setg(errp, "failed to create vduse device");
306 return -ENOMEM;
309 for (i = 0; i < num_queues; i++) {
310 vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
313 aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
314 on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
316 blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
317 vblk_exp);
319 blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
321 return 0;
324 static void vduse_blk_exp_delete(BlockExport *exp)
326 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
328 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
329 vblk_exp);
330 blk_set_dev_ops(exp->blk, NULL, NULL);
331 vduse_dev_destroy(vblk_exp->dev);
334 static void vduse_blk_exp_request_shutdown(BlockExport *exp)
336 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
338 aio_context_acquire(vblk_exp->export.ctx);
339 vduse_blk_detach_ctx(vblk_exp);
340 aio_context_acquire(vblk_exp->export.ctx);
343 const BlockExportDriver blk_exp_vduse_blk = {
344 .type = BLOCK_EXPORT_TYPE_VDUSE_BLK,
345 .instance_size = sizeof(VduseBlkExport),
346 .create = vduse_blk_exp_create,
347 .delete = vduse_blk_exp_delete,
348 .request_shutdown = vduse_blk_exp_request_shutdown,