2 * Sharing QEMU block devices via vhost-user protocol
4 * Parts of the code based on nbd/server.c.
6 * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
7 * Copyright (c) 2020 Red Hat, Inc.
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
12 #include "qemu/osdep.h"
13 #include "qemu/error-report.h"
14 #include "block/block.h"
15 #include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */
16 #include "standard-headers/linux/virtio_blk.h"
17 #include "qemu/vhost-user-server.h"
18 #include "vhost-user-blk-server.h"
19 #include "qapi/error.h"
20 #include "qom/object_interfaces.h"
21 #include "util/block-helpers.h"
22 #include "virtio-blk-handler.h"
/* Number of virtqueues exported when the user does not specify num-queues */
enum {
    VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
};
/*
 * Per-request state. The VuVirtqElement is allocated inline by
 * vu_queue_pop(), so it must be the first member.
 * NOTE(review): exact field list partially reconstructed — confirm against
 * upstream.
 */
typedef struct VuBlkReq {
    VuVirtqElement elem;
    VuServer *server;       /* owning vhost-user server */
    struct VuVirtq *vq;     /* virtqueue this request was popped from */
} VuBlkReq;

/* vhost user block device */
typedef struct {
    BlockExport export;                 /* generic block export base */
    VuServer vu_server;                 /* vhost-user protocol server state */
    VirtioBlkHandler handler;           /* virtio-blk request processing */
    QIOChannelSocket *sioc;
    struct virtio_blk_config blkcfg;    /* config space served to clients */
} VuBlkExport;
/*
 * Complete a request: push the used element back onto its virtqueue with
 * in_len bytes written to the device-writable buffers, notify the guest,
 * and free the request (allocated by vu_queue_pop()).
 */
static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
{
    VuDev *vu_dev = &req->server->vu_dev;

    vu_queue_push(vu_dev, req->vq, &req->elem, in_len);
    vu_queue_notify(vu_dev, req->vq);

    free(req);
}
/*
 * Called with server in_flight counter increased, must decrease before
 * returning.
 *
 * Coroutine body that processes one popped virtio-blk request via the
 * generic virtio-blk handler and then completes it.
 */
static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
{
    VuBlkReq *req = opaque;
    VuServer *server = req->server;
    VuVirtqElement *elem = &req->elem;
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    VirtioBlkHandler *handler = &vexp->handler;
    struct iovec *in_iov = elem->in_sg;     /* device-writable buffers */
    struct iovec *out_iov = elem->out_sg;   /* driver-written buffers */
    unsigned in_num = elem->in_num;
    unsigned out_num = elem->out_num;
    int in_len;

    in_len = virtio_blk_process_req(handler, in_iov, out_iov,
                                    in_num, out_num);
    if (in_len < 0) {
        /* Malformed request: drop it without pushing a used element */
        free(req);
        vhost_user_server_dec_in_flight(server);
        return;
    }

    vu_blk_req_complete(req, in_len);
    vhost_user_server_dec_in_flight(server);
}
/*
 * Virtqueue kick handler: drain all available requests from the queue and
 * spawn a coroutine per request. The in_flight counter is bumped before
 * entering each coroutine; the coroutine decrements it when done.
 * NOTE(review): loop structure reconstructed from context — confirm against
 * upstream.
 */
static void vu_blk_process_vq(VuDev *vu_dev, int idx)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuVirtq *vq = vu_get_queue(vu_dev, idx);

    while (1) {
        VuBlkReq *req;

        req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
        if (!req) {
            break;
        }

        req->server = server;
        req->vq = vq;

        Coroutine *co =
            qemu_coroutine_create(vu_blk_virtio_process_req, req);

        vhost_user_server_inc_in_flight(server);
        qemu_coroutine_enter(co);
    }
}
/* Install or remove the kick handler when the frontend (un)starts a queue */
static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL);
}
/*
 * Advertise the device feature bits: virtio-blk features plus the
 * transport/ring features and VHOST_USER_F_PROTOCOL_FEATURES.
 * A read-only export additionally sets VIRTIO_BLK_F_RO.
 */
static uint64_t vu_blk_get_features(VuDev *dev)
{
    uint64_t features;
    VuServer *server = container_of(dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);

    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               1ull << VIRTIO_BLK_F_DISCARD |
               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_BLK_F_MQ |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VIRTIO_RING_F_INDIRECT_DESC |
               1ull << VIRTIO_RING_F_EVENT_IDX |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (!vexp->handler.writable) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}
/* Only the CONFIG protocol feature (config space access) is supported */
static uint64_t vu_blk_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
}
/*
 * VHOST_USER_GET_CONFIG handler: copy up to sizeof(struct virtio_blk_config)
 * bytes of the cached config space to the client. Returns -1 if the
 * requested length exceeds the config space, 0 on success.
 */
static int
vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);

    if (len > sizeof(struct virtio_blk_config)) {
        return -1;
    }

    memcpy(config, &vexp->blkcfg, len);
    return 0;
}
/*
 * VHOST_USER_SET_CONFIG handler. The only writable field is the single
 * write-cache-enable byte (wce); everything else is rejected.
 */
static int
vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
                  uint32_t offset, uint32_t size, uint32_t flags)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    uint8_t wce;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
        return -EINVAL;
    }

    /* only the one-byte wce field may be written */
    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -EINVAL;
    }

    wce = *data;
    vexp->blkcfg.wce = wce;
    blk_set_enable_write_cache(vexp->export.blk, wce);
    return 0;
}
/*
 * When the client disconnects, it sends a VHOST_USER_NONE request
 * and vu_process_message would simply call exit, which causes the VM
 * to exit abruptly.
 * To avoid this issue, process VHOST_USER_NONE request ahead
 * of vu_process_message.
 */
static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
{
    if (vmsg->request == VHOST_USER_NONE) {
        dev->panic(dev, "disconnect");
        return true;    /* message consumed, skip vu_process_message() */
    }
    return false;       /* fall through to default message handling */
}
/* libvhost-user device callbacks for the block export */
static const VuDevIface vu_blk_iface = {
    .get_features          = vu_blk_get_features,
    .queue_set_started     = vu_blk_queue_set_started,
    .get_protocol_features = vu_blk_get_protocol_features,
    .get_config            = vu_blk_get_config,
    .set_config            = vu_blk_set_config,
    .process_msg           = vu_blk_process_msg,
};
/* AioContext change notifier: remember the new context for the export */
static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    VuBlkExport *vexp = opaque;

    /*
     * The actual attach will happen in vu_blk_drained_end() and we just
     * record the new AioContext here.
     */
    vexp->export.ctx = ctx;
}
/* AioContext change notifier: forget the context that is going away */
static void blk_aio_detach(void *opaque)
{
    VuBlkExport *vexp = opaque;

    /*
     * The actual detach already happened in vu_blk_drained_begin() but from
     * this point on we must not access ctx anymore.
     */
    vexp->export.ctx = NULL;
}
/*
 * Populate the virtio-blk config space served to clients.
 * All multi-byte fields are stored little-endian as required by virtio.
 * Capacity is expressed in 512-byte sectors regardless of blk_size.
 */
static void
vu_blk_initialize_config(BlockDriverState *bs,
                         struct virtio_blk_config *config,
                         uint32_t blk_size,
                         uint16_t num_queues)
{
    config->capacity =
        cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS);
    config->blk_size = cpu_to_le32(blk_size);
    config->size_max = cpu_to_le32(0);          /* 0 = no per-segment limit */
    config->seg_max = cpu_to_le32(128 - 2);     /* queue size minus hdr/status */
    config->min_io_size = cpu_to_le16(1);
    config->opt_io_size = cpu_to_le32(1);
    config->num_queues = cpu_to_le16(num_queues);
    config->max_discard_sectors =
        cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
    config->max_discard_seg = cpu_to_le32(1);
    config->discard_sector_alignment =
        cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS);
    config->max_write_zeroes_sectors
        = cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
    config->max_write_zeroes_seg = cpu_to_le32(1);
}
/* BlockExportDriver.request_shutdown: stop serving vhost-user clients */
static void vu_blk_exp_request_shutdown(BlockExport *exp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);

    vhost_user_server_stop(&vexp->vu_server);
}
/*
 * Resize callback: refresh the advertised capacity and send a config
 * change notification to the client.
 */
static void vu_blk_exp_resize(void *opaque)
{
    VuBlkExport *vexp = opaque;
    BlockDriverState *bs = blk_bs(vexp->handler.blk);
    int64_t new_size = bdrv_getlength(bs);

    if (new_size < 0) {
        error_printf("Failed to get length of block node '%s'",
                     bdrv_get_node_name(bs));
        return;
    }

    vexp->blkcfg.capacity = cpu_to_le64(new_size >> VIRTIO_BLK_SECTOR_BITS);

    vu_config_change_msg(&vexp->vu_server.vu_dev);
}
/* Called with vexp->export.ctx acquired */
static void vu_blk_drained_begin(void *opaque)
{
    VuBlkExport *vexp = opaque;

    /* Stop accepting new requests and detach from the AioContext */
    vexp->vu_server.quiescing = true;
    vhost_user_server_detach_aio_context(&vexp->vu_server);
}
/* Called with vexp->export.blk AioContext acquired */
static void vu_blk_drained_end(void *opaque)
{
    VuBlkExport *vexp = opaque;

    /* Resume request processing in the (possibly new) AioContext */
    vexp->vu_server.quiescing = false;
    vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx);
}
/*
 * Ensures that bdrv_drained_begin() waits until in-flight requests complete
 * and the server->co_trip coroutine has terminated. It will be restarted in
 * vhost_user_server_attach_aio_context().
 *
 * Called with vexp->export.ctx acquired.
 */
static bool vu_blk_drained_poll(void *opaque)
{
    VuBlkExport *vexp = opaque;
    VuServer *server = &vexp->vu_server;

    return server->co_trip || vhost_user_server_has_in_flight(server);
}
/* Drain/resize hooks installed on the export's BlockBackend */
static const BlockDevOps vu_blk_dev_ops = {
    .drained_begin = vu_blk_drained_begin,
    .drained_end   = vu_blk_drained_end,
    .drained_poll  = vu_blk_drained_poll,
    .resize_cb     = vu_blk_exp_resize,
};
/*
 * BlockExportDriver.create: validate options, initialize the virtio-blk
 * handler and config space, register AioContext notifiers and drain hooks,
 * then start the vhost-user server.
 * Returns 0 on success, negative errno on failure (with *errp set).
 */
static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
                             Error **errp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
    BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
    Error *local_err = NULL;
    uint64_t logical_block_size;
    uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;

    vexp->blkcfg.wce = 0;   /* write cache disabled until the client enables it */

    if (vu_opts->has_logical_block_size) {
        logical_block_size = vu_opts->logical_block_size;
    } else {
        logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
    }
    check_block_size(exp->id, "logical-block-size", logical_block_size,
                     &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }

    if (vu_opts->has_num_queues) {
        num_queues = vu_opts->num_queues;
    }
    if (num_queues == 0) {
        error_setg(errp, "num-queues must be greater than 0");
        return -EINVAL;
    }

    vexp->handler.blk = exp->blk;
    vexp->handler.serial = g_strdup("vhost_user_blk");
    vexp->handler.logical_block_size = logical_block_size;
    vexp->handler.writable = opts->writable;

    vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
                             logical_block_size, num_queues);

    blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                 vexp);

    blk_set_dev_ops(exp->blk, &vu_blk_dev_ops, vexp);

    if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
                                 num_queues, &vu_blk_iface, errp)) {
        /* undo the registrations done above */
        blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                        blk_aio_detach, vexp);
        g_free(vexp->handler.serial);
        return -EADDRNOTAVAIL;
    }

    return 0;
}
/* BlockExportDriver.delete: undo the registrations made in vu_blk_exp_create() */
static void vu_blk_exp_delete(BlockExport *exp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);

    blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                    vexp);
    g_free(vexp->handler.serial);
}
/* Export driver registered with the block export layer */
const BlockExportDriver blk_exp_vhost_user_blk = {
    .type               = BLOCK_EXPORT_TYPE_VHOST_USER_BLK,
    .instance_size      = sizeof(VuBlkExport),
    .create             = vu_blk_exp_create,
    .delete             = vu_blk_exp_delete,
    .request_shutdown   = vu_blk_exp_request_shutdown,
};