/*
 * Sharing QEMU block devices via vhost-user protocol
 *
 * Parts of the code based on nbd/server.c.
 *
 * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "block/block.h"
#include "contrib/libvhost-user/libvhost-user.h"
#include "standard-headers/linux/virtio_blk.h"
#include "qemu/vhost-user-server.h"
#include "vhost-user-blk-server.h"
#include "qapi/error.h"
#include "qom/object_interfaces.h"
#include "sysemu/block-backend.h"
#include "util/block-helpers.h"

enum {
    VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
};

struct virtio_blk_inhdr {
    unsigned char status;
};

typedef struct VuBlkReq {
    VuVirtqElement elem;
    int64_t sector_num;
    size_t size;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr out;
    VuServer *server;
    struct VuVirtq *vq;
} VuBlkReq;

/* vhost user block device */
typedef struct {
    BlockExport export;
    VuServer vu_server;
    uint32_t blk_size;
    QIOChannelSocket *sioc;
    struct virtio_blk_config blkcfg;
    bool writable;
} VuBlkExport;
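
/* Complete a request: push the used element back onto the vring and notify
 * the guest; the pushed length covers the payload plus the status byte. */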
static void vu_blk_req_complete(VuBlkReq *req)
{
    VuDev *vu_dev = &req->server->vu_dev;

    /* IO size with 1 extra status byte */
    vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
    vu_queue_notify(vu_dev, req->vq);

    free(req);
}
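
/*
 * Parse the discard/write-zeroes payload and apply it to the block backend.
 * Returns 0 on success, -EINVAL on a malformed or failed request.
 */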
static int coroutine_fn
vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov,
                            uint32_t iovcnt, uint32_t type)
{
    struct virtio_blk_discard_write_zeroes desc;
    ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
    if (unlikely(size != sizeof(desc))) {
        error_report("Invalid size %zd, expect %zu", size, sizeof(desc));
        return -EINVAL;
    }

    uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
                          le32_to_cpu(desc.num_sectors) << 9 };
    if (type == VIRTIO_BLK_T_DISCARD) {
        if (blk_co_pdiscard(blk, range[0], range[1]) == 0) {
            return 0;
        }
    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
        if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) {
            return 0;
        }
    }

    return -EINVAL;
}
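
/*
 * Coroutine that services a single virtio-blk request: validate the out/in
 * headers, dispatch on the request type, fill in the status byte and
 * complete the request.
 */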
static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
{
    VuBlkReq *req = opaque;
    VuServer *server = req->server;
    VuVirtqElement *elem = &req->elem;
    uint32_t type;

    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    BlockBackend *blk = vexp->export.blk;

    struct iovec *in_iov = elem->in_sg;
    struct iovec *out_iov = elem->out_sg;
    unsigned in_num = elem->in_num;
    unsigned out_num = elem->out_num;

    /* refer to hw/block/virtio_blk.c */
    if (elem->out_num < 1 || elem->in_num < 1) {
        error_report("virtio-blk request missing headers");
        goto err;
    }

    if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
                            sizeof(req->out)) != sizeof(req->out))) {
        error_report("virtio-blk request outhdr too short");
        goto err;
    }

    iov_discard_front(&out_iov, &out_num, sizeof(req->out));

    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        error_report("virtio-blk request inhdr too short");
        goto err;
    }

    /* We always touch the last byte, so just see how big in_iov is. */
    req->in = (void *)in_iov[in_num - 1].iov_base
              + in_iov[in_num - 1].iov_len
              - sizeof(struct virtio_blk_inhdr);
    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));

    type = le32_to_cpu(req->out.type);
    switch (type & ~VIRTIO_BLK_T_BARRIER) {
    case VIRTIO_BLK_T_IN:
    case VIRTIO_BLK_T_OUT: {
        ssize_t ret = 0;
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = le64_to_cpu(req->out.sector);

        if (is_write && !vexp->writable) {
            req->in->status = VIRTIO_BLK_S_IOERR;
            break;
        }

        int64_t offset = req->sector_num * vexp->blk_size;
        QEMUIOVector qiov;
        if (is_write) {
            qemu_iovec_init_external(&qiov, out_iov, out_num);
            ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
        } else {
            qemu_iovec_init_external(&qiov, in_iov, in_num);
            ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
        }
        if (ret >= 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        break;
    }
    case VIRTIO_BLK_T_FLUSH:
        if (blk_co_flush(blk) == 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        break;
    case VIRTIO_BLK_T_GET_ID: {
        size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
                          VIRTIO_BLK_ID_BYTES);
        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
        req->in->status = VIRTIO_BLK_S_OK;
        req->size = elem->in_sg[0].iov_len;
        break;
    }
    case VIRTIO_BLK_T_DISCARD:
    case VIRTIO_BLK_T_WRITE_ZEROES: {
        int rc;

        if (!vexp->writable) {
            req->in->status = VIRTIO_BLK_S_IOERR;
            break;
        }

        rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type);
        if (rc == 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        break;
    }
    default:
        req->in->status = VIRTIO_BLK_S_UNSUPP;
        break;
    }

    vu_blk_req_complete(req);
    return;

err:
    free(req);
}
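
/* Virtqueue kick handler: pop each available element and run the request in
 * its own coroutine */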
static void vu_blk_process_vq(VuDev *vu_dev, int idx)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuVirtq *vq = vu_get_queue(vu_dev, idx);

    while (1) {
        VuBlkReq *req;

        req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
        if (!req) {
            break;
        }

        req->server = server;
        req->vq = vq;

        Coroutine *co =
            qemu_coroutine_create(vu_blk_virtio_process_req, req);
        qemu_coroutine_enter(co);
    }
}

static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL);
}
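
/* Virtio device features offered to the client; read-only exports also
 * advertise VIRTIO_BLK_F_RO */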
static uint64_t vu_blk_get_features(VuDev *dev)
{
    uint64_t features;
    VuServer *server = container_of(dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);

    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               1ull << VIRTIO_BLK_F_DISCARD |
               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_BLK_F_MQ |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VIRTIO_RING_F_INDIRECT_DESC |
               1ull << VIRTIO_RING_F_EVENT_IDX |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (!vexp->writable) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}
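
/* vhost-user protocol features: config space access and inflight tracking */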
static uint64_t vu_blk_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
           1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
}

static int
vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    /* TODO blkcfg must be little-endian for VIRTIO 1.0 */
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);

    memcpy(config, &vexp->blkcfg, len);
    return 0;
}
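
/* Only the write-cache enable (wce) byte of the config space may be changed */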
static int
vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
                  uint32_t offset, uint32_t size, uint32_t flags)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    uint8_t wce;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
        return -EINVAL;
    }

    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -EINVAL;
    }

    wce = *data;
    vexp->blkcfg.wce = wce;
    blk_set_enable_write_cache(vexp->export.blk, wce);
    return 0;
}

/*
 * When the client disconnects, it sends a VHOST_USER_NONE request, and
 * vu_process_message would simply call exit(), which causes the VM to
 * exit abruptly. To avoid this, handle the VHOST_USER_NONE request ahead
 * of vu_process_message.
 */
static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
{
    if (vmsg->request == VHOST_USER_NONE) {
        dev->panic(dev, "disconnect");
        return true;
    }
    return false;
}

static const VuDevIface vu_blk_iface = {
    .get_features          = vu_blk_get_features,
    .queue_set_started     = vu_blk_queue_set_started,
    .get_protocol_features = vu_blk_get_protocol_features,
    .get_config            = vu_blk_get_config,
    .set_config            = vu_blk_set_config,
    .process_msg           = vu_blk_process_msg,
};
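
/* AioContext change notifiers keep the vhost-user server running in the same
 * context as the exported BlockBackend */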
static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    VuBlkExport *vexp = opaque;

    vexp->export.ctx = ctx;
    vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
}

static void blk_aio_detach(void *opaque)
{
    VuBlkExport *vexp = opaque;

    vhost_user_server_detach_aio_context(&vexp->vu_server);
    vexp->export.ctx = NULL;
}
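
/* Fill in the virtio-blk config space advertised to the client */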
static void
vu_blk_initialize_config(BlockDriverState *bs,
                         struct virtio_blk_config *config,
                         uint32_t blk_size,
                         uint16_t num_queues)
{
    config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    config->blk_size = blk_size;
    config->size_max = 0;
    config->seg_max = 128 - 2;
    config->min_io_size = 1;
    config->opt_io_size = 1;
    config->num_queues = num_queues;
    config->max_discard_sectors = 32768;
    config->max_discard_seg = 1;
    config->discard_sector_alignment = config->blk_size >> 9;
    config->max_write_zeroes_sectors = 32768;
    config->max_write_zeroes_seg = 1;
}

static void vu_blk_exp_request_shutdown(BlockExport *exp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);

    vhost_user_server_stop(&vexp->vu_server);
}
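
/*
 * Create the export: validate the options, initialize the config space,
 * register AioContext notifiers and start the vhost-user server on the
 * given socket address.
 */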
static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
                             Error **errp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
    BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
    Error *local_err = NULL;
    uint64_t logical_block_size;
    uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;

    vexp->writable = opts->writable;
    vexp->blkcfg.wce = 0;

    if (vu_opts->has_logical_block_size) {
        logical_block_size = vu_opts->logical_block_size;
    } else {
        logical_block_size = BDRV_SECTOR_SIZE;
    }
    check_block_size(exp->id, "logical-block-size", logical_block_size,
                     &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    vexp->blk_size = logical_block_size;
    blk_set_guest_block_size(exp->blk, logical_block_size);

    if (vu_opts->has_num_queues) {
        num_queues = vu_opts->num_queues;
    }
    if (num_queues == 0) {
        error_setg(errp, "num-queues must be greater than 0");
        return -EINVAL;
    }

    vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
                             logical_block_size, num_queues);

    blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                 vexp);

    if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
                                 num_queues, &vu_blk_iface, errp)) {
        blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                        blk_aio_detach, vexp);
        return -EADDRNOTAVAIL;
    }

    return 0;
}

static void vu_blk_exp_delete(BlockExport *exp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);

    blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                    vexp);
}
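
/* Export driver entry for vhost-user-blk block exports */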
const BlockExportDriver blk_exp_vhost_user_blk = {
    .type               = BLOCK_EXPORT_TYPE_VHOST_USER_BLK,
    .instance_size      = sizeof(VuBlkExport),
    .create             = vu_blk_exp_create,
    .delete             = vu_blk_exp_delete,
    .request_shutdown   = vu_blk_exp_request_shutdown,
};