4 * Copyright(c) 2017-2018 Intel Corporation.
5 * Copyright(c) 2020 Red Hat, Inc.
7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
8 * See the COPYING file in the top-level directory.
12 #include "qemu/osdep.h"
14 #include "hw/virtio/virtio-net.h"
15 #include "net/vhost_net.h"
16 #include "net/vhost-vdpa.h"
17 #include "hw/virtio/vhost-vdpa.h"
18 #include "qemu/config-file.h"
19 #include "qemu/error-report.h"
21 #include "qemu/memalign.h"
22 #include "qemu/option.h"
23 #include "qapi/error.h"
24 #include <linux/vhost.h>
25 #include <sys/ioctl.h>
27 #include "standard-headers/linux/virtio_net.h"
28 #include "monitor/monitor.h"
29 #include "hw/virtio/vhost.h"
/* TODO: need to add the multiqueue support here */
32 typedef struct VhostVDPAState
{
34 struct vhost_vdpa vhost_vdpa
;
35 VHostNetState
*vhost_net
;
37 /* Control commands shadow buffers */
38 void *cvq_cmd_out_buffer
;
39 virtio_net_ctrl_ack
*status
;
/* The device always has SVQ enabled */
46 const int vdpa_feature_bits
[] = {
47 VIRTIO_F_NOTIFY_ON_EMPTY
,
48 VIRTIO_RING_F_INDIRECT_DESC
,
49 VIRTIO_RING_F_EVENT_IDX
,
53 VIRTIO_NET_F_GUEST_CSUM
,
55 VIRTIO_NET_F_GUEST_TSO4
,
56 VIRTIO_NET_F_GUEST_TSO6
,
57 VIRTIO_NET_F_GUEST_ECN
,
58 VIRTIO_NET_F_GUEST_UFO
,
59 VIRTIO_NET_F_HOST_TSO4
,
60 VIRTIO_NET_F_HOST_TSO6
,
61 VIRTIO_NET_F_HOST_ECN
,
62 VIRTIO_NET_F_HOST_UFO
,
63 VIRTIO_NET_F_MRG_RXBUF
,
66 VIRTIO_NET_F_CTRL_RX_EXTRA
,
67 VIRTIO_NET_F_CTRL_VLAN
,
68 VIRTIO_NET_F_CTRL_MAC_ADDR
,
72 VIRTIO_F_IOMMU_PLATFORM
,
76 VIRTIO_NET_F_HASH_REPORT
,
78 VHOST_INVALID_FEATURE_BIT
81 /** Supported device specific feature bits with SVQ */
82 static const uint64_t vdpa_svq_device_features
=
83 BIT_ULL(VIRTIO_NET_F_CSUM
) |
84 BIT_ULL(VIRTIO_NET_F_GUEST_CSUM
) |
85 BIT_ULL(VIRTIO_NET_F_MTU
) |
86 BIT_ULL(VIRTIO_NET_F_MAC
) |
87 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4
) |
88 BIT_ULL(VIRTIO_NET_F_GUEST_TSO6
) |
89 BIT_ULL(VIRTIO_NET_F_GUEST_ECN
) |
90 BIT_ULL(VIRTIO_NET_F_GUEST_UFO
) |
91 BIT_ULL(VIRTIO_NET_F_HOST_TSO4
) |
92 BIT_ULL(VIRTIO_NET_F_HOST_TSO6
) |
93 BIT_ULL(VIRTIO_NET_F_HOST_ECN
) |
94 BIT_ULL(VIRTIO_NET_F_HOST_UFO
) |
95 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF
) |
96 BIT_ULL(VIRTIO_NET_F_STATUS
) |
97 BIT_ULL(VIRTIO_NET_F_CTRL_VQ
) |
98 BIT_ULL(VIRTIO_NET_F_MQ
) |
99 BIT_ULL(VIRTIO_F_ANY_LAYOUT
) |
100 BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR
) |
101 BIT_ULL(VIRTIO_NET_F_RSC_EXT
) |
102 BIT_ULL(VIRTIO_NET_F_STANDBY
);
104 #define VHOST_VDPA_NET_CVQ_ASID 1
106 VHostNetState
*vhost_vdpa_get_vhost_net(NetClientState
*nc
)
108 VhostVDPAState
*s
= DO_UPCAST(VhostVDPAState
, nc
, nc
);
109 assert(nc
->info
->type
== NET_CLIENT_DRIVER_VHOST_VDPA
);
113 static bool vhost_vdpa_net_valid_svq_features(uint64_t features
, Error
**errp
)
115 uint64_t invalid_dev_features
=
116 features
& ~vdpa_svq_device_features
&
/* Transport features are all accepted at this point */
118 ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START
,
119 VIRTIO_TRANSPORT_F_END
- VIRTIO_TRANSPORT_F_START
);
121 if (invalid_dev_features
) {
122 error_setg(errp
, "vdpa svq does not work with features 0x%" PRIx64
,
123 invalid_dev_features
);
127 return vhost_svq_valid_features(features
, errp
);
130 static int vhost_vdpa_net_check_device_id(struct vhost_net
*net
)
134 struct vhost_dev
*hdev
;
136 hdev
= (struct vhost_dev
*)&net
->dev
;
137 ret
= hdev
->vhost_ops
->vhost_get_device_id(hdev
, &device_id
);
138 if (device_id
!= VIRTIO_ID_NET
) {
144 static int vhost_vdpa_add(NetClientState
*ncs
, void *be
,
145 int queue_pair_index
, int nvqs
)
147 VhostNetOptions options
;
148 struct vhost_net
*net
= NULL
;
152 options
.backend_type
= VHOST_BACKEND_TYPE_VDPA
;
153 assert(ncs
->info
->type
== NET_CLIENT_DRIVER_VHOST_VDPA
);
154 s
= DO_UPCAST(VhostVDPAState
, nc
, ncs
);
155 options
.net_backend
= ncs
;
157 options
.busyloop_timeout
= 0;
160 net
= vhost_net_init(&options
);
162 error_report("failed to init vhost_net for queue");
166 ret
= vhost_vdpa_net_check_device_id(net
);
172 vhost_net_cleanup(net
);
178 static void vhost_vdpa_cleanup(NetClientState
*nc
)
180 VhostVDPAState
*s
= DO_UPCAST(VhostVDPAState
, nc
, nc
);
181 struct vhost_dev
*dev
= &s
->vhost_net
->dev
;
183 qemu_vfree(s
->cvq_cmd_out_buffer
);
184 qemu_vfree(s
->status
);
185 if (dev
->vq_index
+ dev
->nvqs
== dev
->vq_index_end
) {
186 g_clear_pointer(&s
->vhost_vdpa
.iova_tree
, vhost_iova_tree_delete
);
189 vhost_net_cleanup(s
->vhost_net
);
190 g_free(s
->vhost_net
);
193 if (s
->vhost_vdpa
.device_fd
>= 0) {
194 qemu_close(s
->vhost_vdpa
.device_fd
);
195 s
->vhost_vdpa
.device_fd
= -1;
199 static bool vhost_vdpa_has_vnet_hdr(NetClientState
*nc
)
201 assert(nc
->info
->type
== NET_CLIENT_DRIVER_VHOST_VDPA
);
206 static bool vhost_vdpa_has_ufo(NetClientState
*nc
)
208 assert(nc
->info
->type
== NET_CLIENT_DRIVER_VHOST_VDPA
);
209 VhostVDPAState
*s
= DO_UPCAST(VhostVDPAState
, nc
, nc
);
210 uint64_t features
= 0;
211 features
|= (1ULL << VIRTIO_NET_F_HOST_UFO
);
212 features
= vhost_net_get_features(s
->vhost_net
, features
);
213 return !!(features
& (1ULL << VIRTIO_NET_F_HOST_UFO
));
217 static bool vhost_vdpa_check_peer_type(NetClientState
*nc
, ObjectClass
*oc
,
220 const char *driver
= object_class_get_name(oc
);
222 if (!g_str_has_prefix(driver
, "virtio-net-")) {
223 error_setg(errp
, "vhost-vdpa requires frontend driver virtio-net-*");
230 /** Dummy receive in case qemu falls back to userland tap networking */
231 static ssize_t
vhost_vdpa_receive(NetClientState
*nc
, const uint8_t *buf
,
237 static NetClientInfo net_vhost_vdpa_info
= {
238 .type
= NET_CLIENT_DRIVER_VHOST_VDPA
,
239 .size
= sizeof(VhostVDPAState
),
240 .receive
= vhost_vdpa_receive
,
241 .cleanup
= vhost_vdpa_cleanup
,
242 .has_vnet_hdr
= vhost_vdpa_has_vnet_hdr
,
243 .has_ufo
= vhost_vdpa_has_ufo
,
244 .check_peer_type
= vhost_vdpa_check_peer_type
,
247 static int64_t vhost_vdpa_get_vring_group(int device_fd
, unsigned vq_index
)
249 struct vhost_vring_state state
= {
252 int r
= ioctl(device_fd
, VHOST_VDPA_GET_VRING_GROUP
, &state
);
254 if (unlikely(r
< 0)) {
255 error_report("Cannot get VQ %u group: %s", vq_index
,
263 static int vhost_vdpa_set_address_space_id(struct vhost_vdpa
*v
,
267 struct vhost_vring_state asid
= {
273 r
= ioctl(v
->device_fd
, VHOST_VDPA_SET_GROUP_ASID
, &asid
);
274 if (unlikely(r
< 0)) {
275 error_report("Can't set vq group %u asid %u, errno=%d (%s)",
276 asid
.index
, asid
.num
, errno
, g_strerror(errno
));
281 static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa
*v
, void *addr
)
283 VhostIOVATree
*tree
= v
->iova_tree
;
286 * No need to specify size or to look for more translations since
287 * this contiguous chunk was allocated by us.
289 .translated_addr
= (hwaddr
)(uintptr_t)addr
,
291 const DMAMap
*map
= vhost_iova_tree_find_iova(tree
, &needle
);
294 if (unlikely(!map
)) {
295 error_report("Cannot locate expected map");
299 r
= vhost_vdpa_dma_unmap(v
, v
->address_space_id
, map
->iova
, map
->size
+ 1);
300 if (unlikely(r
!= 0)) {
301 error_report("Device cannot unmap: %s(%d)", g_strerror(r
), r
);
304 vhost_iova_tree_remove(tree
, *map
);
307 static size_t vhost_vdpa_net_cvq_cmd_len(void)
310 * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
311 * In buffer is always 1 byte, so it should fit here
313 return sizeof(struct virtio_net_ctrl_hdr
) +
314 2 * sizeof(struct virtio_net_ctrl_mac
) +
315 MAC_TABLE_ENTRIES
* ETH_ALEN
;
/*
 * vhost_vdpa_net_cvq_cmd_len() rounded up to a multiple of the real host
 * page size. This is the size used when allocating and mapping the CVQ
 * shadow buffers.
 */
static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    const size_t cmd_len = vhost_vdpa_net_cvq_cmd_len();

    return ROUND_UP(cmd_len, qemu_real_host_page_size());
}
323 /** Map CVQ buffer. */
324 static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa
*v
, void *buf
, size_t size
,
330 map
.translated_addr
= (hwaddr
)(uintptr_t)buf
;
332 map
.perm
= write
? IOMMU_RW
: IOMMU_RO
,
333 r
= vhost_iova_tree_map_alloc(v
->iova_tree
, &map
);
334 if (unlikely(r
!= IOVA_OK
)) {
335 error_report("Cannot map injected element");
339 r
= vhost_vdpa_dma_map(v
, v
->address_space_id
, map
.iova
,
340 vhost_vdpa_net_cvq_cmd_page_len(), buf
, !write
);
341 if (unlikely(r
< 0)) {
348 vhost_iova_tree_remove(v
->iova_tree
, map
);
352 static int vhost_vdpa_net_cvq_start(NetClientState
*nc
)
355 struct vhost_vdpa
*v
;
356 uint64_t backend_features
;
360 assert(nc
->info
->type
== NET_CLIENT_DRIVER_VHOST_VDPA
);
362 s
= DO_UPCAST(VhostVDPAState
, nc
, nc
);
365 v
->shadow_data
= s
->always_svq
;
366 v
->shadow_vqs_enabled
= s
->always_svq
;
367 s
->vhost_vdpa
.address_space_id
= VHOST_VDPA_GUEST_PA_ASID
;
370 /* SVQ is already configured for all virtqueues */
375 * If we early return in these cases SVQ will not be enabled. The migration
376 * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
378 * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
381 r
= ioctl(v
->device_fd
, VHOST_GET_BACKEND_FEATURES
, &backend_features
);
382 if (unlikely(r
< 0)) {
383 error_report("Cannot get vdpa backend_features: %s(%d)",
384 g_strerror(errno
), errno
);
387 if (!(backend_features
& VHOST_BACKEND_F_IOTLB_ASID
) ||
388 !vhost_vdpa_net_valid_svq_features(v
->dev
->features
, NULL
)) {
393 * Check if all the virtqueues of the virtio device are in a different vq
394 * than the last vq. VQ group of last group passed in cvq_group.
396 cvq_index
= v
->dev
->vq_index_end
- 1;
397 cvq_group
= vhost_vdpa_get_vring_group(v
->device_fd
, cvq_index
);
398 if (unlikely(cvq_group
< 0)) {
401 for (int i
= 0; i
< cvq_index
; ++i
) {
402 int64_t group
= vhost_vdpa_get_vring_group(v
->device_fd
, i
);
404 if (unlikely(group
< 0)) {
408 if (group
== cvq_group
) {
413 r
= vhost_vdpa_set_address_space_id(v
, cvq_group
, VHOST_VDPA_NET_CVQ_ASID
);
414 if (unlikely(r
< 0)) {
418 v
->iova_tree
= vhost_iova_tree_new(v
->iova_range
.first
,
420 v
->shadow_vqs_enabled
= true;
421 s
->vhost_vdpa
.address_space_id
= VHOST_VDPA_NET_CVQ_ASID
;
424 if (!s
->vhost_vdpa
.shadow_vqs_enabled
) {
428 r
= vhost_vdpa_cvq_map_buf(&s
->vhost_vdpa
, s
->cvq_cmd_out_buffer
,
429 vhost_vdpa_net_cvq_cmd_page_len(), false);
430 if (unlikely(r
< 0)) {
434 r
= vhost_vdpa_cvq_map_buf(&s
->vhost_vdpa
, s
->status
,
435 vhost_vdpa_net_cvq_cmd_page_len(), true);
436 if (unlikely(r
< 0)) {
437 vhost_vdpa_cvq_unmap_buf(&s
->vhost_vdpa
, s
->cvq_cmd_out_buffer
);
443 static void vhost_vdpa_net_cvq_stop(NetClientState
*nc
)
445 VhostVDPAState
*s
= DO_UPCAST(VhostVDPAState
, nc
, nc
);
447 assert(nc
->info
->type
== NET_CLIENT_DRIVER_VHOST_VDPA
);
449 if (s
->vhost_vdpa
.shadow_vqs_enabled
) {
450 vhost_vdpa_cvq_unmap_buf(&s
->vhost_vdpa
, s
->cvq_cmd_out_buffer
);
451 vhost_vdpa_cvq_unmap_buf(&s
->vhost_vdpa
, s
->status
);
452 if (!s
->always_svq
) {
454 * If only the CVQ is shadowed we can delete this safely.
455 * If all the VQs are shadows this will be needed by the time the
456 * device is started again to register SVQ vrings and similar.
458 g_clear_pointer(&s
->vhost_vdpa
.iova_tree
, vhost_iova_tree_delete
);
463 static ssize_t
vhost_vdpa_net_cvq_add(VhostVDPAState
*s
, size_t out_len
,
466 /* Buffers for the device */
467 const struct iovec out
= {
468 .iov_base
= s
->cvq_cmd_out_buffer
,
471 const struct iovec in
= {
472 .iov_base
= s
->status
,
473 .iov_len
= sizeof(virtio_net_ctrl_ack
),
475 VhostShadowVirtqueue
*svq
= g_ptr_array_index(s
->vhost_vdpa
.shadow_vqs
, 0);
478 r
= vhost_svq_add(svq
, &out
, 1, &in
, 1, NULL
);
479 if (unlikely(r
!= 0)) {
480 if (unlikely(r
== -ENOSPC
)) {
481 qemu_log_mask(LOG_GUEST_ERROR
, "%s: No space on device queue\n",
488 * We can poll here since we've had BQL from the time we sent the
489 * descriptor. Also, we need to take the answer before SVQ pulls by itself,
490 * when BQL is released
492 return vhost_svq_poll(svq
);
495 static ssize_t
vhost_vdpa_net_load_cmd(VhostVDPAState
*s
, uint8_t class,
496 uint8_t cmd
, const void *data
,
499 const struct virtio_net_ctrl_hdr ctrl
= {
504 assert(data_size
< vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl
));
506 memcpy(s
->cvq_cmd_out_buffer
, &ctrl
, sizeof(ctrl
));
507 memcpy(s
->cvq_cmd_out_buffer
+ sizeof(ctrl
), data
, data_size
);
509 return vhost_vdpa_net_cvq_add(s
, sizeof(ctrl
) + data_size
,
510 sizeof(virtio_net_ctrl_ack
));
513 static int vhost_vdpa_net_load_mac(VhostVDPAState
*s
, const VirtIONet
*n
)
515 uint64_t features
= n
->parent_obj
.guest_features
;
516 if (features
& BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR
)) {
517 ssize_t dev_written
= vhost_vdpa_net_load_cmd(s
, VIRTIO_NET_CTRL_MAC
,
518 VIRTIO_NET_CTRL_MAC_ADDR_SET
,
519 n
->mac
, sizeof(n
->mac
));
520 if (unlikely(dev_written
< 0)) {
524 return *s
->status
!= VIRTIO_NET_OK
;
530 static int vhost_vdpa_net_load_mq(VhostVDPAState
*s
,
533 struct virtio_net_ctrl_mq mq
;
534 uint64_t features
= n
->parent_obj
.guest_features
;
537 if (!(features
& BIT_ULL(VIRTIO_NET_F_MQ
))) {
541 mq
.virtqueue_pairs
= cpu_to_le16(n
->curr_queue_pairs
);
542 dev_written
= vhost_vdpa_net_load_cmd(s
, VIRTIO_NET_CTRL_MQ
,
543 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
, &mq
,
545 if (unlikely(dev_written
< 0)) {
549 return *s
->status
!= VIRTIO_NET_OK
;
552 static int vhost_vdpa_net_load(NetClientState
*nc
)
554 VhostVDPAState
*s
= DO_UPCAST(VhostVDPAState
, nc
, nc
);
555 struct vhost_vdpa
*v
= &s
->vhost_vdpa
;
559 assert(nc
->info
->type
== NET_CLIENT_DRIVER_VHOST_VDPA
);
561 if (!v
->shadow_vqs_enabled
) {
565 n
= VIRTIO_NET(v
->dev
->vdev
);
566 r
= vhost_vdpa_net_load_mac(s
, n
);
567 if (unlikely(r
< 0)) {
570 r
= vhost_vdpa_net_load_mq(s
, n
);
578 static NetClientInfo net_vhost_vdpa_cvq_info
= {
579 .type
= NET_CLIENT_DRIVER_VHOST_VDPA
,
580 .size
= sizeof(VhostVDPAState
),
581 .receive
= vhost_vdpa_receive
,
582 .start
= vhost_vdpa_net_cvq_start
,
583 .load
= vhost_vdpa_net_load
,
584 .stop
= vhost_vdpa_net_cvq_stop
,
585 .cleanup
= vhost_vdpa_cleanup
,
586 .has_vnet_hdr
= vhost_vdpa_has_vnet_hdr
,
587 .has_ufo
= vhost_vdpa_has_ufo
,
588 .check_peer_type
= vhost_vdpa_check_peer_type
,
592 * Validate and copy control virtqueue commands.
594 * Following QEMU guidelines, we offer a copy of the buffers to the device to
595 * prevent TOCTOU bugs.
597 static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue
*svq
,
598 VirtQueueElement
*elem
,
601 VhostVDPAState
*s
= opaque
;
603 virtio_net_ctrl_ack status
= VIRTIO_NET_ERR
;
604 /* Out buffer sent to both the vdpa device and the device model */
606 .iov_base
= s
->cvq_cmd_out_buffer
,
608 /* in buffer used for device model */
609 const struct iovec in
= {
611 .iov_len
= sizeof(status
),
613 ssize_t dev_written
= -EINVAL
;
615 out
.iov_len
= iov_to_buf(elem
->out_sg
, elem
->out_num
, 0,
616 s
->cvq_cmd_out_buffer
,
617 vhost_vdpa_net_cvq_cmd_len());
618 if (*(uint8_t *)s
->cvq_cmd_out_buffer
== VIRTIO_NET_CTRL_ANNOUNCE
) {
620 * Guest announce capability is emulated by qemu, so don't forward to
623 dev_written
= sizeof(status
);
624 *s
->status
= VIRTIO_NET_OK
;
626 dev_written
= vhost_vdpa_net_cvq_add(s
, out
.iov_len
, sizeof(status
));
627 if (unlikely(dev_written
< 0)) {
632 if (unlikely(dev_written
< sizeof(status
))) {
633 error_report("Insufficient written data (%zu)", dev_written
);
637 if (*s
->status
!= VIRTIO_NET_OK
) {
638 return VIRTIO_NET_ERR
;
641 status
= VIRTIO_NET_ERR
;
642 virtio_net_handle_ctrl_iov(svq
->vdev
, &in
, 1, &out
, 1);
643 if (status
!= VIRTIO_NET_OK
) {
644 error_report("Bad CVQ processing in model");
648 in_len
= iov_from_buf(elem
->in_sg
, elem
->in_num
, 0, &status
,
650 if (unlikely(in_len
< sizeof(status
))) {
651 error_report("Bad device CVQ written length");
653 vhost_svq_push_elem(svq
, elem
, MIN(in_len
, sizeof(status
)));
655 return dev_written
< 0 ? dev_written
: 0;
658 static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops
= {
659 .avail_handler
= vhost_vdpa_net_handle_ctrl_avail
,
662 static NetClientState
*net_vhost_vdpa_init(NetClientState
*peer
,
666 int queue_pair_index
,
670 struct vhost_vdpa_iova_range iova_range
,
671 VhostIOVATree
*iova_tree
)
673 NetClientState
*nc
= NULL
;
678 nc
= qemu_new_net_client(&net_vhost_vdpa_info
, peer
, device
,
681 nc
= qemu_new_net_control_client(&net_vhost_vdpa_cvq_info
, peer
,
684 qemu_set_info_str(nc
, TYPE_VHOST_VDPA
);
685 s
= DO_UPCAST(VhostVDPAState
, nc
, nc
);
687 s
->vhost_vdpa
.device_fd
= vdpa_device_fd
;
688 s
->vhost_vdpa
.index
= queue_pair_index
;
690 s
->vhost_vdpa
.shadow_vqs_enabled
= svq
;
691 s
->vhost_vdpa
.iova_range
= iova_range
;
692 s
->vhost_vdpa
.shadow_data
= svq
;
693 s
->vhost_vdpa
.iova_tree
= iova_tree
;
695 s
->cvq_cmd_out_buffer
= qemu_memalign(qemu_real_host_page_size(),
696 vhost_vdpa_net_cvq_cmd_page_len());
697 memset(s
->cvq_cmd_out_buffer
, 0, vhost_vdpa_net_cvq_cmd_page_len());
698 s
->status
= qemu_memalign(qemu_real_host_page_size(),
699 vhost_vdpa_net_cvq_cmd_page_len());
700 memset(s
->status
, 0, vhost_vdpa_net_cvq_cmd_page_len());
702 s
->vhost_vdpa
.shadow_vq_ops
= &vhost_vdpa_net_svq_ops
;
703 s
->vhost_vdpa
.shadow_vq_ops_opaque
= s
;
705 ret
= vhost_vdpa_add(nc
, (void *)&s
->vhost_vdpa
, queue_pair_index
, nvqs
);
707 qemu_del_net_client(nc
);
713 static int vhost_vdpa_get_features(int fd
, uint64_t *features
, Error
**errp
)
715 int ret
= ioctl(fd
, VHOST_GET_FEATURES
, features
);
716 if (unlikely(ret
< 0)) {
717 error_setg_errno(errp
, errno
,
718 "Fail to query features from vhost-vDPA device");
723 static int vhost_vdpa_get_max_queue_pairs(int fd
, uint64_t features
,
724 int *has_cvq
, Error
**errp
)
726 unsigned long config_size
= offsetof(struct vhost_vdpa_config
, buf
);
727 g_autofree
struct vhost_vdpa_config
*config
= NULL
;
728 __virtio16
*max_queue_pairs
;
731 if (features
& (1 << VIRTIO_NET_F_CTRL_VQ
)) {
737 if (features
& (1 << VIRTIO_NET_F_MQ
)) {
738 config
= g_malloc0(config_size
+ sizeof(*max_queue_pairs
));
739 config
->off
= offsetof(struct virtio_net_config
, max_virtqueue_pairs
);
740 config
->len
= sizeof(*max_queue_pairs
);
742 ret
= ioctl(fd
, VHOST_VDPA_GET_CONFIG
, config
);
744 error_setg(errp
, "Fail to get config from vhost-vDPA device");
748 max_queue_pairs
= (__virtio16
*)&config
->buf
;
750 return lduw_le_p(max_queue_pairs
);
756 int net_init_vhost_vdpa(const Netdev
*netdev
, const char *name
,
757 NetClientState
*peer
, Error
**errp
)
759 const NetdevVhostVDPAOptions
*opts
;
762 g_autofree NetClientState
**ncs
= NULL
;
763 g_autoptr(VhostIOVATree
) iova_tree
= NULL
;
764 struct vhost_vdpa_iova_range iova_range
;
766 int queue_pairs
, r
, i
= 0, has_cvq
= 0;
768 assert(netdev
->type
== NET_CLIENT_DRIVER_VHOST_VDPA
);
769 opts
= &netdev
->u
.vhost_vdpa
;
770 if (!opts
->vhostdev
&& !opts
->vhostfd
) {
772 "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
776 if (opts
->vhostdev
&& opts
->vhostfd
) {
778 "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
782 if (opts
->vhostdev
) {
783 vdpa_device_fd
= qemu_open(opts
->vhostdev
, O_RDWR
, errp
);
784 if (vdpa_device_fd
== -1) {
789 vdpa_device_fd
= monitor_fd_param(monitor_cur(), opts
->vhostfd
, errp
);
790 if (vdpa_device_fd
== -1) {
791 error_prepend(errp
, "vhost-vdpa: unable to parse vhostfd: ");
796 r
= vhost_vdpa_get_features(vdpa_device_fd
, &features
, errp
);
797 if (unlikely(r
< 0)) {
801 queue_pairs
= vhost_vdpa_get_max_queue_pairs(vdpa_device_fd
, features
,
803 if (queue_pairs
< 0) {
804 qemu_close(vdpa_device_fd
);
808 r
= vhost_vdpa_get_iova_range(vdpa_device_fd
, &iova_range
);
809 if (unlikely(r
< 0)) {
810 error_setg(errp
, "vhost-vdpa: get iova range failed: %s",
816 if (!vhost_vdpa_net_valid_svq_features(features
, errp
)) {
820 iova_tree
= vhost_iova_tree_new(iova_range
.first
, iova_range
.last
);
823 ncs
= g_malloc0(sizeof(*ncs
) * queue_pairs
);
825 for (i
= 0; i
< queue_pairs
; i
++) {
826 ncs
[i
] = net_vhost_vdpa_init(peer
, TYPE_VHOST_VDPA
, name
,
827 vdpa_device_fd
, i
, 2, true, opts
->x_svq
,
828 iova_range
, iova_tree
);
834 nc
= net_vhost_vdpa_init(peer
, TYPE_VHOST_VDPA
, name
,
835 vdpa_device_fd
, i
, 1, false,
836 opts
->x_svq
, iova_range
, iova_tree
);
841 /* iova_tree ownership belongs to last NetClientState */
842 g_steal_pointer(&iova_tree
);
847 for (i
--; i
>= 0; i
--) {
848 qemu_del_net_client(ncs
[i
]);
853 qemu_close(vdpa_device_fd
);