/*
 * VDUSE (vDPA Device in Userspace) library
 *
 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
 * Portions of code and concepts are borrowed from libvhost-user.c, so:
 * Copyright IBM, Corp. 2007
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Authors:
 *   Xie Yongji <xieyongji@bytedance.com>
 *   Anthony Liguori <aliguori@us.ibm.com>
 *   Marc-André Lureau <mlureau@redhat.com>
 *   Victor Kaplansky <victork@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <endian.h>
#include <unistd.h>
#include <limits.h>
#include <fcntl.h>
#include <inttypes.h>

#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <sys/mman.h>

#include "include/atomic.h"
#include "linux-headers/linux/virtio_ring.h"
#include "linux-headers/linux/virtio_config.h"
#include "linux-headers/linux/vduse.h"
#include "libvduse.h"
#define VDUSE_VQ_ALIGN 4096
#define MAX_IOVA_REGIONS 256

#define LOG_ALIGNMENT 64

/* Round number down to multiple */
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))

/* Round number up to multiple */
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))

#define unlikely(x) __builtin_expect(!!(x), 0)
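/*
 * For example, ALIGN_DOWN(100, 64) == 64 and ALIGN_UP(100, 64) == 128;
 * values that are already a multiple of m are returned unchanged by both.
 */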
/* Per-descriptor inflight state, shared with the reconnect log file */
typedef struct VduseDescStateSplit {
    uint8_t inflight;
    uint8_t padding[5];
    uint16_t next;
    uint64_t counter;
} VduseDescStateSplit;

typedef struct VduseVirtqLogInflight {
    uint64_t features;
    uint16_t version;
    uint16_t desc_num;
    uint16_t last_batch_head;
    uint16_t used_idx;
    VduseDescStateSplit desc[];
} VduseVirtqLogInflight;

typedef struct VduseVirtqLog {
    VduseVirtqLogInflight inflight;
} VduseVirtqLog;

typedef struct VduseVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VduseVirtqInflightDesc;
typedef struct VduseRing {
    unsigned int num;
    uint64_t desc_addr;
    uint64_t avail_addr;
    uint64_t used_addr;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
} VduseRing;

struct VduseVirtq {
    VduseRing vring;
    uint16_t last_avail_idx;
    uint16_t shadow_avail_idx;
    uint16_t used_idx;
    uint16_t signalled_used;
    bool signalled_used_valid;
    int index;
    int inuse;
    bool ready;
    int fd;
    VduseDev *dev;
    VduseVirtqInflightDesc *resubmit_list;
    uint16_t resubmit_num;
    uint64_t counter;
    VduseVirtqLog *log;
};
typedef struct VduseIovaRegion {
    uint64_t iova;
    uint64_t size;
    uint64_t mmap_offset;
    uint64_t mmap_addr;
} VduseIovaRegion;

struct VduseDev {
    VduseVirtq *vqs;
    VduseIovaRegion regions[MAX_IOVA_REGIONS];
    int num_queues;
    char *name;
    uint64_t features;
    const VduseOps *ops;
    int fd;
    int ctrl_fd;
    void *priv;
    void *log;
};
static inline size_t vduse_vq_log_size(uint16_t queue_size)
{
    return ALIGN_UP(sizeof(VduseDescStateSplit) * queue_size +
                    sizeof(VduseVirtqLogInflight), LOG_ALIGNMENT);
}
static void *vduse_log_get(const char *filename, size_t size)
{
    void *ptr = MAP_FAILED;
    int fd;

    fd = open(filename, O_RDWR | O_CREAT, 0600);
    if (fd == -1) {
        return MAP_FAILED;
    }

    if (ftruncate(fd, size) == -1) {
        goto out;
    }

    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

out:
    close(fd);
    return ptr;
}
static inline bool has_feature(uint64_t features, unsigned int fbit)
{
    assert(fbit < 64);
    return !!(features & (1ULL << fbit));
}

static inline bool vduse_dev_has_feature(VduseDev *dev, unsigned int fbit)
{
    return has_feature(dev->features, fbit);
}
uint64_t vduse_get_virtio_features(void)
{
    return (1ULL << VIRTIO_F_IOMMU_PLATFORM) |
           (1ULL << VIRTIO_F_VERSION_1) |
           (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
           (1ULL << VIRTIO_RING_F_EVENT_IDX) |
           (1ULL << VIRTIO_RING_F_INDIRECT_DESC);
}
VduseDev *vduse_queue_get_dev(VduseVirtq *vq)
{
    return vq->dev;
}

int vduse_queue_get_fd(VduseVirtq *vq)
{
    return vq->fd;
}

void *vduse_dev_get_priv(VduseDev *dev)
{
    return dev->priv;
}

VduseVirtq *vduse_dev_get_queue(VduseDev *dev, int index)
{
    return &dev->vqs[index];
}

int vduse_dev_get_fd(VduseDev *dev)
{
    return dev->fd;
}

static int vduse_inject_irq(VduseDev *dev, int index)
{
    return ioctl(dev->fd, VDUSE_VQ_INJECT_IRQ, &index);
}
/* Sort inflight descriptors by ascending submission counter (modulo wrap) */
static int inflight_desc_compare(const void *a, const void *b)
{
    VduseVirtqInflightDesc *desc0 = (VduseVirtqInflightDesc *)a,
                           *desc1 = (VduseVirtqInflightDesc *)b;

    if (desc1->counter > desc0->counter &&
        (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
        return 1;
    }

    return -1;
}
static int vduse_queue_check_inflights(VduseVirtq *vq)
{
    int i = 0;
    VduseDev *dev = vq->dev;

    vq->used_idx = le16toh(vq->vring.used->idx);
    vq->resubmit_num = 0;
    vq->resubmit_list = NULL;

    if (unlikely(vq->log->inflight.used_idx != vq->used_idx)) {
        if (vq->log->inflight.last_batch_head > VIRTQUEUE_MAX_SIZE) {
            return -1;
        }

        /* Finish the used ring update that was interrupted by a crash */
        vq->log->inflight.desc[vq->log->inflight.last_batch_head].inflight = 0;

        barrier();

        vq->log->inflight.used_idx = vq->used_idx;
    }

    for (i = 0; i < vq->log->inflight.desc_num; i++) {
        if (vq->log->inflight.desc[i].inflight == 1) {
            vq->inuse++;
        }
    }

    vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;

    if (vq->inuse) {
        vq->resubmit_list = calloc(vq->inuse, sizeof(VduseVirtqInflightDesc));
        if (!vq->resubmit_list) {
            return -1;
        }

        for (i = 0; i < vq->log->inflight.desc_num; i++) {
            if (vq->log->inflight.desc[i].inflight) {
                vq->resubmit_list[vq->resubmit_num].index = i;
                vq->resubmit_list[vq->resubmit_num].counter =
                    vq->log->inflight.desc[i].counter;
                vq->resubmit_num++;
            }
        }

        if (vq->resubmit_num > 1) {
            qsort(vq->resubmit_list, vq->resubmit_num,
                  sizeof(VduseVirtqInflightDesc), inflight_desc_compare);
        }
        vq->counter = vq->resubmit_list[0].counter + 1;
    }

    /* Kick the backend so that the inflight requests get resubmitted */
    vduse_inject_irq(dev, vq->index);

    return 0;
}
static int vduse_queue_inflight_get(VduseVirtq *vq, int desc_idx)
{
    vq->log->inflight.desc[desc_idx].counter = vq->counter++;

    barrier();

    vq->log->inflight.desc[desc_idx].inflight = 1;

    return 0;
}
static int vduse_queue_inflight_pre_put(VduseVirtq *vq, int desc_idx)
{
    vq->log->inflight.last_batch_head = desc_idx;

    return 0;
}
static int vduse_queue_inflight_post_put(VduseVirtq *vq, int desc_idx)
{
    vq->log->inflight.desc[desc_idx].inflight = 0;

    barrier();

    vq->log->inflight.used_idx = vq->used_idx;

    return 0;
}
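/*
 * Illustrative note (not part of the original code): the inflight log is
 * written in a crash-safe order.  A descriptor is marked inflight before it
 * is handed to the device (vduse_queue_pop() -> vduse_queue_inflight_get()),
 * and completion is recorded in three steps around the used ring update:
 *
 *     vduse_queue_fill(vq, elem, len, 0);
 *     vduse_queue_inflight_pre_put(vq, elem->index);   <- remember batch head
 *     vduse_queue_flush(vq, 1);                        <- publish used->idx
 *     vduse_queue_inflight_post_put(vq, elem->index);  <- clear inflight flag
 *
 * If the backend crashes between flush and post_put, last_batch_head lets
 * vduse_queue_check_inflights() finish the interrupted update on restart.
 */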
static void vduse_iova_remove_region(VduseDev *dev, uint64_t start,
                                     uint64_t last)
{
    int i;

    if (last == start) {
        return;
    }

    for (i = 0; i < MAX_IOVA_REGIONS; i++) {
        if (!dev->regions[i].mmap_addr) {
            continue;
        }

        if (start <= dev->regions[i].iova &&
            last >= (dev->regions[i].iova + dev->regions[i].size - 1)) {
            munmap((void *)(uintptr_t)dev->regions[i].mmap_addr,
                   dev->regions[i].mmap_offset + dev->regions[i].size);
            dev->regions[i].mmap_addr = 0;
        }
    }
}
static int vduse_iova_add_region(VduseDev *dev, int fd,
                                 uint64_t offset, uint64_t start,
                                 uint64_t last, int prot)
{
    int i;
    uint64_t size = last - start + 1;
    void *mmap_addr = mmap(0, size + offset, prot, MAP_SHARED, fd, 0);

    if (mmap_addr == MAP_FAILED) {
        close(fd);
        return -EINVAL;
    }

    for (i = 0; i < MAX_IOVA_REGIONS; i++) {
        if (!dev->regions[i].mmap_addr) {
            dev->regions[i].mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
            dev->regions[i].mmap_offset = offset;
            dev->regions[i].iova = start;
            dev->regions[i].size = size;
            break;
        }
    }
    assert(i < MAX_IOVA_REGIONS);

    close(fd);

    return 0;
}
static int perm_to_prot(uint8_t perm)
{
    int prot = 0;

    switch (perm) {
    case VDUSE_ACCESS_WO:
        prot |= PROT_WRITE;
        break;
    case VDUSE_ACCESS_RO:
        prot |= PROT_READ;
        break;
    case VDUSE_ACCESS_RW:
        prot |= PROT_READ | PROT_WRITE;
        break;
    default:
        break;
    }

    return prot;
}
static inline void *iova_to_va(VduseDev *dev, uint64_t *plen, uint64_t iova)
{
    int i, ret;
    struct vduse_iotlb_entry entry;

    for (i = 0; i < MAX_IOVA_REGIONS; i++) {
        VduseIovaRegion *r = &dev->regions[i];

        if (!r->mmap_addr) {
            continue;
        }

        if ((iova >= r->iova) && (iova < (r->iova + r->size))) {
            if ((iova + *plen) > (r->iova + r->size)) {
                *plen = r->iova + r->size - iova;
            }
            return (void *)(uintptr_t)(iova - r->iova +
                   r->mmap_addr + r->mmap_offset);
        }
    }

    /* Miss: fault the mapping in from the kernel IOTLB and retry */
    entry.start = iova;
    entry.last = iova + 1;
    ret = ioctl(dev->fd, VDUSE_IOTLB_GET_FD, &entry);
    if (ret < 0) {
        return NULL;
    }

    if (!vduse_iova_add_region(dev, ret, entry.offset, entry.start,
                               entry.last, perm_to_prot(entry.perm))) {
        return iova_to_va(dev, plen, iova);
    }

    return NULL;
}
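/*
 * Illustrative sketch (not part of the original code): a buffer may span
 * multiple IOVA regions, so callers translate in a loop, consuming however
 * many bytes each call maps:
 *
 *     uint64_t len, remaining = size;
 *     while (remaining) {
 *         len = remaining;
 *         void *va = iova_to_va(dev, &len, iova);
 *         if (!va) {
 *             break;               (unmapped IOVA: fail the request)
 *         }
 *         ... consume len bytes at va ...
 *         iova += len;
 *         remaining -= len;
 *     }
 *
 * This is the pattern used by vduse_queue_map_single_desc() and
 * vduse_queue_read_indirect_desc() below.
 */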
static inline uint16_t vring_avail_flags(VduseVirtq *vq)
{
    return le16toh(vq->vring.avail->flags);
}

static inline uint16_t vring_avail_idx(VduseVirtq *vq)
{
    vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);

    return vq->shadow_avail_idx;
}

static inline uint16_t vring_avail_ring(VduseVirtq *vq, int i)
{
    return le16toh(vq->vring.avail->ring[i]);
}

/*
 * With VIRTIO_RING_F_EVENT_IDX, the driver stores its used event index
 * right after the last avail ring entry.
 */
static inline uint16_t vring_get_used_event(VduseVirtq *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}
static bool vduse_queue_get_head(VduseVirtq *vq, unsigned int idx,
                                 unsigned int *head)
{
    /*
     * Grab the next descriptor number they're advertising, and increment
     * the index we've seen.
     */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        fprintf(stderr, "Guest says index %u is available\n", *head);
        return false;
    }

    return true;
}
static int
vduse_queue_read_indirect_desc(VduseDev *dev, struct vring_desc *desc,
                               uint64_t addr, size_t len)
{
    struct vring_desc *ori_desc;
    uint64_t read_len;

    if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) {
        return -1;
    }

    if (len == 0) {
        return -1;
    }

    while (len) {
        read_len = len;
        ori_desc = iova_to_va(dev, &read_len, addr);
        if (!ori_desc) {
            return -1;
        }

        memcpy(desc, ori_desc, read_len);
        len -= read_len;
        addr += read_len;
        desc += read_len / sizeof(struct vring_desc);
    }

    return 0;
}
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
static int vduse_queue_read_next_desc(struct vring_desc *desc, int i,
                                      unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = le16toh(desc[i].next);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        fprintf(stderr, "Desc next is %u\n", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    return VIRTQUEUE_READ_DESC_MORE;
}
/*
 * Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers.
 */
static bool vduse_queue_empty(VduseVirtq *vq)
{
    if (unlikely(!vq->vring.avail)) {
        return true;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return false;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}
static bool vduse_queue_should_notify(VduseVirtq *vq)
{
    VduseDev *dev = vq->dev;
    uint16_t old, new;
    bool v;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (vduse_dev_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vduse_queue_empty(vq)) {
        return true;
    }

    if (!vduse_dev_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
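/*
 * Illustrative note (not part of the original code): with
 * VIRTIO_RING_F_EVENT_IDX negotiated, vring_need_event(event_idx, new, old)
 * is true iff the driver's used event index was crossed while moving
 * used->idx from old to new, i.e.
 *
 *     (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old)
 *
 * For example old = 10, new = 13, event_idx = 11 requires an interrupt,
 * while event_idx = 15 suppresses it.
 */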
void vduse_queue_notify(VduseVirtq *vq)
{
    VduseDev *dev = vq->dev;

    if (unlikely(!vq->vring.avail)) {
        return;
    }

    if (!vduse_queue_should_notify(vq)) {
        return;
    }

    if (vduse_inject_irq(dev, vq->index) < 0) {
        fprintf(stderr, "Error injecting irq for vq %d: %s\n",
                vq->index, strerror(errno));
    }
}
static inline void vring_set_avail_event(VduseVirtq *vq, uint16_t val)
{
    *((uint16_t *)&vq->vring.used->ring[vq->vring.num]) = htole16(val);
}
static bool vduse_queue_map_single_desc(VduseVirtq *vq, unsigned int *p_num_sg,
                                        struct iovec *iov,
                                        unsigned int max_num_sg,
                                        bool is_write, uint64_t pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;
    VduseDev *dev = vq->dev;

    assert(num_sg <= max_num_sg);

    if (!sz) {
        fprintf(stderr, "virtio: zero sized buffers are not allowed\n");
        return false;
    }

    while (sz) {
        uint64_t len = sz;

        if (num_sg == max_num_sg) {
            fprintf(stderr,
                    "virtio: too many descriptors in indirect table\n");
            return false;
        }

        iov[num_sg].iov_base = iova_to_va(dev, &len, pa);
        if (iov[num_sg].iov_base == NULL) {
            fprintf(stderr, "virtio: invalid address for buffers\n");
            return false;
        }
        iov[num_sg++].iov_len = len;
        sz -= len;
        pa += len;
    }

    *p_num_sg = num_sg;
    return true;
}
static void *vduse_queue_alloc_element(size_t sz, unsigned out_num,
                                       unsigned in_num)
{
    VduseVirtqElement *elem;
    size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VduseVirtqElement));
    elem = malloc(out_sg_end);
    if (!elem) {
        return NULL;
    }
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
static void *vduse_queue_map_desc(VduseVirtq *vq, unsigned int idx, size_t sz)
{
    struct vring_desc *desc = vq->vring.desc;
    VduseDev *dev = vq->dev;
    uint64_t desc_addr, read_len;
    unsigned int desc_len;
    unsigned int max = vq->vring.num;
    unsigned int i = idx;
    VduseVirtqElement *elem;
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    unsigned int out_num = 0, in_num = 0;
    int rc;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
            fprintf(stderr, "Invalid size for indirect buffer table\n");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        desc_len = le32toh(desc[i].len);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = iova_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy */
            desc = NULL;
            if (!vduse_queue_read_indirect_desc(dev, desc_buf,
                                                desc_addr, desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            fprintf(stderr, "Invalid indirect buffer table\n");
            return NULL;
        }
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            if (!vduse_queue_map_single_desc(vq, &in_num, iov + out_num,
                                             VIRTQUEUE_MAX_SIZE - out_num,
                                             true, le64toh(desc[i].addr),
                                             le32toh(desc[i].len))) {
                return NULL;
            }
        } else {
            if (in_num) {
                fprintf(stderr, "Incorrect order for descriptors\n");
                return NULL;
            }
            if (!vduse_queue_map_single_desc(vq, &out_num, iov,
                                             VIRTQUEUE_MAX_SIZE, false,
                                             le64toh(desc[i].addr),
                                             le32toh(desc[i].len))) {
                return NULL;
            }
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            fprintf(stderr, "Looped descriptor\n");
            return NULL;
        }
        rc = vduse_queue_read_next_desc(desc, i, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        fprintf(stderr, "read descriptor error\n");
        return NULL;
    }

    /* Now copy what we have collected and mapped */
    elem = vduse_queue_alloc_element(sz, out_num, in_num);
    if (!elem) {
        fprintf(stderr, "Failed to allocate virtqueue element\n");
        return NULL;
    }
    elem->index = idx;
    for (i = 0; i < out_num; i++) {
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_sg[i] = iov[out_num + i];
    }

    return elem;
}
void *vduse_queue_pop(VduseVirtq *vq, size_t sz)
{
    unsigned int head;
    VduseVirtqElement *elem;
    VduseDev *dev = vq->dev;
    int i;

    if (unlikely(!vq->vring.avail)) {
        return NULL;
    }

    /* Drain requests that were inflight across a reconnect first */
    if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
        i = (--vq->resubmit_num);
        elem = vduse_queue_map_desc(vq, vq->resubmit_list[i].index, sz);

        if (!vq->resubmit_num) {
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        return elem;
    }

    if (vduse_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty() */
    smp_rmb();

    if (vq->inuse >= vq->vring.num) {
        fprintf(stderr, "Virtqueue size exceeded: %d\n", vq->inuse);
        return NULL;
    }

    if (!vduse_queue_get_head(vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (vduse_dev_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    elem = vduse_queue_map_desc(vq, head, sz);

    if (!elem) {
        return NULL;
    }

    vq->inuse++;

    vduse_queue_inflight_get(vq, head);

    return elem;
}
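/*
 * Illustrative usage sketch (not part of the original code; handle_request()
 * is a hypothetical device-specific helper):
 *
 *     VduseVirtqElement *elem;
 *
 *     while ((elem = vduse_queue_pop(vq, sizeof(VduseVirtqElement)))) {
 *         uint32_t len = handle_request(elem->out_sg, elem->out_num,
 *                                       elem->in_sg, elem->in_num);
 *         vduse_queue_push(vq, elem, len);
 *         free(elem);
 *     }
 *     vduse_queue_notify(vq);
 *
 * The element is malloc()ed by vduse_queue_alloc_element(), so the caller
 * frees it once the used entry has been pushed.
 */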
static inline void vring_used_write(VduseVirtq *vq,
                                    struct vring_used_elem *uelem, int i)
{
    struct vring_used *used = vq->vring.used;

    used->ring[i] = *uelem;
}
static void vduse_queue_fill(VduseVirtq *vq, const VduseVirtqElement *elem,
                             unsigned int len, unsigned int idx)
{
    struct vring_used_elem uelem;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = htole32(elem->index);
    uelem.len = htole32(len);
    vring_used_write(vq, &uelem, idx);
}
static inline void vring_used_idx_set(VduseVirtq *vq, uint16_t val)
{
    vq->vring.used->idx = htole16(val);

    vq->used_idx = val;
}
static void vduse_queue_flush(VduseVirtq *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();

    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}
void vduse_queue_push(VduseVirtq *vq, const VduseVirtqElement *elem,
                      unsigned int len)
{
    vduse_queue_fill(vq, elem, len, 0);
    vduse_queue_inflight_pre_put(vq, elem->index);
    vduse_queue_flush(vq, 1);
    vduse_queue_inflight_post_put(vq, elem->index);
}
static int vduse_queue_update_vring(VduseVirtq *vq, uint64_t desc_addr,
                                    uint64_t avail_addr, uint64_t used_addr)
{
    struct VduseDev *dev = vq->dev;
    uint64_t len;

    len = sizeof(struct vring_desc);
    vq->vring.desc = iova_to_va(dev, &len, desc_addr);
    if (len != sizeof(struct vring_desc)) {
        return -EINVAL;
    }

    len = sizeof(struct vring_avail);
    vq->vring.avail = iova_to_va(dev, &len, avail_addr);
    if (len != sizeof(struct vring_avail)) {
        return -EINVAL;
    }

    len = sizeof(struct vring_used);
    vq->vring.used = iova_to_va(dev, &len, used_addr);
    if (len != sizeof(struct vring_used)) {
        return -EINVAL;
    }

    if (!vq->vring.desc || !vq->vring.avail || !vq->vring.used) {
        fprintf(stderr, "Failed to get vq[%d] iova mapping\n", vq->index);
        return -EINVAL;
    }

    return 0;
}
static void vduse_queue_enable(VduseVirtq *vq)
{
    struct VduseDev *dev = vq->dev;
    struct vduse_vq_info vq_info;
    struct vduse_vq_eventfd vq_eventfd;
    int fd;

    vq_info.index = vq->index;
    if (ioctl(dev->fd, VDUSE_VQ_GET_INFO, &vq_info)) {
        fprintf(stderr, "Failed to get vq[%d] info: %s\n",
                vq->index, strerror(errno));
        return;
    }

    if (!vq_info.ready) {
        return;
    }

    vq->vring.num = vq_info.num;
    vq->vring.desc_addr = vq_info.desc_addr;
    vq->vring.avail_addr = vq_info.driver_addr;
    vq->vring.used_addr = vq_info.device_addr;

    if (vduse_queue_update_vring(vq, vq_info.desc_addr,
                                 vq_info.driver_addr, vq_info.device_addr)) {
        fprintf(stderr, "Failed to update vring for vq[%d]\n", vq->index);
        return;
    }

    fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (fd < 0) {
        fprintf(stderr, "Failed to init eventfd for vq[%d]\n", vq->index);
        return;
    }

    vq_eventfd.index = vq->index;
    vq_eventfd.fd = fd;
    if (ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &vq_eventfd)) {
        fprintf(stderr, "Failed to setup kick fd for vq[%d]\n", vq->index);
        close(fd);
        return;
    }

    vq->fd = fd;
    vq->signalled_used_valid = false;
    vq->ready = true;

    if (vduse_queue_check_inflights(vq)) {
        fprintf(stderr, "Failed to check inflights for vq[%d]\n", vq->index);
        close(fd);
        return;
    }

    dev->ops->enable_queue(dev, vq);
}
static void vduse_queue_disable(VduseVirtq *vq)
{
    struct VduseDev *dev = vq->dev;
    struct vduse_vq_eventfd eventfd;

    if (!vq->ready) {
        return;
    }

    dev->ops->disable_queue(dev, vq);

    eventfd.index = vq->index;
    eventfd.fd = VDUSE_EVENTFD_DEASSIGN;
    ioctl(dev->fd, VDUSE_VQ_SETUP_KICKFD, &eventfd);
    close(vq->fd);

    assert(vq->inuse == 0);

    vq->vring.num = 0;
    vq->vring.desc_addr = 0;
    vq->vring.avail_addr = 0;
    vq->vring.used_addr = 0;
    vq->vring.desc = NULL;
    vq->vring.avail = NULL;
    vq->vring.used = NULL;
    vq->ready = false;
    vq->fd = -1;
}
static void vduse_dev_start_dataplane(VduseDev *dev)
{
    int i;

    if (ioctl(dev->fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
        fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
        return;
    }
    assert(vduse_dev_has_feature(dev, VIRTIO_F_VERSION_1));

    for (i = 0; i < dev->num_queues; i++) {
        vduse_queue_enable(&dev->vqs[i]);
    }
}
static void vduse_dev_stop_dataplane(VduseDev *dev)
{
    size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    int i;

    for (i = 0; i < dev->num_queues; i++) {
        vduse_queue_disable(&dev->vqs[i]);
    }
    if (dev->log) {
        memset(dev->log, 0, log_size);
    }
    dev->features = 0;
    vduse_iova_remove_region(dev, 0, ULONG_MAX);
}
int vduse_dev_handler(VduseDev *dev)
{
    struct vduse_dev_request req;
    struct vduse_dev_response resp = { 0 };
    VduseVirtq *vq;
    int i, ret;

    ret = read(dev->fd, &req, sizeof(req));
    if (ret != sizeof(req)) {
        fprintf(stderr, "Read request error [%d]: %s\n",
                ret, strerror(errno));
        return -errno;
    }
    resp.request_id = req.request_id;

    switch (req.type) {
    case VDUSE_GET_VQ_STATE:
        vq = &dev->vqs[req.vq_state.index];
        resp.vq_state.split.avail_index = vq->last_avail_idx;
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    case VDUSE_SET_STATUS:
        if (req.s.status & VIRTIO_CONFIG_S_DRIVER_OK) {
            vduse_dev_start_dataplane(dev);
        } else if (req.s.status == 0) {
            vduse_dev_stop_dataplane(dev);
        }
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    case VDUSE_UPDATE_IOTLB:
        /* The iova will be updated by iova_to_va() later, so just remove it */
        vduse_iova_remove_region(dev, req.iova.start, req.iova.last);
        for (i = 0; i < dev->num_queues; i++) {
            VduseVirtq *vq = &dev->vqs[i];

            if (vq->ready) {
                if (vduse_queue_update_vring(vq, vq->vring.desc_addr,
                                             vq->vring.avail_addr,
                                             vq->vring.used_addr)) {
                    fprintf(stderr, "Failed to update vring for vq[%d]\n",
                            vq->index);
                }
            }
        }
        resp.result = VDUSE_REQ_RESULT_OK;
        break;
    default:
        resp.result = VDUSE_REQ_RESULT_FAILED;
        break;
    }

    ret = write(dev->fd, &resp, sizeof(resp));
    if (ret != sizeof(resp)) {
        fprintf(stderr, "Write request %d error [%d]: %s\n",
                req.type, ret, strerror(errno));
        return -errno;
    }

    return 0;
}
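/*
 * Illustrative usage sketch (not part of the original code): a backend is
 * expected to watch the device fd returned by vduse_dev_get_fd() and call
 * vduse_dev_handler() whenever it becomes readable, e.g. with poll(2):
 *
 *     struct pollfd pfd = {
 *         .fd = vduse_dev_get_fd(dev),
 *         .events = POLLIN,
 *     };
 *
 *     while (poll(&pfd, 1, -1) > 0) {
 *         if (pfd.revents & POLLIN) {
 *             vduse_dev_handler(dev);
 *         }
 *     }
 *
 * The per-queue kick eventfds (vduse_queue_get_fd()) are watched the same
 * way once the enable_queue() callback has fired.
 */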
int vduse_dev_update_config(VduseDev *dev, uint32_t size,
                            uint32_t offset, char *buffer)
{
    int ret;
    struct vduse_config_data *data;

    data = malloc(offsetof(struct vduse_config_data, buffer) + size);
    if (!data) {
        return -ENOMEM;
    }

    data->offset = offset;
    data->length = size;
    memcpy(data->buffer, buffer, size);

    ret = ioctl(dev->fd, VDUSE_DEV_SET_CONFIG, data);
    free(data);

    if (ret) {
        return -errno;
    }

    if (ioctl(dev->fd, VDUSE_DEV_INJECT_CONFIG_IRQ)) {
        return -errno;
    }

    return 0;
}
int vduse_dev_setup_queue(VduseDev *dev, int index, int max_size)
{
    VduseVirtq *vq = &dev->vqs[index];
    struct vduse_vq_config vq_config = { 0 };

    if (max_size > VIRTQUEUE_MAX_SIZE) {
        return -EINVAL;
    }

    vq_config.index = vq->index;
    vq_config.max_size = max_size;

    if (ioctl(dev->fd, VDUSE_VQ_SETUP, &vq_config)) {
        return -errno;
    }

    vduse_queue_enable(vq);

    return 0;
}
int vduse_set_reconnect_log_file(VduseDev *dev, const char *filename)
{
    size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    void *log;
    int i;

    dev->log = log = vduse_log_get(filename, log_size);
    if (log == MAP_FAILED) {
        fprintf(stderr, "Failed to get vduse log\n");
        return -EINVAL;
    }

    for (i = 0; i < dev->num_queues; i++) {
        dev->vqs[i].log = log;
        dev->vqs[i].log->inflight.desc_num = VIRTQUEUE_MAX_SIZE;
        log = (void *)((char *)log + vduse_vq_log_size(VIRTQUEUE_MAX_SIZE));
    }

    return 0;
}
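/*
 * Illustrative note (not part of the original code): the reconnect log file
 * holds one vduse_vq_log_size(VIRTQUEUE_MAX_SIZE) slot per queue, laid out
 * back to back:
 *
 *     [vq0 inflight region][vq1 inflight region]...[vqN-1 inflight region]
 *
 * Each slot is an mmap()ed VduseVirtqLogInflight, so inflight state written
 * through vq->log survives a backend crash and is replayed by
 * vduse_queue_check_inflights() on the next vduse_queue_enable().
 */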
static int vduse_dev_init_vqs(VduseDev *dev, uint16_t num_queues)
{
    VduseVirtq *vqs;
    int i;

    vqs = calloc(num_queues, sizeof(VduseVirtq));
    if (!vqs) {
        return -ENOMEM;
    }

    for (i = 0; i < num_queues; i++) {
        vqs[i].index = i;
        vqs[i].dev = dev;
        vqs[i].fd = -1;
    }
    dev->vqs = vqs;

    return 0;
}
static int vduse_dev_init(VduseDev *dev, const char *name,
                          uint16_t num_queues, const VduseOps *ops,
                          void *priv)
{
    char *dev_path, *dev_name;
    int ret, fd;

    dev_path = malloc(strlen(name) + strlen("/dev/vduse/") + 1);
    if (!dev_path) {
        return -ENOMEM;
    }
    sprintf(dev_path, "/dev/vduse/%s", name);

    fd = open(dev_path, O_RDWR);
    free(dev_path);
    if (fd < 0) {
        fprintf(stderr, "Failed to open vduse dev %s: %s\n",
                name, strerror(errno));
        return -errno;
    }

    if (ioctl(fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
        fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
        close(fd);
        return -errno;
    }

    dev_name = strdup(name);
    if (!dev_name) {
        close(fd);
        return -ENOMEM;
    }

    ret = vduse_dev_init_vqs(dev, num_queues);
    if (ret) {
        free(dev_name);
        close(fd);
        return ret;
    }

    dev->name = dev_name;
    dev->num_queues = num_queues;
    dev->fd = fd;
    dev->ops = ops;
    dev->priv = priv;

    return 0;
}

static inline bool vduse_name_is_invalid(const char *name)
{
    return strlen(name) >= VDUSE_NAME_MAX || strstr(name, "..");
}
VduseDev *vduse_dev_create_by_fd(int fd, uint16_t num_queues,
                                 const VduseOps *ops, void *priv)
{
    VduseDev *dev;
    int ret;

    if (!ops || !ops->enable_queue || !ops->disable_queue) {
        fprintf(stderr, "Invalid parameter for vduse\n");
        return NULL;
    }

    dev = calloc(1, sizeof(VduseDev));
    if (!dev) {
        fprintf(stderr, "Failed to allocate vduse device\n");
        return NULL;
    }

    if (ioctl(fd, VDUSE_DEV_GET_FEATURES, &dev->features)) {
        fprintf(stderr, "Failed to get features: %s\n", strerror(errno));
        free(dev);
        return NULL;
    }

    ret = vduse_dev_init_vqs(dev, num_queues);
    if (ret) {
        fprintf(stderr, "Failed to init vqs\n");
        free(dev);
        return NULL;
    }

    dev->num_queues = num_queues;
    dev->fd = fd;
    dev->ops = ops;
    dev->priv = priv;
    dev->ctrl_fd = -1;

    return dev;
}
VduseDev *vduse_dev_create_by_name(const char *name, uint16_t num_queues,
                                   const VduseOps *ops, void *priv)
{
    VduseDev *dev;
    int ret;

    if (!name || vduse_name_is_invalid(name) || !ops ||
        !ops->enable_queue || !ops->disable_queue) {
        fprintf(stderr, "Invalid parameter for vduse\n");
        return NULL;
    }

    dev = calloc(1, sizeof(VduseDev));
    if (!dev) {
        fprintf(stderr, "Failed to allocate vduse device\n");
        return NULL;
    }

    ret = vduse_dev_init(dev, name, num_queues, ops, priv);
    if (ret < 0) {
        fprintf(stderr, "Failed to init vduse device %s: %s\n",
                name, strerror(-ret));
        free(dev);
        return NULL;
    }
    dev->ctrl_fd = -1;

    return dev;
}
VduseDev *vduse_dev_create(const char *name, uint32_t device_id,
                           uint32_t vendor_id, uint64_t features,
                           uint16_t num_queues, uint32_t config_size,
                           char *config, const VduseOps *ops, void *priv)
{
    VduseDev *dev;
    int ret, ctrl_fd;
    uint64_t version;
    struct vduse_dev_config *dev_config;
    size_t size = offsetof(struct vduse_dev_config, config);

    if (!name || vduse_name_is_invalid(name) ||
        !has_feature(features, VIRTIO_F_VERSION_1) || !config ||
        !config_size || !ops || !ops->enable_queue || !ops->disable_queue) {
        fprintf(stderr, "Invalid parameter for vduse\n");
        return NULL;
    }

    dev = calloc(1, sizeof(VduseDev));
    if (!dev) {
        fprintf(stderr, "Failed to allocate vduse device\n");
        return NULL;
    }

    ctrl_fd = open("/dev/vduse/control", O_RDWR);
    if (ctrl_fd < 0) {
        fprintf(stderr, "Failed to open /dev/vduse/control: %s\n",
                strerror(errno));
        goto err_ctrl;
    }

    version = VDUSE_API_VERSION;
    if (ioctl(ctrl_fd, VDUSE_SET_API_VERSION, &version)) {
        fprintf(stderr, "Failed to set api version %" PRIu64 ": %s\n",
                version, strerror(errno));
        goto err_dev;
    }

    dev_config = calloc(size + config_size, 1);
    if (!dev_config) {
        fprintf(stderr, "Failed to allocate config space\n");
        goto err_dev;
    }

    assert(!vduse_name_is_invalid(name));
    strcpy(dev_config->name, name);
    dev_config->device_id = device_id;
    dev_config->vendor_id = vendor_id;
    dev_config->features = features;
    dev_config->vq_num = num_queues;
    dev_config->vq_align = VDUSE_VQ_ALIGN;
    dev_config->config_size = config_size;
    memcpy(dev_config->config, config, config_size);

    ret = ioctl(ctrl_fd, VDUSE_CREATE_DEV, dev_config);
    free(dev_config);
    if (ret && errno != EEXIST) {
        fprintf(stderr, "Failed to create vduse device %s: %s\n",
                name, strerror(errno));
        goto err_dev;
    }
    dev->ctrl_fd = ctrl_fd;

    ret = vduse_dev_init(dev, name, num_queues, ops, priv);
    if (ret < 0) {
        fprintf(stderr, "Failed to init vduse device %s: %s\n",
                name, strerror(-ret));
        goto err;
    }

    return dev;
err:
    ioctl(ctrl_fd, VDUSE_DESTROY_DEV, name);
err_dev:
    close(ctrl_fd);
err_ctrl:
    free(dev);

    return NULL;
}
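/*
 * Illustrative usage sketch (not part of the original code; the device id
 * constant, config layout and callback bodies are assumptions of this
 * example):
 *
 *     static void enable_cb(VduseDev *dev, VduseVirtq *vq)
 *     {
 *         ... start watching vduse_queue_get_fd(vq) in the event loop ...
 *     }
 *
 *     static void disable_cb(VduseDev *dev, VduseVirtq *vq)
 *     {
 *         ... stop watching the queue's kick fd ...
 *     }
 *
 *     static const VduseOps ops = {
 *         .enable_queue = enable_cb,
 *         .disable_queue = disable_cb,
 *     };
 *
 *     struct virtio_blk_config config = { ... };
 *     VduseDev *dev = vduse_dev_create("vduse-blk0", VIRTIO_ID_BLOCK, 0,
 *                                      vduse_get_virtio_features(), 1,
 *                                      sizeof(config), (char *)&config,
 *                                      &ops, NULL);
 *
 *     ... run the event loop, calling vduse_dev_handler() on device fd
 *     events, then tear down with:
 *
 *     vduse_dev_destroy(dev);
 */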
int vduse_dev_destroy(VduseDev *dev)
{
    size_t log_size = dev->num_queues * vduse_vq_log_size(VIRTQUEUE_MAX_SIZE);
    int i, ret = 0;

    if (dev->log) {
        munmap(dev->log, log_size);
    }
    for (i = 0; i < dev->num_queues; i++) {
        free(dev->vqs[i].resubmit_list);
    }
    free(dev->vqs);
    if (dev->fd >= 0) {
        close(dev->fd);
        dev->fd = -1;
    }
    if (dev->ctrl_fd >= 0) {
        if (ioctl(dev->ctrl_fd, VDUSE_DESTROY_DEV, dev->name)) {
            ret = -errno;
        }
        close(dev->ctrl_fd);
        dev->ctrl_fd = -1;
    }
    free(dev->name);
    free(dev);

    return ret;
}