4 * Copyright IBM, Corp. 2007
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
19 /* The alignment to use between consumer and producer parts of vring.
20 * This matches the x86 page size (4096 bytes). */
21 #define VIRTIO_PCI_VRING_ALIGN 4096
23 /* QEMU doesn't strictly need write barriers since everything runs in
24 * lock-step. We'll leave the calls to wmb() in though to make it obvious for
25 * KVM or if kqemu gets SMP support.
26 * In any case, we must prevent the compiler from reordering the code.
27 * TODO: we likely need some rmb()/mb() as well.
30 #define wmb() __asm__ __volatile__("": : :"memory")
32 typedef struct VRingDesc
40 typedef struct VRingAvail
47 typedef struct VRingUsedElem
53 typedef struct VRingUsed
57 VRingUsedElem ring
[0];
63 target_phys_addr_t desc
;
64 target_phys_addr_t avail
;
65 target_phys_addr_t used
;
71 target_phys_addr_t pa
;
72 uint16_t last_avail_idx
;
75 void (*handle_output
)(VirtIODevice
*vdev
, VirtQueue
*vq
);
78 #define VIRTIO_PCI_QUEUE_MAX 16
80 /* virt queue functions */
81 static void virtqueue_init(VirtQueue
*vq
)
83 target_phys_addr_t pa
= vq
->pa
;
86 vq
->vring
.avail
= pa
+ vq
->vring
.num
* sizeof(VRingDesc
);
87 vq
->vring
.used
= vring_align(vq
->vring
.avail
+
88 offsetof(VRingAvail
, ring
[vq
->vring
.num
]),
89 VIRTIO_PCI_VRING_ALIGN
);
92 static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa
, int i
)
94 target_phys_addr_t pa
;
95 pa
= desc_pa
+ sizeof(VRingDesc
) * i
+ offsetof(VRingDesc
, addr
);
99 static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa
, int i
)
101 target_phys_addr_t pa
;
102 pa
= desc_pa
+ sizeof(VRingDesc
) * i
+ offsetof(VRingDesc
, len
);
106 static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa
, int i
)
108 target_phys_addr_t pa
;
109 pa
= desc_pa
+ sizeof(VRingDesc
) * i
+ offsetof(VRingDesc
, flags
);
110 return lduw_phys(pa
);
113 static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa
, int i
)
115 target_phys_addr_t pa
;
116 pa
= desc_pa
+ sizeof(VRingDesc
) * i
+ offsetof(VRingDesc
, next
);
117 return lduw_phys(pa
);
120 static inline uint16_t vring_avail_flags(VirtQueue
*vq
)
122 target_phys_addr_t pa
;
123 pa
= vq
->vring
.avail
+ offsetof(VRingAvail
, flags
);
124 return lduw_phys(pa
);
127 static inline uint16_t vring_avail_idx(VirtQueue
*vq
)
129 target_phys_addr_t pa
;
130 pa
= vq
->vring
.avail
+ offsetof(VRingAvail
, idx
);
131 return lduw_phys(pa
);
134 static inline uint16_t vring_avail_ring(VirtQueue
*vq
, int i
)
136 target_phys_addr_t pa
;
137 pa
= vq
->vring
.avail
+ offsetof(VRingAvail
, ring
[i
]);
138 return lduw_phys(pa
);
141 static inline void vring_used_ring_id(VirtQueue
*vq
, int i
, uint32_t val
)
143 target_phys_addr_t pa
;
144 pa
= vq
->vring
.used
+ offsetof(VRingUsed
, ring
[i
].id
);
148 static inline void vring_used_ring_len(VirtQueue
*vq
, int i
, uint32_t val
)
150 target_phys_addr_t pa
;
151 pa
= vq
->vring
.used
+ offsetof(VRingUsed
, ring
[i
].len
);
155 static uint16_t vring_used_idx(VirtQueue
*vq
)
157 target_phys_addr_t pa
;
158 pa
= vq
->vring
.used
+ offsetof(VRingUsed
, idx
);
159 return lduw_phys(pa
);
162 static inline void vring_used_idx_increment(VirtQueue
*vq
, uint16_t val
)
164 target_phys_addr_t pa
;
165 pa
= vq
->vring
.used
+ offsetof(VRingUsed
, idx
);
166 stw_phys(pa
, vring_used_idx(vq
) + val
);
169 static inline void vring_used_flags_set_bit(VirtQueue
*vq
, int mask
)
171 target_phys_addr_t pa
;
172 pa
= vq
->vring
.used
+ offsetof(VRingUsed
, flags
);
173 stw_phys(pa
, lduw_phys(pa
) | mask
);
176 static inline void vring_used_flags_unset_bit(VirtQueue
*vq
, int mask
)
178 target_phys_addr_t pa
;
179 pa
= vq
->vring
.used
+ offsetof(VRingUsed
, flags
);
180 stw_phys(pa
, lduw_phys(pa
) & ~mask
);
183 void virtio_queue_set_notification(VirtQueue
*vq
, int enable
)
186 vring_used_flags_unset_bit(vq
, VRING_USED_F_NO_NOTIFY
);
188 vring_used_flags_set_bit(vq
, VRING_USED_F_NO_NOTIFY
);
191 int virtio_queue_ready(VirtQueue
*vq
)
193 return vq
->vring
.avail
!= 0;
196 int virtio_queue_empty(VirtQueue
*vq
)
198 return vring_avail_idx(vq
) == vq
->last_avail_idx
;
201 void virtqueue_fill(VirtQueue
*vq
, const VirtQueueElement
*elem
,
202 unsigned int len
, unsigned int idx
)
208 for (i
= 0; i
< elem
->in_num
; i
++) {
209 size_t size
= MIN(len
- offset
, elem
->in_sg
[i
].iov_len
);
211 cpu_physical_memory_unmap(elem
->in_sg
[i
].iov_base
,
212 elem
->in_sg
[i
].iov_len
,
215 offset
+= elem
->in_sg
[i
].iov_len
;
218 for (i
= 0; i
< elem
->out_num
; i
++)
219 cpu_physical_memory_unmap(elem
->out_sg
[i
].iov_base
,
220 elem
->out_sg
[i
].iov_len
,
221 0, elem
->out_sg
[i
].iov_len
);
223 idx
= (idx
+ vring_used_idx(vq
)) % vq
->vring
.num
;
225 /* Update the id and len fields of the next entry in the used ring. */
226 vring_used_ring_id(vq
, idx
, elem
->index
);
227 vring_used_ring_len(vq
, idx
, len
);
230 void virtqueue_flush(VirtQueue
*vq
, unsigned int count
)
232 /* Make sure buffer is written before we update index. */
234 vring_used_idx_increment(vq
, count
);
238 void virtqueue_push(VirtQueue
*vq
, const VirtQueueElement
*elem
,
241 virtqueue_fill(vq
, elem
, len
, 0);
242 virtqueue_flush(vq
, 1);
245 static int virtqueue_num_heads(VirtQueue
*vq
, unsigned int idx
)
247 uint16_t num_heads
= vring_avail_idx(vq
) - idx
;
249 /* Check it isn't doing very strange things with descriptor numbers. */
250 if (num_heads
> vq
->vring
.num
) {
251 fprintf(stderr
, "Guest moved used index from %u to %u",
252 idx
, vring_avail_idx(vq
));
259 static unsigned int virtqueue_get_head(VirtQueue
*vq
, unsigned int idx
)
263 /* Grab the next descriptor number they're advertising, and increment
264 * the index we've seen. */
265 head
= vring_avail_ring(vq
, idx
% vq
->vring
.num
);
267 /* If their number is silly, that's a fatal mistake. */
268 if (head
>= vq
->vring
.num
) {
269 fprintf(stderr
, "Guest says index %u is available", head
);
276 static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa
,
277 unsigned int i
, unsigned int max
)
281 /* If this descriptor says it doesn't chain, we're done. */
282 if (!(vring_desc_flags(desc_pa
, i
) & VRING_DESC_F_NEXT
))
285 /* Check they're not leading us off end of descriptors. */
286 next
= vring_desc_next(desc_pa
, i
);
287 /* Make sure compiler knows to grab that: we don't want it changing! */
291 fprintf(stderr
, "Desc next is %u", next
);
298 int virtqueue_avail_bytes(VirtQueue
*vq
, int in_bytes
, int out_bytes
)
301 int total_bufs
, in_total
, out_total
;
303 idx
= vq
->last_avail_idx
;
305 total_bufs
= in_total
= out_total
= 0;
306 while (virtqueue_num_heads(vq
, idx
)) {
307 unsigned int max
, num_bufs
, indirect
= 0;
308 target_phys_addr_t desc_pa
;
312 num_bufs
= total_bufs
;
313 i
= virtqueue_get_head(vq
, idx
++);
314 desc_pa
= vq
->vring
.desc
;
316 if (vring_desc_flags(desc_pa
, i
) & VRING_DESC_F_INDIRECT
) {
317 if (vring_desc_len(desc_pa
, i
) % sizeof(VRingDesc
)) {
318 fprintf(stderr
, "Invalid size for indirect buffer table\n");
322 /* If we've got too many, that implies a descriptor loop. */
323 if (num_bufs
>= max
) {
324 fprintf(stderr
, "Looped descriptor");
328 /* loop over the indirect descriptor table */
330 max
= vring_desc_len(desc_pa
, i
) / sizeof(VRingDesc
);
332 desc_pa
= vring_desc_addr(desc_pa
, i
);
336 /* If we've got too many, that implies a descriptor loop. */
337 if (++num_bufs
> max
) {
338 fprintf(stderr
, "Looped descriptor");
342 if (vring_desc_flags(desc_pa
, i
) & VRING_DESC_F_WRITE
) {
344 (in_total
+= vring_desc_len(desc_pa
, i
)) >= in_bytes
)
348 (out_total
+= vring_desc_len(desc_pa
, i
)) >= out_bytes
)
351 } while ((i
= virtqueue_next_desc(desc_pa
, i
, max
)) != max
);
354 total_bufs
= num_bufs
;
362 int virtqueue_pop(VirtQueue
*vq
, VirtQueueElement
*elem
)
364 unsigned int i
, head
, max
;
365 target_phys_addr_t desc_pa
= vq
->vring
.desc
;
366 target_phys_addr_t len
;
368 if (!virtqueue_num_heads(vq
, vq
->last_avail_idx
))
371 /* When we start there is neither input nor output. */
372 elem
->out_num
= elem
->in_num
= 0;
376 i
= head
= virtqueue_get_head(vq
, vq
->last_avail_idx
++);
378 if (vring_desc_flags(desc_pa
, i
) & VRING_DESC_F_INDIRECT
) {
379 if (vring_desc_len(desc_pa
, i
) % sizeof(VRingDesc
)) {
380 fprintf(stderr
, "Invalid size for indirect buffer table\n");
384 /* loop over the indirect descriptor table */
385 max
= vring_desc_len(desc_pa
, i
) / sizeof(VRingDesc
);
386 desc_pa
= vring_desc_addr(desc_pa
, i
);
394 if (vring_desc_flags(desc_pa
, i
) & VRING_DESC_F_WRITE
) {
395 elem
->in_addr
[elem
->in_num
] = vring_desc_addr(desc_pa
, i
);
396 sg
= &elem
->in_sg
[elem
->in_num
++];
399 sg
= &elem
->out_sg
[elem
->out_num
++];
401 /* Grab the first descriptor, and check it's OK. */
402 sg
->iov_len
= vring_desc_len(desc_pa
, i
);
405 sg
->iov_base
= cpu_physical_memory_map(vring_desc_addr(desc_pa
, i
),
408 if (sg
->iov_base
== NULL
|| len
!= sg
->iov_len
) {
409 fprintf(stderr
, "virtio: trying to map MMIO memory\n");
413 /* If we've got too many, that implies a descriptor loop. */
414 if ((elem
->in_num
+ elem
->out_num
) > max
) {
415 fprintf(stderr
, "Looped descriptor");
418 } while ((i
= virtqueue_next_desc(desc_pa
, i
, max
)) != max
);
424 return elem
->in_num
+ elem
->out_num
;
428 static void virtio_notify_vector(VirtIODevice
*vdev
, uint16_t vector
)
430 if (vdev
->binding
->notify
) {
431 vdev
->binding
->notify(vdev
->binding_opaque
, vector
);
435 void virtio_update_irq(VirtIODevice
*vdev
)
437 virtio_notify_vector(vdev
, VIRTIO_NO_VECTOR
);
440 void virtio_reset(void *opaque
)
442 VirtIODevice
*vdev
= opaque
;
452 vdev
->config_vector
= VIRTIO_NO_VECTOR
;
453 virtio_notify_vector(vdev
, vdev
->config_vector
);
455 for(i
= 0; i
< VIRTIO_PCI_QUEUE_MAX
; i
++) {
456 vdev
->vq
[i
].vring
.desc
= 0;
457 vdev
->vq
[i
].vring
.avail
= 0;
458 vdev
->vq
[i
].vring
.used
= 0;
459 vdev
->vq
[i
].last_avail_idx
= 0;
461 vdev
->vq
[i
].vector
= VIRTIO_NO_VECTOR
;
465 uint32_t virtio_config_readb(VirtIODevice
*vdev
, uint32_t addr
)
469 vdev
->get_config(vdev
, vdev
->config
);
471 if (addr
> (vdev
->config_len
- sizeof(val
)))
474 memcpy(&val
, vdev
->config
+ addr
, sizeof(val
));
478 uint32_t virtio_config_readw(VirtIODevice
*vdev
, uint32_t addr
)
482 vdev
->get_config(vdev
, vdev
->config
);
484 if (addr
> (vdev
->config_len
- sizeof(val
)))
487 memcpy(&val
, vdev
->config
+ addr
, sizeof(val
));
491 uint32_t virtio_config_readl(VirtIODevice
*vdev
, uint32_t addr
)
495 vdev
->get_config(vdev
, vdev
->config
);
497 if (addr
> (vdev
->config_len
- sizeof(val
)))
500 memcpy(&val
, vdev
->config
+ addr
, sizeof(val
));
504 void virtio_config_writeb(VirtIODevice
*vdev
, uint32_t addr
, uint32_t data
)
508 if (addr
> (vdev
->config_len
- sizeof(val
)))
511 memcpy(vdev
->config
+ addr
, &val
, sizeof(val
));
513 if (vdev
->set_config
)
514 vdev
->set_config(vdev
, vdev
->config
);
517 void virtio_config_writew(VirtIODevice
*vdev
, uint32_t addr
, uint32_t data
)
521 if (addr
> (vdev
->config_len
- sizeof(val
)))
524 memcpy(vdev
->config
+ addr
, &val
, sizeof(val
));
526 if (vdev
->set_config
)
527 vdev
->set_config(vdev
, vdev
->config
);
530 void virtio_config_writel(VirtIODevice
*vdev
, uint32_t addr
, uint32_t data
)
534 if (addr
> (vdev
->config_len
- sizeof(val
)))
537 memcpy(vdev
->config
+ addr
, &val
, sizeof(val
));
539 if (vdev
->set_config
)
540 vdev
->set_config(vdev
, vdev
->config
);
543 void virtio_queue_set_addr(VirtIODevice
*vdev
, int n
, target_phys_addr_t addr
)
545 vdev
->vq
[n
].pa
= addr
;
546 virtqueue_init(&vdev
->vq
[n
]);
549 target_phys_addr_t
virtio_queue_get_addr(VirtIODevice
*vdev
, int n
)
551 return vdev
->vq
[n
].pa
;
554 int virtio_queue_get_num(VirtIODevice
*vdev
, int n
)
556 return vdev
->vq
[n
].vring
.num
;
559 void virtio_queue_notify(VirtIODevice
*vdev
, int n
)
561 if (n
< VIRTIO_PCI_QUEUE_MAX
&& vdev
->vq
[n
].vring
.desc
) {
562 vdev
->vq
[n
].handle_output(vdev
, &vdev
->vq
[n
]);
566 uint16_t virtio_queue_vector(VirtIODevice
*vdev
, int n
)
568 return n
< VIRTIO_PCI_QUEUE_MAX
? vdev
->vq
[n
].vector
:
572 void virtio_queue_set_vector(VirtIODevice
*vdev
, int n
, uint16_t vector
)
574 if (n
< VIRTIO_PCI_QUEUE_MAX
)
575 vdev
->vq
[n
].vector
= vector
;
578 VirtQueue
*virtio_add_queue(VirtIODevice
*vdev
, int queue_size
,
579 void (*handle_output
)(VirtIODevice
*, VirtQueue
*))
583 for (i
= 0; i
< VIRTIO_PCI_QUEUE_MAX
; i
++) {
584 if (vdev
->vq
[i
].vring
.num
== 0)
588 if (i
== VIRTIO_PCI_QUEUE_MAX
|| queue_size
> VIRTQUEUE_MAX_SIZE
)
591 vdev
->vq
[i
].vring
.num
= queue_size
;
592 vdev
->vq
[i
].handle_output
= handle_output
;
597 void virtio_notify(VirtIODevice
*vdev
, VirtQueue
*vq
)
599 /* Always notify when queue is empty (when the feature has been acknowledged) */
600 if ((vring_avail_flags(vq
) & VRING_AVAIL_F_NO_INTERRUPT
) &&
601 (!(vdev
->features
& (1 << VIRTIO_F_NOTIFY_ON_EMPTY
)) ||
602 (vq
->inuse
|| vring_avail_idx(vq
) != vq
->last_avail_idx
)))
606 virtio_notify_vector(vdev
, vq
->vector
);
609 void virtio_notify_config(VirtIODevice
*vdev
)
611 if (!(vdev
->status
& VIRTIO_CONFIG_S_DRIVER_OK
))
615 virtio_notify_vector(vdev
, vdev
->config_vector
);
618 void virtio_save(VirtIODevice
*vdev
, QEMUFile
*f
)
622 if (vdev
->binding
->save_config
)
623 vdev
->binding
->save_config(vdev
->binding_opaque
, f
);
625 qemu_put_8s(f
, &vdev
->status
);
626 qemu_put_8s(f
, &vdev
->isr
);
627 qemu_put_be16s(f
, &vdev
->queue_sel
);
628 qemu_put_be32s(f
, &vdev
->features
);
629 qemu_put_be32(f
, vdev
->config_len
);
630 qemu_put_buffer(f
, vdev
->config
, vdev
->config_len
);
632 for (i
= 0; i
< VIRTIO_PCI_QUEUE_MAX
; i
++) {
633 if (vdev
->vq
[i
].vring
.num
== 0)
639 for (i
= 0; i
< VIRTIO_PCI_QUEUE_MAX
; i
++) {
640 if (vdev
->vq
[i
].vring
.num
== 0)
643 qemu_put_be32(f
, vdev
->vq
[i
].vring
.num
);
644 qemu_put_be64(f
, vdev
->vq
[i
].pa
);
645 qemu_put_be16s(f
, &vdev
->vq
[i
].last_avail_idx
);
646 if (vdev
->binding
->save_queue
)
647 vdev
->binding
->save_queue(vdev
->binding_opaque
, i
, f
);
651 int virtio_load(VirtIODevice
*vdev
, QEMUFile
*f
)
655 uint32_t supported_features
= vdev
->get_features(vdev
) |
656 vdev
->binding
->get_features(vdev
->binding_opaque
);
658 if (vdev
->binding
->load_config
) {
659 ret
= vdev
->binding
->load_config(vdev
->binding_opaque
, f
);
664 qemu_get_8s(f
, &vdev
->status
);
665 qemu_get_8s(f
, &vdev
->isr
);
666 qemu_get_be16s(f
, &vdev
->queue_sel
);
667 qemu_get_be32s(f
, &features
);
668 if (features
& ~supported_features
) {
669 fprintf(stderr
, "Features 0x%x unsupported. Allowed features: 0x%x\n",
670 features
, supported_features
);
673 vdev
->features
= features
;
674 vdev
->config_len
= qemu_get_be32(f
);
675 qemu_get_buffer(f
, vdev
->config
, vdev
->config_len
);
677 num
= qemu_get_be32(f
);
679 for (i
= 0; i
< num
; i
++) {
680 vdev
->vq
[i
].vring
.num
= qemu_get_be32(f
);
681 vdev
->vq
[i
].pa
= qemu_get_be64(f
);
682 qemu_get_be16s(f
, &vdev
->vq
[i
].last_avail_idx
);
684 if (vdev
->vq
[i
].pa
) {
685 virtqueue_init(&vdev
->vq
[i
]);
687 if (vdev
->binding
->load_queue
) {
688 ret
= vdev
->binding
->load_queue(vdev
->binding_opaque
, i
, f
);
694 virtio_notify_vector(vdev
, VIRTIO_NO_VECTOR
);
698 void virtio_cleanup(VirtIODevice
*vdev
)
701 qemu_free(vdev
->config
);
705 VirtIODevice
*virtio_common_init(const char *name
, uint16_t device_id
,
706 size_t config_size
, size_t struct_size
)
711 vdev
= qemu_mallocz(struct_size
);
713 vdev
->device_id
= device_id
;
717 vdev
->config_vector
= VIRTIO_NO_VECTOR
;
718 vdev
->vq
= qemu_mallocz(sizeof(VirtQueue
) * VIRTIO_PCI_QUEUE_MAX
);
719 for(i
= 0; i
< VIRTIO_PCI_QUEUE_MAX
; i
++)
720 vdev
->vq
[i
].vector
= VIRTIO_NO_VECTOR
;
723 vdev
->config_len
= config_size
;
724 if (vdev
->config_len
)
725 vdev
->config
= qemu_mallocz(config_size
);
732 void virtio_bind_device(VirtIODevice
*vdev
, const VirtIOBindings
*binding
,
735 vdev
->binding
= binding
;
736 vdev
->binding_opaque
= opaque
;