e1000: add interrupt mitigation support
[qemu/cris-port.git] / hw / virtio / virtio.c
blob2f1e73bc750cab541c7c94b16a1d7025d40c8737
1 /*
2 * Virtio Support
4 * Copyright IBM, Corp. 2007
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include <inttypes.h>
16 #include "trace.h"
17 #include "qemu/error-report.h"
18 #include "hw/virtio/virtio.h"
19 #include "qemu/atomic.h"
20 #include "hw/virtio/virtio-bus.h"
/*
 * Default alignment between the consumer and producer parts of a vring
 * (the x86 page size).  Transports such as PCI, which give the guest no
 * means of telling the host which alignment to use, rely on this value.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
/* One descriptor table entry; fields are read with the vring_desc_*() helpers. */
typedef struct VRingDesc
{
    uint64_t addr;      /* buffer address */
    uint32_t len;       /* buffer length */
    uint16_t flags;     /* VRING_DESC_F_* bits */
    uint16_t next;      /* index of the chained descriptor, if any */
} VRingDesc;
/* Header of the available ring, followed by its 16-bit entries. */
typedef struct VRingAvail
{
    uint16_t flags;     /* VRING_AVAIL_F_* bits */
    uint16_t idx;       /* free-running index of the next entry */
    uint16_t ring[0];   /* descriptor head numbers */
} VRingAvail;
/* One used ring entry, written by virtqueue_fill(). */
typedef struct VRingUsedElem
{
    uint32_t id;        /* head index of the completed element */
    uint32_t len;       /* length reported for the element */
} VRingUsedElem;
50 typedef struct VRingUsed
52 uint16_t flags;
53 uint16_t idx;
54 VRingUsedElem ring[0];
55 } VRingUsed;
57 typedef struct VRing
59 unsigned int num;
60 unsigned int align;
61 hwaddr desc;
62 hwaddr avail;
63 hwaddr used;
64 } VRing;
66 struct VirtQueue
68 VRing vring;
69 hwaddr pa;
70 uint16_t last_avail_idx;
71 /* Last used index value we have signalled on */
72 uint16_t signalled_used;
74 /* Last used index value we have signalled on */
75 bool signalled_used_valid;
77 /* Notification enabled? */
78 bool notification;
80 uint16_t queue_index;
82 int inuse;
84 uint16_t vector;
85 void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
86 VirtIODevice *vdev;
87 EventNotifier guest_notifier;
88 EventNotifier host_notifier;
91 /* virt queue functions */
92 static void virtqueue_init(VirtQueue *vq)
94 hwaddr pa = vq->pa;
96 vq->vring.desc = pa;
97 vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
98 vq->vring.used = vring_align(vq->vring.avail +
99 offsetof(VRingAvail, ring[vq->vring.num]),
100 vq->vring.align);
103 static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
105 hwaddr pa;
106 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
107 return ldq_phys(pa);
110 static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
112 hwaddr pa;
113 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
114 return ldl_phys(pa);
117 static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
119 hwaddr pa;
120 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
121 return lduw_phys(pa);
124 static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
126 hwaddr pa;
127 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
128 return lduw_phys(pa);
131 static inline uint16_t vring_avail_flags(VirtQueue *vq)
133 hwaddr pa;
134 pa = vq->vring.avail + offsetof(VRingAvail, flags);
135 return lduw_phys(pa);
138 static inline uint16_t vring_avail_idx(VirtQueue *vq)
140 hwaddr pa;
141 pa = vq->vring.avail + offsetof(VRingAvail, idx);
142 return lduw_phys(pa);
145 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
147 hwaddr pa;
148 pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
149 return lduw_phys(pa);
152 static inline uint16_t vring_used_event(VirtQueue *vq)
154 return vring_avail_ring(vq, vq->vring.num);
157 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
159 hwaddr pa;
160 pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
161 stl_phys(pa, val);
164 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
166 hwaddr pa;
167 pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
168 stl_phys(pa, val);
171 static uint16_t vring_used_idx(VirtQueue *vq)
173 hwaddr pa;
174 pa = vq->vring.used + offsetof(VRingUsed, idx);
175 return lduw_phys(pa);
178 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
180 hwaddr pa;
181 pa = vq->vring.used + offsetof(VRingUsed, idx);
182 stw_phys(pa, val);
185 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
187 hwaddr pa;
188 pa = vq->vring.used + offsetof(VRingUsed, flags);
189 stw_phys(pa, lduw_phys(pa) | mask);
192 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
194 hwaddr pa;
195 pa = vq->vring.used + offsetof(VRingUsed, flags);
196 stw_phys(pa, lduw_phys(pa) & ~mask);
199 static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
201 hwaddr pa;
202 if (!vq->notification) {
203 return;
205 pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
206 stw_phys(pa, val);
209 void virtio_queue_set_notification(VirtQueue *vq, int enable)
211 vq->notification = enable;
212 if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
213 vring_avail_event(vq, vring_avail_idx(vq));
214 } else if (enable) {
215 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
216 } else {
217 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
219 if (enable) {
220 /* Expose avail event/used flags before caller checks the avail idx. */
221 smp_mb();
225 int virtio_queue_ready(VirtQueue *vq)
227 return vq->vring.avail != 0;
230 int virtio_queue_empty(VirtQueue *vq)
232 return vring_avail_idx(vq) == vq->last_avail_idx;
235 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
236 unsigned int len, unsigned int idx)
238 unsigned int offset;
239 int i;
241 trace_virtqueue_fill(vq, elem, len, idx);
243 offset = 0;
244 for (i = 0; i < elem->in_num; i++) {
245 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
247 cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
248 elem->in_sg[i].iov_len,
249 1, size);
251 offset += size;
254 for (i = 0; i < elem->out_num; i++)
255 cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
256 elem->out_sg[i].iov_len,
257 0, elem->out_sg[i].iov_len);
259 idx = (idx + vring_used_idx(vq)) % vq->vring.num;
261 /* Get a pointer to the next entry in the used ring. */
262 vring_used_ring_id(vq, idx, elem->index);
263 vring_used_ring_len(vq, idx, len);
266 void virtqueue_flush(VirtQueue *vq, unsigned int count)
268 uint16_t old, new;
269 /* Make sure buffer is written before we update index. */
270 smp_wmb();
271 trace_virtqueue_flush(vq, count);
272 old = vring_used_idx(vq);
273 new = old + count;
274 vring_used_idx_set(vq, new);
275 vq->inuse -= count;
276 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
277 vq->signalled_used_valid = false;
280 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
281 unsigned int len)
283 virtqueue_fill(vq, elem, len, 0);
284 virtqueue_flush(vq, 1);
287 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
289 uint16_t num_heads = vring_avail_idx(vq) - idx;
291 /* Check it isn't doing very strange things with descriptor numbers. */
292 if (num_heads > vq->vring.num) {
293 error_report("Guest moved used index from %u to %u",
294 idx, vring_avail_idx(vq));
295 exit(1);
297 /* On success, callers read a descriptor at vq->last_avail_idx.
298 * Make sure descriptor read does not bypass avail index read. */
299 if (num_heads) {
300 smp_rmb();
303 return num_heads;
306 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
308 unsigned int head;
310 /* Grab the next descriptor number they're advertising, and increment
311 * the index we've seen. */
312 head = vring_avail_ring(vq, idx % vq->vring.num);
314 /* If their number is silly, that's a fatal mistake. */
315 if (head >= vq->vring.num) {
316 error_report("Guest says index %u is available", head);
317 exit(1);
320 return head;
323 static unsigned virtqueue_next_desc(hwaddr desc_pa,
324 unsigned int i, unsigned int max)
326 unsigned int next;
328 /* If this descriptor says it doesn't chain, we're done. */
329 if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
330 return max;
332 /* Check they're not leading us off end of descriptors. */
333 next = vring_desc_next(desc_pa, i);
334 /* Make sure compiler knows to grab that: we don't want it changing! */
335 smp_wmb();
337 if (next >= max) {
338 error_report("Desc next is %u", next);
339 exit(1);
342 return next;
345 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
346 unsigned int *out_bytes,
347 unsigned max_in_bytes, unsigned max_out_bytes)
349 unsigned int idx;
350 unsigned int total_bufs, in_total, out_total;
352 idx = vq->last_avail_idx;
354 total_bufs = in_total = out_total = 0;
355 while (virtqueue_num_heads(vq, idx)) {
356 unsigned int max, num_bufs, indirect = 0;
357 hwaddr desc_pa;
358 int i;
360 max = vq->vring.num;
361 num_bufs = total_bufs;
362 i = virtqueue_get_head(vq, idx++);
363 desc_pa = vq->vring.desc;
365 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
366 if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
367 error_report("Invalid size for indirect buffer table");
368 exit(1);
371 /* If we've got too many, that implies a descriptor loop. */
372 if (num_bufs >= max) {
373 error_report("Looped descriptor");
374 exit(1);
377 /* loop over the indirect descriptor table */
378 indirect = 1;
379 max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
380 desc_pa = vring_desc_addr(desc_pa, i);
381 num_bufs = i = 0;
384 do {
385 /* If we've got too many, that implies a descriptor loop. */
386 if (++num_bufs > max) {
387 error_report("Looped descriptor");
388 exit(1);
391 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
392 in_total += vring_desc_len(desc_pa, i);
393 } else {
394 out_total += vring_desc_len(desc_pa, i);
396 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
397 goto done;
399 } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
401 if (!indirect)
402 total_bufs = num_bufs;
403 else
404 total_bufs++;
406 done:
407 if (in_bytes) {
408 *in_bytes = in_total;
410 if (out_bytes) {
411 *out_bytes = out_total;
415 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
416 unsigned int out_bytes)
418 unsigned int in_total, out_total;
420 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
421 return in_bytes <= in_total && out_bytes <= out_total;
424 void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
425 size_t num_sg, int is_write)
427 unsigned int i;
428 hwaddr len;
430 for (i = 0; i < num_sg; i++) {
431 len = sg[i].iov_len;
432 sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
433 if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
434 error_report("virtio: trying to map MMIO memory");
435 exit(1);
440 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
442 unsigned int i, head, max;
443 hwaddr desc_pa = vq->vring.desc;
445 if (!virtqueue_num_heads(vq, vq->last_avail_idx))
446 return 0;
448 /* When we start there are none of either input nor output. */
449 elem->out_num = elem->in_num = 0;
451 max = vq->vring.num;
453 i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
454 if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
455 vring_avail_event(vq, vring_avail_idx(vq));
458 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
459 if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
460 error_report("Invalid size for indirect buffer table");
461 exit(1);
464 /* loop over the indirect descriptor table */
465 max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
466 desc_pa = vring_desc_addr(desc_pa, i);
467 i = 0;
470 /* Collect all the descriptors */
471 do {
472 struct iovec *sg;
474 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
475 if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
476 error_report("Too many write descriptors in indirect table");
477 exit(1);
479 elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
480 sg = &elem->in_sg[elem->in_num++];
481 } else {
482 if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
483 error_report("Too many read descriptors in indirect table");
484 exit(1);
486 elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
487 sg = &elem->out_sg[elem->out_num++];
490 sg->iov_len = vring_desc_len(desc_pa, i);
492 /* If we've got too many, that implies a descriptor loop. */
493 if ((elem->in_num + elem->out_num) > max) {
494 error_report("Looped descriptor");
495 exit(1);
497 } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
499 /* Now map what we have collected */
500 virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
501 virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
503 elem->index = head;
505 vq->inuse++;
507 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
508 return elem->in_num + elem->out_num;
511 /* virtio device */
512 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
514 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
515 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
517 if (k->notify) {
518 k->notify(qbus->parent, vector);
522 void virtio_update_irq(VirtIODevice *vdev)
524 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
527 void virtio_set_status(VirtIODevice *vdev, uint8_t val)
529 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
530 trace_virtio_set_status(vdev, val);
532 if (k->set_status) {
533 k->set_status(vdev, val);
535 vdev->status = val;
538 void virtio_reset(void *opaque)
540 VirtIODevice *vdev = opaque;
541 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
542 int i;
544 virtio_set_status(vdev, 0);
546 if (k->reset) {
547 k->reset(vdev);
550 vdev->guest_features = 0;
551 vdev->queue_sel = 0;
552 vdev->status = 0;
553 vdev->isr = 0;
554 vdev->config_vector = VIRTIO_NO_VECTOR;
555 virtio_notify_vector(vdev, vdev->config_vector);
557 for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
558 vdev->vq[i].vring.desc = 0;
559 vdev->vq[i].vring.avail = 0;
560 vdev->vq[i].vring.used = 0;
561 vdev->vq[i].last_avail_idx = 0;
562 vdev->vq[i].pa = 0;
563 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
564 vdev->vq[i].signalled_used = 0;
565 vdev->vq[i].signalled_used_valid = false;
566 vdev->vq[i].notification = true;
570 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
572 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
573 uint8_t val;
575 if (addr + sizeof(val) > vdev->config_len) {
576 return (uint32_t)-1;
579 k->get_config(vdev, vdev->config);
581 val = ldub_p(vdev->config + addr);
582 return val;
585 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
587 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
588 uint16_t val;
590 if (addr + sizeof(val) > vdev->config_len) {
591 return (uint32_t)-1;
594 k->get_config(vdev, vdev->config);
596 val = lduw_p(vdev->config + addr);
597 return val;
600 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
602 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
603 uint32_t val;
605 if (addr + sizeof(val) > vdev->config_len) {
606 return (uint32_t)-1;
609 k->get_config(vdev, vdev->config);
611 val = ldl_p(vdev->config + addr);
612 return val;
615 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
617 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
618 uint8_t val = data;
620 if (addr + sizeof(val) > vdev->config_len) {
621 return;
624 stb_p(vdev->config + addr, val);
626 if (k->set_config) {
627 k->set_config(vdev, vdev->config);
631 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
633 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
634 uint16_t val = data;
636 if (addr + sizeof(val) > vdev->config_len) {
637 return;
640 stw_p(vdev->config + addr, val);
642 if (k->set_config) {
643 k->set_config(vdev, vdev->config);
647 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
649 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
650 uint32_t val = data;
652 if (addr + sizeof(val) > vdev->config_len) {
653 return;
656 stl_p(vdev->config + addr, val);
658 if (k->set_config) {
659 k->set_config(vdev, vdev->config);
663 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
665 vdev->vq[n].pa = addr;
666 virtqueue_init(&vdev->vq[n]);
669 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
671 return vdev->vq[n].pa;
674 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
676 /* Don't allow guest to flip queue between existent and
677 * nonexistent states, or to set it to an invalid size.
679 if (!!num != !!vdev->vq[n].vring.num ||
680 num > VIRTQUEUE_MAX_SIZE ||
681 num < 0) {
682 return;
684 vdev->vq[n].vring.num = num;
685 virtqueue_init(&vdev->vq[n]);
688 int virtio_queue_get_num(VirtIODevice *vdev, int n)
690 return vdev->vq[n].vring.num;
693 int virtio_queue_get_id(VirtQueue *vq)
695 VirtIODevice *vdev = vq->vdev;
696 assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
697 return vq - &vdev->vq[0];
700 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
702 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
703 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
705 /* Check that the transport told us it was going to do this
706 * (so a buggy transport will immediately assert rather than
707 * silently failing to migrate this state)
709 assert(k->has_variable_vring_alignment);
711 vdev->vq[n].vring.align = align;
712 virtqueue_init(&vdev->vq[n]);
715 void virtio_queue_notify_vq(VirtQueue *vq)
717 if (vq->vring.desc) {
718 VirtIODevice *vdev = vq->vdev;
719 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
720 vq->handle_output(vdev, vq);
724 void virtio_queue_notify(VirtIODevice *vdev, int n)
726 virtio_queue_notify_vq(&vdev->vq[n]);
729 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
731 return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
732 VIRTIO_NO_VECTOR;
735 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
737 if (n < VIRTIO_PCI_QUEUE_MAX)
738 vdev->vq[n].vector = vector;
741 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
742 void (*handle_output)(VirtIODevice *, VirtQueue *))
744 int i;
746 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
747 if (vdev->vq[i].vring.num == 0)
748 break;
751 if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
752 abort();
754 vdev->vq[i].vring.num = queue_size;
755 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
756 vdev->vq[i].handle_output = handle_output;
758 return &vdev->vq[i];
761 void virtio_del_queue(VirtIODevice *vdev, int n)
763 if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
764 abort();
767 vdev->vq[n].vring.num = 0;
770 void virtio_irq(VirtQueue *vq)
772 trace_virtio_irq(vq);
773 vq->vdev->isr |= 0x01;
774 virtio_notify_vector(vq->vdev, vq->vector);
/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event? */
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    const uint16_t past_event = (uint16_t)(new - event - 1);
    const uint16_t just_added = (uint16_t)(new - old);

    return past_event < just_added;
}
790 static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
792 uint16_t old, new;
793 bool v;
794 /* We need to expose used array entries before checking used event. */
795 smp_mb();
796 /* Always notify when queue is empty (when feature acknowledge) */
797 if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
798 !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
799 return true;
802 if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
803 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
806 v = vq->signalled_used_valid;
807 vq->signalled_used_valid = true;
808 old = vq->signalled_used;
809 new = vq->signalled_used = vring_used_idx(vq);
810 return !v || vring_need_event(vring_used_event(vq), new, old);
813 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
815 if (!vring_notify(vdev, vq)) {
816 return;
819 trace_virtio_notify(vdev, vq);
820 vdev->isr |= 0x01;
821 virtio_notify_vector(vdev, vq->vector);
824 void virtio_notify_config(VirtIODevice *vdev)
826 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
827 return;
829 vdev->isr |= 0x03;
830 virtio_notify_vector(vdev, vdev->config_vector);
833 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
835 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
836 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
837 int i;
839 if (k->save_config) {
840 k->save_config(qbus->parent, f);
843 qemu_put_8s(f, &vdev->status);
844 qemu_put_8s(f, &vdev->isr);
845 qemu_put_be16s(f, &vdev->queue_sel);
846 qemu_put_be32s(f, &vdev->guest_features);
847 qemu_put_be32(f, vdev->config_len);
848 qemu_put_buffer(f, vdev->config, vdev->config_len);
850 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
851 if (vdev->vq[i].vring.num == 0)
852 break;
855 qemu_put_be32(f, i);
857 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
858 if (vdev->vq[i].vring.num == 0)
859 break;
861 qemu_put_be32(f, vdev->vq[i].vring.num);
862 if (k->has_variable_vring_alignment) {
863 qemu_put_be32(f, vdev->vq[i].vring.align);
865 qemu_put_be64(f, vdev->vq[i].pa);
866 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
867 if (k->save_queue) {
868 k->save_queue(qbus->parent, i, f);
873 int virtio_set_features(VirtIODevice *vdev, uint32_t val)
875 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
876 VirtioBusClass *vbusk = VIRTIO_BUS_GET_CLASS(qbus);
877 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
878 uint32_t supported_features = vbusk->get_features(qbus->parent);
879 bool bad = (val & ~supported_features) != 0;
881 val &= supported_features;
882 if (k->set_features) {
883 k->set_features(vdev, val);
885 vdev->guest_features = val;
886 return bad ? -1 : 0;
889 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
891 int num, i, ret;
892 uint32_t features;
893 uint32_t supported_features;
894 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
895 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
897 if (k->load_config) {
898 ret = k->load_config(qbus->parent, f);
899 if (ret)
900 return ret;
903 qemu_get_8s(f, &vdev->status);
904 qemu_get_8s(f, &vdev->isr);
905 qemu_get_be16s(f, &vdev->queue_sel);
906 qemu_get_be32s(f, &features);
908 if (virtio_set_features(vdev, features) < 0) {
909 supported_features = k->get_features(qbus->parent);
910 error_report("Features 0x%x unsupported. Allowed features: 0x%x",
911 features, supported_features);
912 return -1;
914 vdev->config_len = qemu_get_be32(f);
915 qemu_get_buffer(f, vdev->config, vdev->config_len);
917 num = qemu_get_be32(f);
919 for (i = 0; i < num; i++) {
920 vdev->vq[i].vring.num = qemu_get_be32(f);
921 if (k->has_variable_vring_alignment) {
922 vdev->vq[i].vring.align = qemu_get_be32(f);
924 vdev->vq[i].pa = qemu_get_be64(f);
925 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
926 vdev->vq[i].signalled_used_valid = false;
927 vdev->vq[i].notification = true;
929 if (vdev->vq[i].pa) {
930 uint16_t nheads;
931 virtqueue_init(&vdev->vq[i]);
932 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
933 /* Check it isn't doing very strange things with descriptor numbers. */
934 if (nheads > vdev->vq[i].vring.num) {
935 error_report("VQ %d size 0x%x Guest index 0x%x "
936 "inconsistent with Host index 0x%x: delta 0x%x",
937 i, vdev->vq[i].vring.num,
938 vring_avail_idx(&vdev->vq[i]),
939 vdev->vq[i].last_avail_idx, nheads);
940 return -1;
942 } else if (vdev->vq[i].last_avail_idx) {
943 error_report("VQ %d address 0x0 "
944 "inconsistent with Host index 0x%x",
945 i, vdev->vq[i].last_avail_idx);
946 return -1;
948 if (k->load_queue) {
949 ret = k->load_queue(qbus->parent, i, f);
950 if (ret)
951 return ret;
955 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
956 return 0;
959 void virtio_cleanup(VirtIODevice *vdev)
961 qemu_del_vm_change_state_handler(vdev->vmstate);
962 g_free(vdev->config);
963 g_free(vdev->vq);
966 static void virtio_vmstate_change(void *opaque, int running, RunState state)
968 VirtIODevice *vdev = opaque;
969 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
970 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
971 bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
972 vdev->vm_running = running;
974 if (backend_run) {
975 virtio_set_status(vdev, vdev->status);
978 if (k->vmstate_change) {
979 k->vmstate_change(qbus->parent, backend_run);
982 if (!backend_run) {
983 virtio_set_status(vdev, vdev->status);
987 void virtio_init(VirtIODevice *vdev, const char *name,
988 uint16_t device_id, size_t config_size)
990 int i;
991 vdev->device_id = device_id;
992 vdev->status = 0;
993 vdev->isr = 0;
994 vdev->queue_sel = 0;
995 vdev->config_vector = VIRTIO_NO_VECTOR;
996 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
997 vdev->vm_running = runstate_is_running();
998 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
999 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1000 vdev->vq[i].vdev = vdev;
1001 vdev->vq[i].queue_index = i;
1004 vdev->name = name;
1005 vdev->config_len = config_size;
1006 if (vdev->config_len) {
1007 vdev->config = g_malloc0(config_size);
1008 } else {
1009 vdev->config = NULL;
1011 vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1012 vdev);
1015 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1017 return vdev->vq[n].vring.desc;
1020 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1022 return vdev->vq[n].vring.avail;
1025 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1027 return vdev->vq[n].vring.used;
1030 hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
1032 return vdev->vq[n].vring.desc;
1035 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1037 return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1040 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1042 return offsetof(VRingAvail, ring) +
1043 sizeof(uint64_t) * vdev->vq[n].vring.num;
1046 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1048 return offsetof(VRingUsed, ring) +
1049 sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1052 hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
1054 return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
1055 virtio_queue_get_used_size(vdev, n);
1058 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1060 return vdev->vq[n].last_avail_idx;
1063 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1065 vdev->vq[n].last_avail_idx = idx;
1068 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
1070 vdev->vq[n].signalled_used_valid = false;
1073 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
1075 return vdev->vq + n;
1078 uint16_t virtio_get_queue_index(VirtQueue *vq)
1080 return vq->queue_index;
1083 static void virtio_queue_guest_notifier_read(EventNotifier *n)
1085 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
1086 if (event_notifier_test_and_clear(n)) {
1087 virtio_irq(vq);
1091 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
1092 bool with_irqfd)
1094 if (assign && !with_irqfd) {
1095 event_notifier_set_handler(&vq->guest_notifier,
1096 virtio_queue_guest_notifier_read);
1097 } else {
1098 event_notifier_set_handler(&vq->guest_notifier, NULL);
1100 if (!assign) {
1101 /* Test and clear notifier before closing it,
1102 * in case poll callback didn't have time to run. */
1103 virtio_queue_guest_notifier_read(&vq->guest_notifier);
1107 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
1109 return &vq->guest_notifier;
1112 static void virtio_queue_host_notifier_read(EventNotifier *n)
1114 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
1115 if (event_notifier_test_and_clear(n)) {
1116 virtio_queue_notify_vq(vq);
1120 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
1121 bool set_handler)
1123 if (assign && set_handler) {
1124 event_notifier_set_handler(&vq->host_notifier,
1125 virtio_queue_host_notifier_read);
1126 } else {
1127 event_notifier_set_handler(&vq->host_notifier, NULL);
1129 if (!assign) {
1130 /* Test and clear notifier before after disabling event,
1131 * in case poll callback didn't have time to run. */
1132 virtio_queue_host_notifier_read(&vq->host_notifier);
1136 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
1138 return &vq->host_notifier;
1141 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
1143 if (vdev->bus_name) {
1144 g_free(vdev->bus_name);
1145 vdev->bus_name = NULL;
1148 if (bus_name) {
1149 vdev->bus_name = g_strdup(bus_name);
1153 static int virtio_device_init(DeviceState *qdev)
1155 VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
1156 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
1157 assert(k->init != NULL);
1158 if (k->init(vdev) < 0) {
1159 return -1;
1161 virtio_bus_plug_device(vdev);
1162 return 0;
1165 static int virtio_device_exit(DeviceState *qdev)
1167 VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
1169 if (vdev->bus_name) {
1170 g_free(vdev->bus_name);
1171 vdev->bus_name = NULL;
1173 return 0;
1176 static void virtio_device_class_init(ObjectClass *klass, void *data)
1178 /* Set the default value here. */
1179 DeviceClass *dc = DEVICE_CLASS(klass);
1180 dc->init = virtio_device_init;
1181 dc->exit = virtio_device_exit;
1182 dc->bus_type = TYPE_VIRTIO_BUS;
1185 static const TypeInfo virtio_device_info = {
1186 .name = TYPE_VIRTIO_DEVICE,
1187 .parent = TYPE_DEVICE,
1188 .instance_size = sizeof(VirtIODevice),
1189 .class_init = virtio_device_class_init,
1190 .abstract = true,
1191 .class_size = sizeof(VirtioDeviceClass),
1194 static void virtio_register_types(void)
1196 type_register_static(&virtio_device_info);
1199 type_init(virtio_register_types)