Merge remote branch 'mst/for_anthony' into staging
[qemu/aliguori-queue.git] / hw / virtio.c
blob7c020a3383a7c0df01fe8d792e7b2ffa0590fb94
1 /*
2 * Virtio Support
4 * Copyright IBM, Corp. 2007
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include <inttypes.h>
16 #include "virtio.h"
17 #include "sysemu.h"
/* Alignment, in bytes, applied between the consumer and producer parts of
 * the vring.  This is the x86 page size. */
#define VIRTIO_PCI_VRING_ALIGN         4096
/* QEMU does not strictly need write barriers because everything runs in
 * lock-step.  The wmb() calls are kept anyway so the ordering requirements
 * stay obvious for KVM, or in case kqemu gains SMP support.
 * In any case, the compiler must be prevented from reordering the code.
 * TODO: we likely need some rmb()/mb() as well.
 */
#define wmb() __asm__ __volatile__("" : : : "memory")
/* Layout of one descriptor table entry.  The fields are never accessed
 * through a host pointer in this file; the struct is only used for
 * sizeof()/offsetof() when computing guest physical addresses. */
typedef struct VRingDesc
{
    uint64_t addr;      /* buffer address, read by vring_desc_addr() */
    uint32_t len;       /* buffer length, read by vring_desc_len() */
    uint16_t flags;     /* VRING_DESC_F_* bits, read by vring_desc_flags() */
    uint16_t next;      /* index of the chained descriptor */
} VRingDesc;
/* Layout of the available ring header followed by its entries.  Used only
 * for offsetof() computations on guest physical addresses. */
typedef struct VRingAvail
{
    uint16_t flags;     /* read by vring_avail_flags() */
    uint16_t idx;       /* read by vring_avail_idx() */
    uint16_t ring[0];   /* descriptor head indices, read by vring_avail_ring() */
} VRingAvail;
/* Layout of one used ring entry. */
typedef struct VRingUsedElem
{
    uint32_t id;        /* written by vring_used_ring_id() */
    uint32_t len;       /* written by vring_used_ring_len() */
} VRingUsedElem;
53 typedef struct VRingUsed
55 uint16_t flags;
56 uint16_t idx;
57 VRingUsedElem ring[0];
58 } VRingUsed;
60 typedef struct VRing
62 unsigned int num;
63 target_phys_addr_t desc;
64 target_phys_addr_t avail;
65 target_phys_addr_t used;
66 } VRing;
68 struct VirtQueue
70 VRing vring;
71 target_phys_addr_t pa;
72 uint16_t last_avail_idx;
73 int inuse;
74 uint16_t vector;
75 void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
78 /* virt queue functions */
79 static void virtqueue_init(VirtQueue *vq)
81 target_phys_addr_t pa = vq->pa;
83 vq->vring.desc = pa;
84 vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
85 vq->vring.used = vring_align(vq->vring.avail +
86 offsetof(VRingAvail, ring[vq->vring.num]),
87 VIRTIO_PCI_VRING_ALIGN);
90 static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i)
92 target_phys_addr_t pa;
93 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
94 return ldq_phys(pa);
97 static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i)
99 target_phys_addr_t pa;
100 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
101 return ldl_phys(pa);
104 static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i)
106 target_phys_addr_t pa;
107 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
108 return lduw_phys(pa);
111 static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i)
113 target_phys_addr_t pa;
114 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
115 return lduw_phys(pa);
118 static inline uint16_t vring_avail_flags(VirtQueue *vq)
120 target_phys_addr_t pa;
121 pa = vq->vring.avail + offsetof(VRingAvail, flags);
122 return lduw_phys(pa);
125 static inline uint16_t vring_avail_idx(VirtQueue *vq)
127 target_phys_addr_t pa;
128 pa = vq->vring.avail + offsetof(VRingAvail, idx);
129 return lduw_phys(pa);
132 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
134 target_phys_addr_t pa;
135 pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
136 return lduw_phys(pa);
139 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
141 target_phys_addr_t pa;
142 pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
143 stl_phys(pa, val);
146 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
148 target_phys_addr_t pa;
149 pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
150 stl_phys(pa, val);
153 static uint16_t vring_used_idx(VirtQueue *vq)
155 target_phys_addr_t pa;
156 pa = vq->vring.used + offsetof(VRingUsed, idx);
157 return lduw_phys(pa);
160 static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
162 target_phys_addr_t pa;
163 pa = vq->vring.used + offsetof(VRingUsed, idx);
164 stw_phys(pa, vring_used_idx(vq) + val);
167 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
169 target_phys_addr_t pa;
170 pa = vq->vring.used + offsetof(VRingUsed, flags);
171 stw_phys(pa, lduw_phys(pa) | mask);
174 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
176 target_phys_addr_t pa;
177 pa = vq->vring.used + offsetof(VRingUsed, flags);
178 stw_phys(pa, lduw_phys(pa) & ~mask);
181 void virtio_queue_set_notification(VirtQueue *vq, int enable)
183 if (enable)
184 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
185 else
186 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
189 int virtio_queue_ready(VirtQueue *vq)
191 return vq->vring.avail != 0;
194 int virtio_queue_empty(VirtQueue *vq)
196 return vring_avail_idx(vq) == vq->last_avail_idx;
199 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
200 unsigned int len, unsigned int idx)
202 unsigned int offset;
203 int i;
205 offset = 0;
206 for (i = 0; i < elem->in_num; i++) {
207 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
209 cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
210 elem->in_sg[i].iov_len,
211 1, size);
213 offset += elem->in_sg[i].iov_len;
216 for (i = 0; i < elem->out_num; i++)
217 cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
218 elem->out_sg[i].iov_len,
219 0, elem->out_sg[i].iov_len);
221 idx = (idx + vring_used_idx(vq)) % vq->vring.num;
223 /* Get a pointer to the next entry in the used ring. */
224 vring_used_ring_id(vq, idx, elem->index);
225 vring_used_ring_len(vq, idx, len);
228 void virtqueue_flush(VirtQueue *vq, unsigned int count)
230 /* Make sure buffer is written before we update index. */
231 wmb();
232 vring_used_idx_increment(vq, count);
233 vq->inuse -= count;
236 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
237 unsigned int len)
239 virtqueue_fill(vq, elem, len, 0);
240 virtqueue_flush(vq, 1);
243 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
245 uint16_t num_heads = vring_avail_idx(vq) - idx;
247 /* Check it isn't doing very strange things with descriptor numbers. */
248 if (num_heads > vq->vring.num) {
249 fprintf(stderr, "Guest moved used index from %u to %u",
250 idx, vring_avail_idx(vq));
251 exit(1);
254 return num_heads;
257 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
259 unsigned int head;
261 /* Grab the next descriptor number they're advertising, and increment
262 * the index we've seen. */
263 head = vring_avail_ring(vq, idx % vq->vring.num);
265 /* If their number is silly, that's a fatal mistake. */
266 if (head >= vq->vring.num) {
267 fprintf(stderr, "Guest says index %u is available", head);
268 exit(1);
271 return head;
274 static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
275 unsigned int i, unsigned int max)
277 unsigned int next;
279 /* If this descriptor says it doesn't chain, we're done. */
280 if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
281 return max;
283 /* Check they're not leading us off end of descriptors. */
284 next = vring_desc_next(desc_pa, i);
285 /* Make sure compiler knows to grab that: we don't want it changing! */
286 wmb();
288 if (next >= max) {
289 fprintf(stderr, "Desc next is %u", next);
290 exit(1);
293 return next;
296 int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
298 unsigned int idx;
299 int total_bufs, in_total, out_total;
301 idx = vq->last_avail_idx;
303 total_bufs = in_total = out_total = 0;
304 while (virtqueue_num_heads(vq, idx)) {
305 unsigned int max, num_bufs, indirect = 0;
306 target_phys_addr_t desc_pa;
307 int i;
309 max = vq->vring.num;
310 num_bufs = total_bufs;
311 i = virtqueue_get_head(vq, idx++);
312 desc_pa = vq->vring.desc;
314 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
315 if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
316 fprintf(stderr, "Invalid size for indirect buffer table\n");
317 exit(1);
320 /* If we've got too many, that implies a descriptor loop. */
321 if (num_bufs >= max) {
322 fprintf(stderr, "Looped descriptor");
323 exit(1);
326 /* loop over the indirect descriptor table */
327 indirect = 1;
328 max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
329 num_bufs = i = 0;
330 desc_pa = vring_desc_addr(desc_pa, i);
333 do {
334 /* If we've got too many, that implies a descriptor loop. */
335 if (++num_bufs > max) {
336 fprintf(stderr, "Looped descriptor");
337 exit(1);
340 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
341 if (in_bytes > 0 &&
342 (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
343 return 1;
344 } else {
345 if (out_bytes > 0 &&
346 (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
347 return 1;
349 } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
351 if (!indirect)
352 total_bufs = num_bufs;
353 else
354 total_bufs++;
357 return 0;
360 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
362 unsigned int i, head, max;
363 target_phys_addr_t desc_pa = vq->vring.desc;
364 target_phys_addr_t len;
366 if (!virtqueue_num_heads(vq, vq->last_avail_idx))
367 return 0;
369 /* When we start there are none of either input nor output. */
370 elem->out_num = elem->in_num = 0;
372 max = vq->vring.num;
374 i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
376 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
377 if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
378 fprintf(stderr, "Invalid size for indirect buffer table\n");
379 exit(1);
382 /* loop over the indirect descriptor table */
383 max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
384 desc_pa = vring_desc_addr(desc_pa, i);
385 i = 0;
388 do {
389 struct iovec *sg;
390 int is_write = 0;
392 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
393 elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
394 sg = &elem->in_sg[elem->in_num++];
395 is_write = 1;
396 } else
397 sg = &elem->out_sg[elem->out_num++];
399 /* Grab the first descriptor, and check it's OK. */
400 sg->iov_len = vring_desc_len(desc_pa, i);
401 len = sg->iov_len;
403 sg->iov_base = cpu_physical_memory_map(vring_desc_addr(desc_pa, i),
404 &len, is_write);
406 if (sg->iov_base == NULL || len != sg->iov_len) {
407 fprintf(stderr, "virtio: trying to map MMIO memory\n");
408 exit(1);
411 /* If we've got too many, that implies a descriptor loop. */
412 if ((elem->in_num + elem->out_num) > max) {
413 fprintf(stderr, "Looped descriptor");
414 exit(1);
416 } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
418 elem->index = head;
420 vq->inuse++;
422 return elem->in_num + elem->out_num;
425 /* virtio device */
426 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
428 if (vdev->binding->notify) {
429 vdev->binding->notify(vdev->binding_opaque, vector);
433 void virtio_update_irq(VirtIODevice *vdev)
435 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
438 void virtio_reset(void *opaque)
440 VirtIODevice *vdev = opaque;
441 int i;
443 if (vdev->reset)
444 vdev->reset(vdev);
446 vdev->guest_features = 0;
447 vdev->queue_sel = 0;
448 vdev->status = 0;
449 vdev->isr = 0;
450 vdev->config_vector = VIRTIO_NO_VECTOR;
451 virtio_notify_vector(vdev, vdev->config_vector);
453 for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
454 vdev->vq[i].vring.desc = 0;
455 vdev->vq[i].vring.avail = 0;
456 vdev->vq[i].vring.used = 0;
457 vdev->vq[i].last_avail_idx = 0;
458 vdev->vq[i].pa = 0;
459 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
463 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
465 uint8_t val;
467 vdev->get_config(vdev, vdev->config);
469 if (addr > (vdev->config_len - sizeof(val)))
470 return (uint32_t)-1;
472 memcpy(&val, vdev->config + addr, sizeof(val));
473 return val;
476 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
478 uint16_t val;
480 vdev->get_config(vdev, vdev->config);
482 if (addr > (vdev->config_len - sizeof(val)))
483 return (uint32_t)-1;
485 memcpy(&val, vdev->config + addr, sizeof(val));
486 return val;
489 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
491 uint32_t val;
493 vdev->get_config(vdev, vdev->config);
495 if (addr > (vdev->config_len - sizeof(val)))
496 return (uint32_t)-1;
498 memcpy(&val, vdev->config + addr, sizeof(val));
499 return val;
502 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
504 uint8_t val = data;
506 if (addr > (vdev->config_len - sizeof(val)))
507 return;
509 memcpy(vdev->config + addr, &val, sizeof(val));
511 if (vdev->set_config)
512 vdev->set_config(vdev, vdev->config);
515 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
517 uint16_t val = data;
519 if (addr > (vdev->config_len - sizeof(val)))
520 return;
522 memcpy(vdev->config + addr, &val, sizeof(val));
524 if (vdev->set_config)
525 vdev->set_config(vdev, vdev->config);
528 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
530 uint32_t val = data;
532 if (addr > (vdev->config_len - sizeof(val)))
533 return;
535 memcpy(vdev->config + addr, &val, sizeof(val));
537 if (vdev->set_config)
538 vdev->set_config(vdev, vdev->config);
541 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
543 vdev->vq[n].pa = addr;
544 virtqueue_init(&vdev->vq[n]);
547 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
549 return vdev->vq[n].pa;
552 int virtio_queue_get_num(VirtIODevice *vdev, int n)
554 return vdev->vq[n].vring.num;
557 void virtio_queue_notify(VirtIODevice *vdev, int n)
559 if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
560 vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
564 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
566 return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
567 VIRTIO_NO_VECTOR;
570 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
572 if (n < VIRTIO_PCI_QUEUE_MAX)
573 vdev->vq[n].vector = vector;
576 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
577 void (*handle_output)(VirtIODevice *, VirtQueue *))
579 int i;
581 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
582 if (vdev->vq[i].vring.num == 0)
583 break;
586 if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
587 abort();
589 vdev->vq[i].vring.num = queue_size;
590 vdev->vq[i].handle_output = handle_output;
592 return &vdev->vq[i];
595 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
597 /* Always notify when queue is empty (when feature acknowledge) */
598 if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
599 (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
600 (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
601 return;
603 vdev->isr |= 0x01;
604 virtio_notify_vector(vdev, vq->vector);
607 void virtio_notify_config(VirtIODevice *vdev)
609 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
610 return;
612 vdev->isr |= 0x03;
613 virtio_notify_vector(vdev, vdev->config_vector);
616 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
618 int i;
620 if (vdev->binding->save_config)
621 vdev->binding->save_config(vdev->binding_opaque, f);
623 qemu_put_8s(f, &vdev->status);
624 qemu_put_8s(f, &vdev->isr);
625 qemu_put_be16s(f, &vdev->queue_sel);
626 qemu_put_be32s(f, &vdev->guest_features);
627 qemu_put_be32(f, vdev->config_len);
628 qemu_put_buffer(f, vdev->config, vdev->config_len);
630 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
631 if (vdev->vq[i].vring.num == 0)
632 break;
635 qemu_put_be32(f, i);
637 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
638 if (vdev->vq[i].vring.num == 0)
639 break;
641 qemu_put_be32(f, vdev->vq[i].vring.num);
642 qemu_put_be64(f, vdev->vq[i].pa);
643 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
644 if (vdev->binding->save_queue)
645 vdev->binding->save_queue(vdev->binding_opaque, i, f);
649 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
651 int num, i, ret;
652 uint32_t features;
653 uint32_t supported_features =
654 vdev->binding->get_features(vdev->binding_opaque);
656 if (vdev->binding->load_config) {
657 ret = vdev->binding->load_config(vdev->binding_opaque, f);
658 if (ret)
659 return ret;
662 qemu_get_8s(f, &vdev->status);
663 qemu_get_8s(f, &vdev->isr);
664 qemu_get_be16s(f, &vdev->queue_sel);
665 qemu_get_be32s(f, &features);
666 if (features & ~supported_features) {
667 fprintf(stderr, "Features 0x%x unsupported. Allowed features: 0x%x\n",
668 features, supported_features);
669 return -1;
671 vdev->guest_features = features;
672 vdev->config_len = qemu_get_be32(f);
673 qemu_get_buffer(f, vdev->config, vdev->config_len);
675 num = qemu_get_be32(f);
677 for (i = 0; i < num; i++) {
678 vdev->vq[i].vring.num = qemu_get_be32(f);
679 vdev->vq[i].pa = qemu_get_be64(f);
680 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
682 if (vdev->vq[i].pa) {
683 virtqueue_init(&vdev->vq[i]);
685 if (vdev->binding->load_queue) {
686 ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
687 if (ret)
688 return ret;
692 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
693 return 0;
696 void virtio_cleanup(VirtIODevice *vdev)
698 if (vdev->config)
699 qemu_free(vdev->config);
700 qemu_free(vdev->vq);
703 VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
704 size_t config_size, size_t struct_size)
706 VirtIODevice *vdev;
707 int i;
709 vdev = qemu_mallocz(struct_size);
711 vdev->device_id = device_id;
712 vdev->status = 0;
713 vdev->isr = 0;
714 vdev->queue_sel = 0;
715 vdev->config_vector = VIRTIO_NO_VECTOR;
716 vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
717 for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++)
718 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
720 vdev->name = name;
721 vdev->config_len = config_size;
722 if (vdev->config_len)
723 vdev->config = qemu_mallocz(config_size);
724 else
725 vdev->config = NULL;
727 return vdev;
730 void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
731 void *opaque)
733 vdev->binding = binding;
734 vdev->binding_opaque = opaque;