Support PCI based option rom loading
[qemu.git] / hw / virtio.c
blobcecd0dc04258314ab77b85db58b252c681587d02
1 /*
2 * Virtio Support
4 * Copyright IBM, Corp. 2007
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include <inttypes.h>
16 #include "virtio.h"
17 #include "sysemu.h"
/* Alignment applied between the consumer and producer parts of the vring
 * (the x86 page size).  virtqueue_init() uses it to place the used ring. */
#define VIRTIO_PCI_VRING_ALIGN         4096
/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  The wmb() calls are kept anyway so the ordering requirement
 * stays obvious for KVM, or in case kqemu gets SMP support.
 * In any case, the compiler must be prevented from reordering the code,
 * which is all this definition does (an empty asm with a "memory" clobber).
 * TODO: we likely need some rmb()/mb() as well.
 */
#define wmb() __asm__ __volatile__("": : :"memory")
/* One entry of the descriptor table, as laid out in guest memory.
 * Only used here for sizeof()/offsetof(); fields are read through the
 * vring_desc_*() accessors. */
typedef struct VRingDesc
{
    uint64_t addr;      /* address handed to cpu_physical_memory_map() */
    uint32_t len;       /* buffer length in bytes */
    uint16_t flags;     /* VRING_DESC_F_NEXT / _WRITE / _INDIRECT bits */
    uint16_t next;      /* index of the chained descriptor when F_NEXT is set */
} VRingDesc;
/* Layout of the available ring in guest memory (used for offsetof() only). */
typedef struct VRingAvail
{
    uint16_t flags;     /* tested against VRING_AVAIL_F_NO_INTERRUPT */
    uint16_t idx;       /* compared with VirtQueue.last_avail_idx */
    uint16_t ring[0];   /* head descriptor numbers, vring.num entries */
} VRingAvail;
/* One entry of the used ring, written by virtqueue_fill(). */
typedef struct VRingUsedElem
{
    uint32_t id;        /* element index (head descriptor) being returned */
    uint32_t len;       /* length reported for that element */
} VRingUsedElem;
53 typedef struct VRingUsed
55 uint16_t flags;
56 uint16_t idx;
57 VRingUsedElem ring[0];
58 } VRingUsed;
60 typedef struct VRing
62 unsigned int num;
63 target_phys_addr_t desc;
64 target_phys_addr_t avail;
65 target_phys_addr_t used;
66 } VRing;
68 struct VirtQueue
70 VRing vring;
71 target_phys_addr_t pa;
72 uint16_t last_avail_idx;
73 int inuse;
74 uint16_t vector;
75 void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
/* Number of VirtQueue slots allocated for every device. */
#define VIRTIO_PCI_QUEUE_MAX        16
80 /* virt queue functions */
81 static void virtqueue_init(VirtQueue *vq)
83 target_phys_addr_t pa = vq->pa;
85 vq->vring.desc = pa;
86 vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
87 vq->vring.used = vring_align(vq->vring.avail +
88 offsetof(VRingAvail, ring[vq->vring.num]),
89 VIRTIO_PCI_VRING_ALIGN);
92 static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i)
94 target_phys_addr_t pa;
95 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
96 return ldq_phys(pa);
99 static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i)
101 target_phys_addr_t pa;
102 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
103 return ldl_phys(pa);
106 static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i)
108 target_phys_addr_t pa;
109 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
110 return lduw_phys(pa);
113 static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i)
115 target_phys_addr_t pa;
116 pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
117 return lduw_phys(pa);
120 static inline uint16_t vring_avail_flags(VirtQueue *vq)
122 target_phys_addr_t pa;
123 pa = vq->vring.avail + offsetof(VRingAvail, flags);
124 return lduw_phys(pa);
127 static inline uint16_t vring_avail_idx(VirtQueue *vq)
129 target_phys_addr_t pa;
130 pa = vq->vring.avail + offsetof(VRingAvail, idx);
131 return lduw_phys(pa);
134 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
136 target_phys_addr_t pa;
137 pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
138 return lduw_phys(pa);
141 static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
143 target_phys_addr_t pa;
144 pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
145 stl_phys(pa, val);
148 static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
150 target_phys_addr_t pa;
151 pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
152 stl_phys(pa, val);
155 static uint16_t vring_used_idx(VirtQueue *vq)
157 target_phys_addr_t pa;
158 pa = vq->vring.used + offsetof(VRingUsed, idx);
159 return lduw_phys(pa);
162 static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
164 target_phys_addr_t pa;
165 pa = vq->vring.used + offsetof(VRingUsed, idx);
166 stw_phys(pa, vring_used_idx(vq) + val);
169 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
171 target_phys_addr_t pa;
172 pa = vq->vring.used + offsetof(VRingUsed, flags);
173 stw_phys(pa, lduw_phys(pa) | mask);
176 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
178 target_phys_addr_t pa;
179 pa = vq->vring.used + offsetof(VRingUsed, flags);
180 stw_phys(pa, lduw_phys(pa) & ~mask);
183 void virtio_queue_set_notification(VirtQueue *vq, int enable)
185 if (enable)
186 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
187 else
188 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
191 int virtio_queue_ready(VirtQueue *vq)
193 return vq->vring.avail != 0;
196 int virtio_queue_empty(VirtQueue *vq)
198 return vring_avail_idx(vq) == vq->last_avail_idx;
201 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
202 unsigned int len, unsigned int idx)
204 unsigned int offset;
205 int i;
207 offset = 0;
208 for (i = 0; i < elem->in_num; i++) {
209 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
211 cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
212 elem->in_sg[i].iov_len,
213 1, size);
215 offset += elem->in_sg[i].iov_len;
218 for (i = 0; i < elem->out_num; i++)
219 cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
220 elem->out_sg[i].iov_len,
221 0, elem->out_sg[i].iov_len);
223 idx = (idx + vring_used_idx(vq)) % vq->vring.num;
225 /* Get a pointer to the next entry in the used ring. */
226 vring_used_ring_id(vq, idx, elem->index);
227 vring_used_ring_len(vq, idx, len);
230 void virtqueue_flush(VirtQueue *vq, unsigned int count)
232 /* Make sure buffer is written before we update index. */
233 wmb();
234 vring_used_idx_increment(vq, count);
235 vq->inuse -= count;
238 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
239 unsigned int len)
241 virtqueue_fill(vq, elem, len, 0);
242 virtqueue_flush(vq, 1);
245 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
247 uint16_t num_heads = vring_avail_idx(vq) - idx;
249 /* Check it isn't doing very strange things with descriptor numbers. */
250 if (num_heads > vq->vring.num) {
251 fprintf(stderr, "Guest moved used index from %u to %u",
252 idx, vring_avail_idx(vq));
253 exit(1);
256 return num_heads;
259 static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
261 unsigned int head;
263 /* Grab the next descriptor number they're advertising, and increment
264 * the index we've seen. */
265 head = vring_avail_ring(vq, idx % vq->vring.num);
267 /* If their number is silly, that's a fatal mistake. */
268 if (head >= vq->vring.num) {
269 fprintf(stderr, "Guest says index %u is available", head);
270 exit(1);
273 return head;
276 static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
277 unsigned int i, unsigned int max)
279 unsigned int next;
281 /* If this descriptor says it doesn't chain, we're done. */
282 if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
283 return max;
285 /* Check they're not leading us off end of descriptors. */
286 next = vring_desc_next(desc_pa, i);
287 /* Make sure compiler knows to grab that: we don't want it changing! */
288 wmb();
290 if (next >= max) {
291 fprintf(stderr, "Desc next is %u", next);
292 exit(1);
295 return next;
298 int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
300 unsigned int idx;
301 int total_bufs, in_total, out_total;
303 idx = vq->last_avail_idx;
305 total_bufs = in_total = out_total = 0;
306 while (virtqueue_num_heads(vq, idx)) {
307 unsigned int max, num_bufs, indirect = 0;
308 target_phys_addr_t desc_pa;
309 int i;
311 max = vq->vring.num;
312 num_bufs = total_bufs;
313 i = virtqueue_get_head(vq, idx++);
314 desc_pa = vq->vring.desc;
316 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
317 if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
318 fprintf(stderr, "Invalid size for indirect buffer table\n");
319 exit(1);
322 /* If we've got too many, that implies a descriptor loop. */
323 if (num_bufs >= max) {
324 fprintf(stderr, "Looped descriptor");
325 exit(1);
328 /* loop over the indirect descriptor table */
329 indirect = 1;
330 max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
331 num_bufs = i = 0;
332 desc_pa = vring_desc_addr(desc_pa, i);
335 do {
336 /* If we've got too many, that implies a descriptor loop. */
337 if (++num_bufs > max) {
338 fprintf(stderr, "Looped descriptor");
339 exit(1);
342 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
343 if (in_bytes > 0 &&
344 (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
345 return 1;
346 } else {
347 if (out_bytes > 0 &&
348 (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
349 return 1;
351 } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
353 if (!indirect)
354 total_bufs = num_bufs;
355 else
356 total_bufs++;
359 return 0;
362 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
364 unsigned int i, head, max;
365 target_phys_addr_t desc_pa = vq->vring.desc;
366 target_phys_addr_t len;
368 if (!virtqueue_num_heads(vq, vq->last_avail_idx))
369 return 0;
371 /* When we start there are none of either input nor output. */
372 elem->out_num = elem->in_num = 0;
374 max = vq->vring.num;
376 i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
378 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
379 if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
380 fprintf(stderr, "Invalid size for indirect buffer table\n");
381 exit(1);
384 /* loop over the indirect descriptor table */
385 max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
386 desc_pa = vring_desc_addr(desc_pa, i);
387 i = 0;
390 do {
391 struct iovec *sg;
392 int is_write = 0;
394 if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
395 elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
396 sg = &elem->in_sg[elem->in_num++];
397 is_write = 1;
398 } else
399 sg = &elem->out_sg[elem->out_num++];
401 /* Grab the first descriptor, and check it's OK. */
402 sg->iov_len = vring_desc_len(desc_pa, i);
403 len = sg->iov_len;
405 sg->iov_base = cpu_physical_memory_map(vring_desc_addr(desc_pa, i),
406 &len, is_write);
408 if (sg->iov_base == NULL || len != sg->iov_len) {
409 fprintf(stderr, "virtio: trying to map MMIO memory\n");
410 exit(1);
413 /* If we've got too many, that implies a descriptor loop. */
414 if ((elem->in_num + elem->out_num) > max) {
415 fprintf(stderr, "Looped descriptor");
416 exit(1);
418 } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
420 elem->index = head;
422 vq->inuse++;
424 return elem->in_num + elem->out_num;
427 /* virtio device */
428 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
430 if (vdev->binding->notify) {
431 vdev->binding->notify(vdev->binding_opaque, vector);
435 void virtio_update_irq(VirtIODevice *vdev)
437 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
440 void virtio_reset(void *opaque)
442 VirtIODevice *vdev = opaque;
443 int i;
445 if (vdev->reset)
446 vdev->reset(vdev);
448 vdev->features = 0;
449 vdev->queue_sel = 0;
450 vdev->status = 0;
451 vdev->isr = 0;
452 vdev->config_vector = VIRTIO_NO_VECTOR;
453 virtio_notify_vector(vdev, vdev->config_vector);
455 for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
456 vdev->vq[i].vring.desc = 0;
457 vdev->vq[i].vring.avail = 0;
458 vdev->vq[i].vring.used = 0;
459 vdev->vq[i].last_avail_idx = 0;
460 vdev->vq[i].pa = 0;
461 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
465 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
467 uint8_t val;
469 vdev->get_config(vdev, vdev->config);
471 if (addr > (vdev->config_len - sizeof(val)))
472 return (uint32_t)-1;
474 memcpy(&val, vdev->config + addr, sizeof(val));
475 return val;
478 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
480 uint16_t val;
482 vdev->get_config(vdev, vdev->config);
484 if (addr > (vdev->config_len - sizeof(val)))
485 return (uint32_t)-1;
487 memcpy(&val, vdev->config + addr, sizeof(val));
488 return val;
491 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
493 uint32_t val;
495 vdev->get_config(vdev, vdev->config);
497 if (addr > (vdev->config_len - sizeof(val)))
498 return (uint32_t)-1;
500 memcpy(&val, vdev->config + addr, sizeof(val));
501 return val;
504 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
506 uint8_t val = data;
508 if (addr > (vdev->config_len - sizeof(val)))
509 return;
511 memcpy(vdev->config + addr, &val, sizeof(val));
513 if (vdev->set_config)
514 vdev->set_config(vdev, vdev->config);
517 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
519 uint16_t val = data;
521 if (addr > (vdev->config_len - sizeof(val)))
522 return;
524 memcpy(vdev->config + addr, &val, sizeof(val));
526 if (vdev->set_config)
527 vdev->set_config(vdev, vdev->config);
530 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
532 uint32_t val = data;
534 if (addr > (vdev->config_len - sizeof(val)))
535 return;
537 memcpy(vdev->config + addr, &val, sizeof(val));
539 if (vdev->set_config)
540 vdev->set_config(vdev, vdev->config);
543 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
545 vdev->vq[n].pa = addr;
546 virtqueue_init(&vdev->vq[n]);
549 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
551 return vdev->vq[n].pa;
554 int virtio_queue_get_num(VirtIODevice *vdev, int n)
556 return vdev->vq[n].vring.num;
559 void virtio_queue_notify(VirtIODevice *vdev, int n)
561 if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
562 vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
566 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
568 return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
569 VIRTIO_NO_VECTOR;
572 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
574 if (n < VIRTIO_PCI_QUEUE_MAX)
575 vdev->vq[n].vector = vector;
578 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
579 void (*handle_output)(VirtIODevice *, VirtQueue *))
581 int i;
583 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
584 if (vdev->vq[i].vring.num == 0)
585 break;
588 if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
589 abort();
591 vdev->vq[i].vring.num = queue_size;
592 vdev->vq[i].handle_output = handle_output;
594 return &vdev->vq[i];
597 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
599 /* Always notify when queue is empty (when feature acknowledge) */
600 if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
601 (!(vdev->features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
602 (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
603 return;
605 vdev->isr |= 0x01;
606 virtio_notify_vector(vdev, vq->vector);
609 void virtio_notify_config(VirtIODevice *vdev)
611 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
612 return;
614 vdev->isr |= 0x03;
615 virtio_notify_vector(vdev, vdev->config_vector);
618 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
620 int i;
622 if (vdev->binding->save_config)
623 vdev->binding->save_config(vdev->binding_opaque, f);
625 qemu_put_8s(f, &vdev->status);
626 qemu_put_8s(f, &vdev->isr);
627 qemu_put_be16s(f, &vdev->queue_sel);
628 qemu_put_be32s(f, &vdev->features);
629 qemu_put_be32(f, vdev->config_len);
630 qemu_put_buffer(f, vdev->config, vdev->config_len);
632 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
633 if (vdev->vq[i].vring.num == 0)
634 break;
637 qemu_put_be32(f, i);
639 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
640 if (vdev->vq[i].vring.num == 0)
641 break;
643 qemu_put_be32(f, vdev->vq[i].vring.num);
644 qemu_put_be64(f, vdev->vq[i].pa);
645 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
646 if (vdev->binding->save_queue)
647 vdev->binding->save_queue(vdev->binding_opaque, i, f);
651 int virtio_load(VirtIODevice *vdev, QEMUFile *f)
653 int num, i, ret;
654 uint32_t features;
655 uint32_t supported_features = vdev->get_features(vdev) |
656 vdev->binding->get_features(vdev->binding_opaque);
658 if (vdev->binding->load_config) {
659 ret = vdev->binding->load_config(vdev->binding_opaque, f);
660 if (ret)
661 return ret;
664 qemu_get_8s(f, &vdev->status);
665 qemu_get_8s(f, &vdev->isr);
666 qemu_get_be16s(f, &vdev->queue_sel);
667 qemu_get_be32s(f, &features);
668 if (features & ~supported_features) {
669 fprintf(stderr, "Features 0x%x unsupported. Allowed features: 0x%x\n",
670 features, supported_features);
671 return -1;
673 vdev->features = features;
674 vdev->config_len = qemu_get_be32(f);
675 qemu_get_buffer(f, vdev->config, vdev->config_len);
677 num = qemu_get_be32(f);
679 for (i = 0; i < num; i++) {
680 vdev->vq[i].vring.num = qemu_get_be32(f);
681 vdev->vq[i].pa = qemu_get_be64(f);
682 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
684 if (vdev->vq[i].pa) {
685 virtqueue_init(&vdev->vq[i]);
687 if (vdev->binding->load_queue) {
688 ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
689 if (ret)
690 return ret;
694 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
695 return 0;
698 void virtio_cleanup(VirtIODevice *vdev)
700 if (vdev->config)
701 qemu_free(vdev->config);
702 qemu_free(vdev->vq);
705 VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
706 size_t config_size, size_t struct_size)
708 VirtIODevice *vdev;
709 int i;
711 vdev = qemu_mallocz(struct_size);
713 vdev->device_id = device_id;
714 vdev->status = 0;
715 vdev->isr = 0;
716 vdev->queue_sel = 0;
717 vdev->config_vector = VIRTIO_NO_VECTOR;
718 vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
719 for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++)
720 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
722 vdev->name = name;
723 vdev->config_len = config_size;
724 if (vdev->config_len)
725 vdev->config = qemu_mallocz(config_size);
726 else
727 vdev->config = NULL;
729 return vdev;
732 void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
733 void *opaque)
735 vdev->binding = binding;
736 vdev->binding_opaque = opaque;