/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <inttypes.h>
#include <err.h>

#include "virtio.h"
#include "sysemu.h"

/* from Linux's linux/virtio_pci.h */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS               18

/* An 8-bit r/o interrupt status register.  Reading the value will return the
 * current contents of the ISR and will also clear it.  This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR                  19

#define VIRTIO_PCI_CONFIG               20

/* Virtio ABI version; if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION          0
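
/* Together these offsets describe the 20-byte common header at the start of
 * the device's I/O BAR; the device-specific configuration space follows
 * immediately at VIRTIO_PCI_CONFIG (see virtio_map() below). */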

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)

/* virt queue functions */
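
/* Translate a guest-physical address to a pointer into our view of guest
 * RAM, insisting that the whole [addr, addr + size) range is ordinary,
 * physically contiguous RAM; ring structures and buffers are subsequently
 * accessed directly through the returned pointer. */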
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t off;
    target_phys_addr_t addr1;

    off = cpu_get_physical_page_desc(addr);
    if ((off & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        fprintf(stderr, "virtio DMA to IO ram\n");
        exit(1);
    }

    off = (off & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);

    for (addr1 = addr + TARGET_PAGE_SIZE;
         addr1 < TARGET_PAGE_ALIGN(addr + size);
         addr1 += TARGET_PAGE_SIZE) {
        ram_addr_t off1;

        off1 = cpu_get_physical_page_desc(addr1);
        if ((off1 & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
            fprintf(stderr, "virtio DMA to IO ram\n");
            exit(1);
        }

        off1 = (off1 & TARGET_PAGE_MASK) | (addr1 & ~TARGET_PAGE_MASK);

        if (off1 != (off + (addr1 - addr))) {
            fprintf(stderr, "discontiguous virtio memory\n");
            exit(1);
        }
    }

    return phys_ram_base + off;
}

static size_t virtqueue_size(int num)
{
    return TARGET_PAGE_ALIGN((sizeof(VRingDesc) * num) +
                             (sizeof(VRingAvail) + sizeof(uint16_t) * num)) +
        (sizeof(VRingUsed) + sizeof(VRingUsedElem) * num);
}
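
/* For example, with num = 256, 4 KiB pages, and the usual legacy vring sizes
 * (16-byte descriptors, 4-byte avail/used headers, 2-byte avail entries,
 * 8-byte used elements): 4096 + 4 + 512 = 4612 bytes rounds up to 8192,
 * plus 4 + 2048 = 2052 bytes for the used ring, 10244 bytes in total. */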

static void virtqueue_init(VirtQueue *vq, void *p)
{
    vq->vring.desc = p;
    vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]);
}
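
/* The resulting layout within the guest pages:
 *
 *   desc:  num * sizeof(VRingDesc)     descriptor table
 *   avail: flags, idx, ring[num]       guest -> host
 *          ...padding to the next page boundary...
 *   used:  flags, idx, ring[num]       host -> guest
 */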

static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
        return vq->vring.num;

    /* Check they're not leading us off the end of the descriptors. */
    next = vq->vring.desc[i].next;
    /* Make sure the compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= vq->vring.num)
        errx(1, "Desc next is %u", next);

    return next;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    VRingUsedElem *used;

    /* Get a pointer to the next entry in the used ring. */
    used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
    used->id = elem->index;
    used->len = len;
    /* Make sure buffer is written before we update index. */
    wmb();
    vq->vring.used->idx++;
}

int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head;
    unsigned int position;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
        errx(1, "Guest moved used index from %u to %u",
             vq->last_avail_idx, vq->vring.avail->idx);

    /* If there's nothing new since last we looked, return invalid. */
    if (vq->vring.avail->idx == vq->last_avail_idx)
        return 0;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num)
        errx(1, "Guest says index %u is available", head);

    /* When we start there are neither input nor output buffers. */
    position = elem->out_num = elem->in_num = 0;

    i = head;
    do {
        struct iovec *sg;

        if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
            sg = &elem->in_sg[elem->in_num++];
        else
            sg = &elem->out_sg[elem->out_num++];

        /* Grab the descriptor and check it's OK. */
        sg->iov_len = vq->vring.desc[i].len;
        sg->iov_base = virtio_map_gpa(vq->vring.desc[i].addr, sg->iov_len);
        if (sg->iov_base == NULL)
            errx(1, "Invalid mapping");

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num)
            errx(1, "Looped descriptor");
    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);

    elem->index = head;

    return elem->in_num + elem->out_num;
}
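
/* A minimal sketch of how a device model typically drives the pop/push
 * cycle from its handle_output callback (my_handle_output and
 * my_process_request are hypothetical, not part of this file):
 *
 *   static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *   {
 *       VirtQueueElement elem;
 *
 *       while (virtqueue_pop(vq, &elem)) {
 *           // out_sg[] carries guest -> device data, in_sg[] the reverse
 *           unsigned len = my_process_request(elem.out_sg, elem.out_num,
 *                                             elem.in_sg, elem.in_num);
 *           // len is the number of bytes the device wrote into in_sg[]
 *           virtqueue_push(vq, &elem, len);
 *           virtio_notify(vdev, vq);
 *       }
 *   }
 */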

/* virtio device */

static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
{
    return (VirtIODevice *)pci_dev;
}

static void virtio_update_irq(VirtIODevice *vdev)
{
    qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    vdev->features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = NULL;
        vdev->vq[i].vring.avail = NULL;
        vdev->vq[i].vring.used = NULL;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pfn = 0;
    }
}

static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    ram_addr_t pa;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        if (vdev->set_features)
            vdev->set_features(vdev, val);
        vdev->features = val;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (ram_addr_t)val << TARGET_PAGE_BITS;
        vdev->vq[vdev->queue_sel].pfn = val;
        if (pa == 0) {
            virtio_reset(vdev);
        } else {
            size_t size = virtqueue_size(vdev->vq[vdev->queue_sel].vring.num);
            virtqueue_init(&vdev->vq[vdev->queue_sel],
                           virtio_map_gpa(pa, size));
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_PCI_QUEUE_MAX)
            vdev->queue_sel = val;
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc)
            vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
        break;
    case VIRTIO_PCI_STATUS:
        vdev->status = val & 0xFF;
        if (vdev->status == 0)
            virtio_reset(vdev);
        break;
    }
}
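
/* For reference, the sequence a legacy guest driver performs against the
 * handler above when bringing up queue n (a sketch of the guest side, not
 * code in this file):
 *
 *   outw(base + VIRTIO_PCI_QUEUE_SEL, n);       // select the queue
 *   num = inw(base + VIRTIO_PCI_QUEUE_NUM);     // read its size
 *   // allocate virtqueue_size(num) bytes of page-aligned, physically
 *   // contiguous memory for the rings, then hand over its PFN:
 *   outl(base + VIRTIO_PCI_QUEUE_PFN, ring_pa >> TARGET_PAGE_BITS);
 *
 * Writing a PFN of 0 resets the whole device via virtio_reset(). */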

static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    uint32_t ret = 0xFFFFFFFF;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->get_features(vdev);
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = vdev->vq[vdev->queue_sel].pfn;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = vdev->vq[vdev->queue_sel].vring.num;
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = vdev->isr;
        vdev->isr = 0;
        virtio_update_irq(vdev);
        break;
    default:
        break;
    }

    return ret;
}

static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint8_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint16_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint32_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint8_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}

static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint16_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}

static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint32_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}
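
/* The buffer these accessors operate on is filled in by the device's
 * update_config callback when the BAR is mapped (see virtio_map() below).
 * A sketch, with MyDevice and my_config purely hypothetical:
 *
 *   static void my_update_config(VirtIODevice *vdev, uint8_t *config)
 *   {
 *       MyDevice *s = (MyDevice *)vdev;
 *       struct my_config cfg;
 *
 *       cfg.num_things = cpu_to_le32(s->num_things);
 *       memcpy(config, &cfg, sizeof(cfg));
 *   }
 */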

static void virtio_map(PCIDevice *pci_dev, int region_num,
                       uint32_t addr, uint32_t size, int type)
{
    VirtIODevice *vdev = to_virtio_device(pci_dev);
    int i;

    vdev->addr = addr;
    for (i = 0; i < 3; i++) {
        register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
        register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
    }

    if (vdev->config_len) {
        register_ioport_write(addr + 20, vdev->config_len, 1,
                              virtio_config_writeb, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 2,
                              virtio_config_writew, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 4,
                              virtio_config_writel, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 1,
                             virtio_config_readb, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 2,
                             virtio_config_readw, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 4,
                             virtio_config_readl, vdev);

        vdev->update_config(vdev, vdev->config);
    }
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}
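
/* A device model calls this once per queue at init time and stashes the
 * returned handle; e.g. (queue size and names are illustrative only):
 *
 *   s->vq = virtio_add_queue(&s->vdev, 128, my_handle_output);
 */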

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* Honour VRING_AVAIL_F_NO_INTERRUPT while work is still pending, but
     * always notify when the queue is empty. */
    if (vq->vring.avail->idx != vq->last_avail_idx &&
        (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
        return;

    vdev->isr = 1;
    virtio_update_irq(vdev);
}

void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    pci_device_save(&vdev->pci_dev, f);

    qemu_put_be32s(f, &vdev->addr);
    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    /* Count the configured queues... */
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    /* ...then save each of them. */
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be32s(f, &vdev->vq[i].pfn);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
    }
}
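
/* The resulting wire format: PCI device state; addr, status, isr,
 * queue_sel, features; config_len followed by the raw config bytes; the
 * number of configured queues; then (num, pfn, last_avail_idx) per queue.
 * virtio_load() below consumes exactly this layout. */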

void virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i;

    pci_device_load(&vdev->pci_dev, f);

    qemu_get_be32s(f, &vdev->addr);
    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &vdev->features);
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        qemu_get_be32s(f, &vdev->vq[i].pfn);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);

        if (vdev->vq[i].pfn) {
            size_t size;
            target_phys_addr_t pa;

            pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS;
            size = virtqueue_size(vdev->vq[i].vring.num);
            virtqueue_init(&vdev->vq[i], virtio_map_gpa(pa, size));
        }
    }

    virtio_update_irq(vdev);
}

VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
                              uint8_t class_code, uint8_t subclass_code,
                              uint8_t pif, size_t config_size,
                              size_t struct_size)
{
    VirtIODevice *vdev;
    PCIDevice *pci_dev;
    uint8_t *config;
    uint32_t size;

    pci_dev = pci_register_device(bus, name, struct_size,
                                  -1, NULL, NULL);
    if (!pci_dev)
        return NULL;

    vdev = to_virtio_device(pci_dev);

    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    memset(vdev->vq, 0, sizeof(vdev->vq));

    config = pci_dev->config;
    config[0x00] = vendor & 0xFF;           /* vendor ID */
    config[0x01] = (vendor >> 8) & 0xFF;
    config[0x02] = device & 0xFF;           /* device ID */
    config[0x03] = (device >> 8) & 0xFF;

    config[0x08] = VIRTIO_PCI_ABI_VERSION;  /* revision ID carries the ABI version */

    config[0x09] = pif;                     /* programming interface */
    config[0x0a] = subclass_code;
    config[0x0b] = class_code;
    config[0x0e] = 0x00;                    /* header type */

    config[0x2c] = subvendor & 0xFF;        /* subsystem vendor ID */
    config[0x2d] = (subvendor >> 8) & 0xFF;
    config[0x2e] = subdevice & 0xFF;        /* subsystem device ID */
    config[0x2f] = (subdevice >> 8) & 0xFF;

    config[0x3d] = 1;                       /* interrupt pin: INTA */

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    /* Round the BAR up to a power of two: 20 bytes of common registers
     * plus the device-specific config space. */
    size = 20 + config_size;
    if (size & (size - 1))
        size = 1 << fls(size);

    pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                           virtio_map);
    qemu_register_reset(virtio_reset, vdev);

    return vdev;
}
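
/* Sketch of a device model bringing itself up with this helper (all names
 * and ID values are illustrative; real devices use their IDs from the
 * virtio PCI spec, vendor 0x1af4 with a device ID in the 0x1000 range):
 *
 *   s = (MyDevice *)virtio_init_pci(bus, "my-virtio-dev",
 *                                   0x1af4, 0x1234,
 *                                   0x1af4, 0x0042,
 *                                   0x05, 0x00, 0x00,
 *                                   sizeof(struct my_config),
 *                                   sizeof(MyDevice));
 *   s->vdev.get_features = my_get_features;
 *   s->vdev.update_config = my_update_config;
 *   s->vq = virtio_add_queue(&s->vdev, 128, my_handle_output);
 */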