[qemu-kvm/fedora.git] / hw/virtio.c

/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <inttypes.h>
#include <err.h>

#include "virtio.h"
#include "sysemu.h"

/* from Linux's linux/virtio_pci.h */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS               18

/* An 8-bit r/o interrupt status register.  Reading the value will return the
 * current contents of the ISR and will also clear it.  This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR                  19

#define VIRTIO_PCI_CONFIG               20

/* Virtio ABI version, if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION          0

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)

/* virt queue functions */
static void virtqueue_init(VirtQueue *vq, void *p)
{
    vq->vring.desc = p;
    vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]);
}
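
/* Layout note (added commentary, not in the original source): the guest hands
 * us one physically contiguous region starting at `p`.  The descriptor table
 * sits first, the available ring immediately follows it, and the used ring
 * starts on the next page boundary after the available ring, which is exactly
 * what the arithmetic above computes.  Assuming 16-byte VRingDesc entries and
 * 4 KiB pages, with num = 256: desc occupies 256 * 16 = 4096 bytes, avail
 * needs 2 + 2 + 2 * 256 = 516 bytes (flags, idx, ring[]), ending at offset
 * 4612, so used begins at offset 8192. */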

static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
        return vq->vring.num;

    /* Check they're not leading us off end of descriptors. */
    next = vq->vring.desc[i].next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= vq->vring.num)
        errx(1, "Desc next is %u", next);

    return next;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    VRingUsedElem *used;

    /* Get a pointer to the next entry in the used ring. */
    used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
    used->id = elem->index;
    used->len = len;
    /* Make sure buffer is written before we update index. */
    wmb();
    vq->vring.used->idx++;
}
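
/* Usage note (added commentary): `id` names the head descriptor of the chain
 * being returned to the guest, and `len` is the total number of bytes the
 * device actually wrote into the chain's writable (in_sg) buffers.  The
 * guest driver relies on `len` to know how much of its buffer is valid. */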

int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
        errx(1, "Guest moved used index from %u to %u",
             vq->last_avail_idx, vq->vring.avail->idx);

    /* If there's nothing new since last we looked, return invalid. */
    if (vq->vring.avail->idx == vq->last_avail_idx)
        return 0;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num)
        errx(1, "Guest says index %u is available", head);

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    i = head;
    do {
        struct iovec *sg;

        if ((vq->vring.desc[i].addr + vq->vring.desc[i].len) > ram_size)
            errx(1, "Guest sent invalid pointer");

        if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
            sg = &elem->in_sg[elem->in_num++];
        else
            sg = &elem->out_sg[elem->out_num++];

        /* Grab the first descriptor, and check it's OK. */
        sg->iov_len = vq->vring.desc[i].len;
        sg->iov_base = phys_ram_base + vq->vring.desc[i].addr;

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num)
            errx(1, "Looped descriptor");
    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);

    elem->index = head;

    return elem->in_num + elem->out_num;
}
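
/* Added sketch: a device's handle_output callback typically drains the queue
 * with virtqueue_pop(), consumes or fills elem.out_sg/elem.in_sg, then
 * completes each chain with virtqueue_push() and raises the interrupt with
 * virtio_notify().  The device and helper names here are hypothetical:
 *
 *     static void my_dev_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement elem;
 *
 *         while (virtqueue_pop(vq, &elem)) {
 *             size_t written = process_request(&elem);   // hypothetical
 *             virtqueue_push(vq, &elem, written);
 *         }
 *         virtio_notify(vdev, vq);
 *     }
 */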

/* virtio device */
static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
{
    return (VirtIODevice *)pci_dev;
}

static void virtio_update_irq(VirtIODevice *vdev)
{
    qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    vdev->features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = NULL;
        vdev->vq[i].vring.avail = NULL;
        vdev->vq[i].vring.used = NULL;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pfn = 0;
    }
}

static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    ram_addr_t pa;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        if (vdev->set_features)
            vdev->set_features(vdev, val);
        vdev->features = val;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (ram_addr_t)val << TARGET_PAGE_BITS;
        vdev->vq[vdev->queue_sel].pfn = val;
        if (pa == 0) {
            virtio_reset(vdev);
        } else if (pa < (ram_size - TARGET_PAGE_SIZE)) {
            virtqueue_init(&vdev->vq[vdev->queue_sel], phys_ram_base + pa);
            /* FIXME if pa == 0, deal with device tear down */
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_PCI_QUEUE_MAX)
            vdev->queue_sel = val;
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc)
            vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
        break;
    case VIRTIO_PCI_STATUS:
        vdev->status = val & 0xFF;
        if (vdev->status == 0)
            virtio_reset(vdev);
        break;
    }
}
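
/* Added note: the register protocol above mirrors the legacy virtio-PCI
 * guest driver.  At setup time the driver picks a queue with QUEUE_SEL,
 * reads its size from QUEUE_NUM, allocates the ring pages, and publishes
 * them by writing the page frame number to QUEUE_PFN; writing a PFN of 0
 * (or a status of 0) resets the device.  At runtime a write to QUEUE_NOTIFY
 * is the "kick" that invokes the queue's handle_output callback. */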

static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    uint32_t ret = 0xFFFFFFFF;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->get_features(vdev);
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = vdev->vq[vdev->queue_sel].pfn;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = vdev->vq[vdev->queue_sel].vring.num;
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = vdev->isr;
        vdev->isr = 0;
        virtio_update_irq(vdev);
        break;
    default:
        break;
    }

    return ret;
}

static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint8_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint16_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint32_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint8_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}

static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint16_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}

static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint32_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}
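
/* Added note: the three width variants above differ only in the type of
 * `val`; memcpy() keeps the accesses alignment-safe.  Bytes are copied
 * as-is, so the config area holds whatever byte order the device model
 * stored via update_config(); for the legacy virtio ABI that is the guest's
 * native endianness. */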

static void virtio_map(PCIDevice *pci_dev, int region_num,
                       uint32_t addr, uint32_t size, int type)
{
    VirtIODevice *vdev = to_virtio_device(pci_dev);
    int i;

    vdev->addr = addr;
    for (i = 0; i < 3; i++) {
        register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
        register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
    }

    if (vdev->config_len) {
        register_ioport_write(addr + 20, vdev->config_len, 1,
                              virtio_config_writeb, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 2,
                              virtio_config_writew, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 4,
                              virtio_config_writel, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 1,
                              virtio_config_readb, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 2,
                              virtio_config_readw, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 4,
                              virtio_config_readl, vdev);

        vdev->update_config(vdev, vdev->config);
    }
}
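
/* Added note: BAR 0 is split in two.  The first 20 bytes are the common
 * virtio registers defined at the top of this file, registered for access
 * sizes of 1, 2 and 4 bytes (the 1 << i in the loop above).  Everything
 * from offset 20 (VIRTIO_PCI_CONFIG) onward is the device-specific config
 * area, refreshed from the device model via update_config() on each map. */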

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].index = i;

    return &vdev->vq[i];
}
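
/* Added example: a device model claims its queues at init time, e.g. a
 * hypothetical block device registering one 128-entry request queue:
 *
 *     s->vq = virtio_add_queue(&s->vdev, 128, my_blk_handle_output);
 *
 * The first slot whose vring.num is still 0 is free; exhausting
 * VIRTIO_PCI_QUEUE_MAX queues or asking for more than VIRTQUEUE_MAX_SIZE
 * entries aborts, since both are programming errors in the device model. */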

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* Always notify when queue is empty */
    if (vq->vring.avail->idx != vq->last_avail_idx &&
        (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
        return;

    vdev->isr = 1;
    virtio_update_irq(vdev);
}
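
/* Added note: the guest sets VRING_AVAIL_F_NO_INTERRUPT to suppress
 * completion interrupts while it is polling.  The suppression is honoured
 * only while descriptors are still outstanding (avail->idx !=
 * last_avail_idx); once the device has drained the ring we interrupt
 * unconditionally, so the guest cannot miss the final completion. */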

VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
                              uint8_t class_code, uint8_t subclass_code,
                              uint8_t pif, size_t config_size,
                              size_t struct_size)
{
    VirtIODevice *vdev;
    PCIDevice *pci_dev;
    uint8_t *config;

    pci_dev = pci_register_device(bus, name, struct_size,
                                  -1, NULL, NULL);
    vdev = to_virtio_device(pci_dev);

    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    memset(vdev->vq, 0, sizeof(vdev->vq));

    config = pci_dev->config;
    config[0x00] = vendor & 0xFF;
    config[0x01] = (vendor >> 8) & 0xFF;
    config[0x02] = device & 0xFF;
    config[0x03] = (device >> 8) & 0xFF;

    config[0x08] = VIRTIO_PCI_ABI_VERSION; /* revision ID carries the ABI version */

    config[0x09] = pif;
    config[0x0a] = subclass_code;
    config[0x0b] = class_code;
    config[0x0e] = 0x00; /* header type: normal device */

    config[0x2c] = subvendor & 0xFF;
    config[0x2d] = (subvendor >> 8) & 0xFF;
    config[0x2e] = subdevice & 0xFF;
    config[0x2f] = (subdevice >> 8) & 0xFF;

    config[0x3d] = 1; /* interrupt pin: INTA */

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    pci_register_io_region(pci_dev, 0, 20 + config_size, PCI_ADDRESS_SPACE_IO,
                           virtio_map);
    qemu_register_reset(virtio_reset, vdev);

    return vdev;
}
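
/* Added example: a device front end would instantiate itself with something
 * like the following (0x1af4 is the Red Hat/Qumranet virtio vendor ID; the
 * device/subsystem IDs, class codes, struct and config types shown are
 * illustrative only):
 *
 *     s = (MyVirtIODev *)virtio_init_pci(bus, "my-virtio-dev",
 *                                        0x1af4, 0x1001,
 *                                        0x1af4, MY_VIRTIO_SUBDEV_ID,
 *                                        0x01, 0x80, 0x00,
 *                                        sizeof(struct my_config),
 *                                        sizeof(MyVirtIODev));
 *
 * struct_size lets the caller embed VirtIODevice (and thus PCIDevice) at
 * the start of its own state structure, which is what to_virtio_device()'s
 * cast relies on. */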