Remove hack that forced pci interrupts to be enabled
[qemu-kvm/fedora.git] / hw / virtio.c
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <inttypes.h>
#include <err.h>

#include "virtio.h"
#include "sysemu.h"
/* from Linux's linux/virtio_pci.h */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS               18

/* An 8-bit r/o interrupt status register.  Reading the value will return the
 * current contents of the ISR and will also clear it.  This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR                  19
/* The remaining space in the I/O BAR is the per-driver configuration space */
#define VIRTIO_PCI_CONFIG               20
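
/* Taken together, the defines above give the layout of the legacy virtio
 * I/O BAR (offsets from the BAR base):
 *
 *   offset  size  register
 *   ------  ----  -----------------------------------------
 *        0     4  VIRTIO_PCI_HOST_FEATURES   (r/o)
 *        4     4  VIRTIO_PCI_GUEST_FEATURES  (r/w)
 *        8     4  VIRTIO_PCI_QUEUE_PFN       (r/w)
 *       12     2  VIRTIO_PCI_QUEUE_NUM       (r/o)
 *       14     2  VIRTIO_PCI_QUEUE_SEL       (r/w)
 *       16     2  VIRTIO_PCI_QUEUE_NOTIFY    (r/w)
 *       18     1  VIRTIO_PCI_STATUS          (r/w)
 *       19     1  VIRTIO_PCI_ISR             (r/o, read-to-ack)
 *     20..     -  per-driver configuration space
 */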
/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)
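
/* Illustrative only: should a real store barrier ever be needed here (for
 * instance with an SMP-capable accelerator), one portable sketch would be
 * GCC's full-barrier builtin:
 *
 *     #define wmb() __sync_synchronize()
 */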
/* virt queue functions */

static void virtqueue_init(VirtQueue *vq, void *p)
{
    vq->vring.desc = p;
    vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]);
}
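
/* For reference, virtqueue_init() above assumes the vring layout the guest
 * establishes in the page it passes via VIRTIO_PCI_QUEUE_PFN:
 *
 *     desc[0..num-1]   descriptor table, at offset 0
 *     avail            avail ring, immediately after the descriptors
 *     (padding)        up to the next TARGET_PAGE_SIZE boundary
 *     used             used ring, page-aligned
 */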
static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
        return vq->vring.num;

    /* Check they're not leading us off the end of the descriptor table. */
    next = vq->vring.desc[i].next;
    /* Make sure the compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= vq->vring.num)
        errx(1, "Desc next is %u", next);

    return next;
}
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    VRingUsedElem *used;

    /* Get a pointer to the next entry in the used ring. */
    used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
    used->id = elem->index;
    used->len = len;
    /* Make sure the buffer is written before we update the index. */
    wmb();
    vq->vring.used->idx++;
}
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
        errx(1, "Guest moved avail index from %u to %u",
             vq->last_avail_idx, vq->vring.avail->idx);

    /* If there's nothing new since we last looked, return invalid. */
    if (vq->vring.avail->idx == vq->last_avail_idx)
        return 0;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num)
        errx(1, "Guest says index %u is available", head);

    /* When we start there are no input or output buffers. */
    elem->out_num = elem->in_num = 0;

    i = head;
    do {
        struct iovec *sg;

        if ((vq->vring.desc[i].addr + vq->vring.desc[i].len) > ram_size)
            errx(1, "Guest sent invalid pointer");

        if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
            sg = &elem->in_sg[elem->in_num++];
        else
            sg = &elem->out_sg[elem->out_num++];

        /* Map this descriptor's buffer into an iovec entry. */
        sg->iov_len = vq->vring.desc[i].len;
        sg->iov_base = phys_ram_base + vq->vring.desc[i].addr;

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num)
            errx(1, "Looped descriptor");
    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);

    elem->index = head;

    return elem->in_num + elem->out_num;
}
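
/* A typical handle_output callback drains the queue with the pop/push pair
 * above; an illustrative sketch (process_buffers() is a hypothetical
 * device-specific helper):
 *
 *     VirtQueueElement elem;
 *
 *     while (virtqueue_pop(vq, &elem)) {
 *         unsigned int len = process_buffers(&elem);
 *         virtqueue_push(vq, &elem, len);
 *     }
 *     virtio_notify(vdev, vq);
 */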
/* virtio device */

static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
{
    /* A VirtIODevice embeds its PCIDevice as the first member, so the
     * PCIDevice pointer can simply be cast back to the containing device. */
    return (VirtIODevice *)pci_dev;
}
static void virtio_update_irq(VirtIODevice *vdev)
{
    qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
}
void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    vdev->features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = NULL;
        vdev->vq[i].vring.avail = NULL;
        vdev->vq[i].vring.used = NULL;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pfn = 0;
    }
}
static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    ram_addr_t pa;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        if (vdev->set_features)
            vdev->set_features(vdev, val);
        vdev->features = val;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (ram_addr_t)val << TARGET_PAGE_BITS;
        vdev->vq[vdev->queue_sel].pfn = val;
        if (pa == 0) {
            virtio_reset(vdev);
        } else if (pa < (ram_size - TARGET_PAGE_SIZE)) {
            virtqueue_init(&vdev->vq[vdev->queue_sel], phys_ram_base + pa);
            /* FIXME if pa == 0, deal with device tear down */
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_PCI_QUEUE_MAX)
            vdev->queue_sel = val;
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc)
            vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
        break;
    case VIRTIO_PCI_STATUS:
        vdev->status = val & 0xFF;
        break;
    }
}
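
/* For reference, a guest driver drives this handler with a sequence along
 * these lines (guest-side pseudo-code):
 *
 *     outw(base + VIRTIO_PCI_QUEUE_SEL, n);        // select queue n
 *     num = inw(base + VIRTIO_PCI_QUEUE_NUM);      // read its size
 *     outl(base + VIRTIO_PCI_QUEUE_PFN, pfn);      // hand over the vring page
 *     ...
 *     outw(base + VIRTIO_PCI_QUEUE_NOTIFY, n);     // kick after queuing buffers
 */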
static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    uint32_t ret = 0xFFFFFFFF;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->get_features(vdev);
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = vdev->vq[vdev->queue_sel].pfn;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = vdev->vq[vdev->queue_sel].vring.num;
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = vdev->isr;
        vdev->isr = 0;
        virtio_update_irq(vdev);
        break;
    default:
        break;
    }

    return ret;
}
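
/* The guest acknowledges an interrupt by reading the ISR, which the
 * VIRTIO_PCI_ISR case above both returns and clears (guest-side sketch):
 *
 *     isr = inb(base + VIRTIO_PCI_ISR);    // read-and-acknowledge
 *     if (isr & 1)
 *         ...service the queues...
 */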
static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint8_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint16_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint32_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint8_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}

static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint16_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}

static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint32_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}
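
/* Note that each accessor clips against config_len: a two-byte read starting
 * at the last valid config byte, for example, returns -1 rather than reading
 * past the buffer, and the corresponding write is silently dropped. */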
static void virtio_map(PCIDevice *pci_dev, int region_num,
                       uint32_t addr, uint32_t size, int type)
{
    VirtIODevice *vdev = to_virtio_device(pci_dev);
    int i;

    vdev->addr = addr;
    for (i = 0; i < 3; i++) {
        register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
        register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
    }

    if (vdev->config_len) {
        register_ioport_write(addr + 20, vdev->config_len, 1,
                              virtio_config_writeb, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 2,
                              virtio_config_writew, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 4,
                              virtio_config_writel, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 1,
                             virtio_config_readb, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 2,
                             virtio_config_readw, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 4,
                             virtio_config_readl, vdev);

        vdev->update_config(vdev, vdev->config);
    }
}
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].index = i;

    return &vdev->vq[i];
}
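
/* Illustrative use from a device model's init code (the queue size of 128
 * and the handler name are hypothetical):
 *
 *     vq = virtio_add_queue(vdev, 128, virtio_foo_handle_output);
 */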
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* Suppress the interrupt if the guest set VRING_AVAIL_F_NO_INTERRUPT and
     * there are still available buffers pending; always notify when the
     * queue is empty. */
    if (vq->vring.avail->idx != vq->last_avail_idx &&
        (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
        return;

    vdev->isr = 1;
    virtio_update_irq(vdev);
}
VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
                              uint8_t class_code, uint8_t subclass_code,
                              uint8_t pif, size_t config_size,
                              size_t struct_size)
{
    VirtIODevice *vdev;
    PCIDevice *pci_dev;
    uint8_t *config;

    pci_dev = pci_register_device(bus, name, struct_size,
                                  -1, NULL, NULL);
    vdev = to_virtio_device(pci_dev);

    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    memset(vdev->vq, 0, sizeof(vdev->vq));

    config = pci_dev->config;
    config[0x00] = vendor & 0xFF;             /* vendor ID */
    config[0x01] = (vendor >> 8) & 0xFF;
    config[0x02] = device & 0xFF;             /* device ID */
    config[0x03] = (device >> 8) & 0xFF;

    config[0x09] = pif;                       /* programming interface */
    config[0x0a] = subclass_code;             /* device subclass */
    config[0x0b] = class_code;                /* device class */
    config[0x0e] = 0x00;                      /* header type: normal device */

    config[0x2c] = subvendor & 0xFF;          /* subsystem vendor ID */
    config[0x2d] = (subvendor >> 8) & 0xFF;
    config[0x2e] = subdevice & 0xFF;          /* subsystem device ID */
    config[0x2f] = (subdevice >> 8) & 0xFF;

    config[0x3d] = 1;                         /* interrupt pin: INTA# */

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    pci_register_io_region(pci_dev, 0, 20 + config_size, PCI_ADDRESS_SPACE_IO,
                           virtio_map);
    qemu_register_reset(virtio_reset, vdev);

    return vdev;
}
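
/* Illustrative caller (a hypothetical device model; the device IDs, class
 * codes, and config struct are placeholders, while 0x1AF4 is the PCI vendor
 * ID used by virtio devices):
 *
 *     s = (VirtIOFoo *)virtio_init_pci(bus, "virtio-foo",
 *                                      0x1AF4, 0x1000,
 *                                      0x1AF4, VIRTIO_ID_FOO,
 *                                      0x01, 0x80, 0x00,
 *                                      sizeof(struct virtio_foo_config),
 *                                      sizeof(VirtIOFoo));
 */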