/*
 * Copyright IBM, Corp. 2007
 *
 * Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
/* from Linux's linux/virtio_pci.h */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS               18

/* An 8-bit r/o interrupt status register.  Reading the value will return the
 * current contents of the ISR and will also clear it.  This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR                  19

/* The device-specific configuration space starts at this offset. */
#define VIRTIO_PCI_CONFIG               20

/* Virtio ABI version; if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION          0

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)
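
/* Typical guest-side programming of the registers above (an illustrative
 * sketch only; 'base', 'vring_pfn' and the port-I/O helpers are assumptions
 * for the example, not part of this device model):
 *
 *   features = inl(base + VIRTIO_PCI_HOST_FEATURES);
 *   outl(base + VIRTIO_PCI_GUEST_FEATURES, features);  // ack features
 *   outw(base + VIRTIO_PCI_QUEUE_SEL, 0);               // select queue 0
 *   num = inw(base + VIRTIO_PCI_QUEUE_NUM);             // read its ring size
 *   outl(base + VIRTIO_PCI_QUEUE_PFN, vring_pfn);       // place the vring
 *   outw(base + VIRTIO_PCI_QUEUE_NOTIFY, 0);            // kick queue 0
 */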
/* virt queue functions */

/* Map a guest-physical range into QEMU's address space, checking that every
 * page is ordinary RAM and that the range is contiguous in host memory. */
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t off;
    target_phys_addr_t addr1;

    off = cpu_get_physical_page_desc(addr);
    if ((off & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        fprintf(stderr, "virtio DMA to IO ram\n");
        return NULL;
    }

    off = (off & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);

    for (addr1 = addr + TARGET_PAGE_SIZE;
         addr1 < TARGET_PAGE_ALIGN(addr + size);
         addr1 += TARGET_PAGE_SIZE) {
        ram_addr_t off1;

        off1 = cpu_get_physical_page_desc(addr1);
        if ((off1 & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
            fprintf(stderr, "virtio DMA to IO ram\n");
            return NULL;
        }

        off1 = (off1 & TARGET_PAGE_MASK) | (addr1 & ~TARGET_PAGE_MASK);

        if (off1 != (off + (addr1 - addr))) {
            fprintf(stderr, "discontiguous virtio memory\n");
            return NULL;
        }
    }

    return phys_ram_base + off;
}
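
/* The helpers below assume the legacy vring layout in guest memory: the
 * descriptor table, then the available ring, then the used ring starting on
 * the next page boundary. */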
static size_t virtqueue_size(int num)
{
    return TARGET_PAGE_ALIGN((sizeof(VRingDesc) * num) +
                             (sizeof(VRingAvail) + sizeof(uint16_t) * num)) +
        (sizeof(VRingUsed) + sizeof(VRingUsedElem) * num);
}
static void virtqueue_init(VirtQueue *vq, void *p)
{
    vq->vring.desc = p;
    vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]);
}
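
/* Follow a descriptor chain to its next element; vring.num is used as the
 * end-of-chain sentinel. */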
static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT))
        return vq->vring.num;

    /* Check they're not leading us off the end of the descriptors. */
    next = vq->vring.desc[i].next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= vq->vring.num)
        errx(1, "Desc next is %u", next);

    return next;
}
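
/* Complete a request: fill in the next used-ring entry for the element and
 * publish it to the guest by bumping the used index. */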
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    VRingUsedElem *used;

    /* Get a pointer to the next entry in the used ring. */
    used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
    used->id = elem->index;
    used->len = len;
    /* Make sure buffer is written before we update index. */
    wmb();
    vq->vring.used->idx++;
}
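
/* Fetch the next available descriptor chain and scatter it into the element's
 * in_sg/out_sg vectors.  Returns the total number of buffers, or 0 if the
 * ring is empty. */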
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head;
    unsigned int position;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
        errx(1, "Guest moved used index from %u to %u",
             vq->last_avail_idx, vq->vring.avail->idx);

    /* If there's nothing new since last we looked, return invalid. */
    if (vq->vring.avail->idx == vq->last_avail_idx)
        return 0;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num)
        errx(1, "Guest says index %u is available", head);

    /* When we start there are neither input nor output buffers. */
    position = elem->out_num = elem->in_num = 0;

    i = head;
    do {
        struct iovec *sg;

        if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE)
            sg = &elem->in_sg[elem->in_num++];
        else
            sg = &elem->out_sg[elem->out_num++];

        /* Grab the descriptor, and check it's OK. */
        sg->iov_len = vq->vring.desc[i].len;
        sg->iov_base = virtio_map_gpa(vq->vring.desc[i].addr, sg->iov_len);
        if (sg->iov_base == NULL)
            errx(1, "Invalid mapping\n");

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num)
            errx(1, "Looped descriptor");
    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);

    elem->index = head;

    return elem->in_num + elem->out_num;
}
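
/* The cast below relies on VirtIODevice embedding its PCIDevice as the first
 * member, so both structures start at the same address. */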
static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
{
    return (VirtIODevice *)pci_dev;
}
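
/* Drive the PCI INTx line from bit 0 of the interrupt status register. */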
static void virtio_update_irq(VirtIODevice *vdev)
{
    qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
}
void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    vdev->features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;

    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = NULL;
        vdev->vq[i].vring.avail = NULL;
        vdev->vq[i].vring.used = NULL;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pfn = 0;
    }
}
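
/* I/O write handler for the common registers; 'addr' arrives as an absolute
 * port number and is rebased against the mapped BAR before dispatching. */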
static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    ram_addr_t pa;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        if (vdev->set_features)
            vdev->set_features(vdev, val);
        vdev->features = val;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (ram_addr_t)val << TARGET_PAGE_BITS;
        vdev->vq[vdev->queue_sel].pfn = val;
        if (pa == 0) {
            virtio_reset(vdev);
        } else {
            size_t size = virtqueue_size(vdev->vq[vdev->queue_sel].vring.num);
            virtqueue_init(&vdev->vq[vdev->queue_sel],
                           virtio_map_gpa(pa, size));
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_PCI_QUEUE_MAX)
            vdev->queue_sel = val;
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc)
            vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
        break;
    case VIRTIO_PCI_STATUS:
        vdev->status = val & 0xFF;
        if (vdev->status == 0)
            virtio_reset(vdev);
        break;
    }
}
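
/* I/O read handler for the common registers.  Reading VIRTIO_PCI_ISR returns
 * the current interrupt status and clears it. */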
static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    uint32_t ret = 0xFFFFFFFF;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->get_features(vdev);
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = vdev->vq[vdev->queue_sel].pfn;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = vdev->vq[vdev->queue_sel].vring.num;
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = vdev->isr;
        vdev->isr = 0;
        virtio_update_irq(vdev);
        break;
    }

    return ret;
}
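
/* Device-specific configuration space accessors.  The config window starts at
 * offset VIRTIO_PCI_CONFIG within the I/O BAR; each helper bounds-checks the
 * access and copies to or from vdev->config. */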
static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint8_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}
static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint16_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}
static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint32_t val;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}
static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint8_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}
static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint16_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}
static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint32_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));
}
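
/* PCI BAR mapping callback: register 1/2/4-byte handlers for the first 20
 * bytes of common registers, plus the device-specific config window when the
 * device has one. */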
static void virtio_map(PCIDevice *pci_dev, int region_num,
                       uint32_t addr, uint32_t size, int type)
{
    VirtIODevice *vdev = to_virtio_device(pci_dev);
    int i;

    vdev->addr = addr;
    for (i = 0; i < 3; i++) {
        register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
        register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
    }

    if (vdev->config_len) {
        register_ioport_write(addr + 20, vdev->config_len, 1,
                              virtio_config_writeb, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 2,
                              virtio_config_writew, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 4,
                              virtio_config_writel, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 1,
                             virtio_config_readb, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 2,
                             virtio_config_readw, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 4,
                             virtio_config_readl, vdev);

        vdev->update_config(vdev, vdev->config);
    }
}
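
/* Allocate the next free virtqueue slot and attach the device's output
 * handler; the ring is not mapped until the guest writes its PFN. */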
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}
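
/* Raise an interrupt for the queue unless the guest suppressed notifications
 * with VRING_AVAIL_F_NO_INTERRUPT; an empty ring always triggers one. */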
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* Always notify when queue is empty */
    if (vq->vring.avail->idx != vq->last_avail_idx &&
        (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
        return;

    vdev->isr = 1;
    virtio_update_irq(vdev);
}
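
/* Migration helpers: the common PCI and virtio state is serialized first,
 * followed by the per-queue ring size, PFN and last seen available index. */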
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    pci_device_save(&vdev->pci_dev, f);

    qemu_put_be32s(f, &vdev->addr);
    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    /* Count the configured queues, then save each of them. */
    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be32s(f, &vdev->vq[i].pfn);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
    }
}
void virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i;

    pci_device_load(&vdev->pci_dev, f);

    qemu_get_be32s(f, &vdev->addr);
    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &vdev->features);
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        qemu_get_be32s(f, &vdev->vq[i].pfn);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);

        if (vdev->vq[i].pfn) {
            target_phys_addr_t pa;
            size_t size;

            pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS;
            size = virtqueue_size(vdev->vq[i].vring.num);
            virtqueue_init(&vdev->vq[i], virtio_map_gpa(pa, size));
        }
    }

    virtio_update_irq(vdev);
}
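
/* Instantiate a virtio device on the given PCI bus: fill in the PCI config
 * space identifiers, allocate the device-specific config area and register an
 * I/O BAR large enough for the common registers plus that config area,
 * rounded up to a power of two. */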
VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
                              uint8_t class_code, uint8_t subclass_code,
                              uint8_t pif, size_t config_size,
                              size_t struct_size)
{
    VirtIODevice *vdev;
    PCIDevice *pci_dev;
    uint8_t *config;
    uint32_t size;

    pci_dev = pci_register_device(bus, name, struct_size,
                                  -1, NULL, NULL);
    if (!pci_dev)
        return NULL;

    vdev = to_virtio_device(pci_dev);

    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    memset(vdev->vq, 0, sizeof(vdev->vq));

    config = pci_dev->config;
    config[0x00] = vendor & 0xFF;
    config[0x01] = (vendor >> 8) & 0xFF;
    config[0x02] = device & 0xFF;
    config[0x03] = (device >> 8) & 0xFF;

    config[0x08] = VIRTIO_PCI_ABI_VERSION;

    config[0x09] = pif;
    config[0x0a] = subclass_code;
    config[0x0b] = class_code;

    config[0x2c] = subvendor & 0xFF;
    config[0x2d] = (subvendor >> 8) & 0xFF;
    config[0x2e] = subdevice & 0xFF;
    config[0x2f] = (subdevice >> 8) & 0xFF;

    config[0x3d] = 1;           /* interrupt pin A */

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    /* BAR0 covers 20 bytes of common registers plus the config space,
     * rounded up to the next power of two as PCI requires. */
    size = 20 + config_size;
    if (size & (size - 1))
        size = 1 << fls(size);

    pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                           virtio_map);
    qemu_register_reset(virtio_reset, vdev);

    return vdev;
}