4 * Copyright IBM, Corp. 2007
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
/* from Linux's linux/virtio_pci.h */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS               18

/* An 8-bit r/o interrupt status register.  Reading the value will return the
 * current contents of the ISR and will also clear it.  This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR                  19

/* Device-specific config space starts at this offset in the I/O region. */
#define VIRTIO_PCI_CONFIG               20

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)
56 /* virt queue functions */
58 static void virtqueue_init(VirtQueue
*vq
, void *p
)
61 vq
->vring
.avail
= p
+ vq
->vring
.num
* sizeof(VRingDesc
);
62 vq
->vring
.used
= (void *)TARGET_PAGE_ALIGN((unsigned long)&vq
->vring
.avail
->ring
[vq
->vring
.num
]);
65 static unsigned virtqueue_next_desc(VirtQueue
*vq
, unsigned int i
)
69 /* If this descriptor says it doesn't chain, we're done. */
70 if (!(vq
->vring
.desc
[i
].flags
& VRING_DESC_F_NEXT
))
73 /* Check they're not leading us off end of descriptors. */
74 next
= vq
->vring
.desc
[i
].next
;
75 /* Make sure compiler knows to grab that: we don't want it changing! */
78 if (next
>= vq
->vring
.num
)
79 errx(1, "Desc next is %u", next
);
84 void virtqueue_push(VirtQueue
*vq
, const VirtQueueElement
*elem
,
89 /* Get a pointer to the next entry in the used ring. */
90 used
= &vq
->vring
.used
->ring
[vq
->vring
.used
->idx
% vq
->vring
.num
];
91 used
->id
= elem
->index
;
93 /* Make sure buffer is written before we update index. */
95 vq
->vring
.used
->idx
++;
98 int virtqueue_pop(VirtQueue
*vq
, VirtQueueElement
*elem
)
100 unsigned int i
, head
;
101 unsigned int position
;
103 /* Check it isn't doing very strange things with descriptor numbers. */
104 if ((uint16_t)(vq
->vring
.avail
->idx
- vq
->last_avail_idx
) > vq
->vring
.num
)
105 errx(1, "Guest moved used index from %u to %u",
106 vq
->last_avail_idx
, vq
->vring
.avail
->idx
);
108 /* If there's nothing new since last we looked, return invalid. */
109 if (vq
->vring
.avail
->idx
== vq
->last_avail_idx
)
112 /* Grab the next descriptor number they're advertising, and increment
113 * the index we've seen. */
114 head
= vq
->vring
.avail
->ring
[vq
->last_avail_idx
++ % vq
->vring
.num
];
116 /* If their number is silly, that's a fatal mistake. */
117 if (head
>= vq
->vring
.num
)
118 errx(1, "Guest says index %u is available", head
);
120 /* When we start there are none of either input nor output. */
121 position
= elem
->out_num
= elem
->in_num
= 0;
127 if ((vq
->vring
.desc
[i
].addr
+ vq
->vring
.desc
[i
].len
) > ram_size
)
128 errx(1, "Guest sent invalid pointer");
130 if (vq
->vring
.desc
[i
].flags
& VRING_DESC_F_WRITE
)
131 sg
= &elem
->in_sg
[elem
->in_num
++];
133 sg
= &elem
->out_sg
[elem
->out_num
++];
135 /* Grab the first descriptor, and check it's OK. */
136 sg
->iov_len
= vq
->vring
.desc
[i
].len
;
137 sg
->iov_base
= phys_ram_base
+ vq
->vring
.desc
[i
].addr
;
139 /* If we've got too many, that implies a descriptor loop. */
140 if ((elem
->in_num
+ elem
->out_num
) > vq
->vring
.num
)
141 errx(1, "Looped descriptor");
142 } while ((i
= virtqueue_next_desc(vq
, i
)) != vq
->vring
.num
);
146 return elem
->in_num
+ elem
->out_num
;
151 static VirtIODevice
*to_virtio_device(PCIDevice
*pci_dev
)
153 return (VirtIODevice
*)pci_dev
;
156 static void virtio_update_irq(VirtIODevice
*vdev
)
158 qemu_set_irq(vdev
->pci_dev
.irq
[0], vdev
->isr
& 1);
161 void virtio_reset(void *opaque
)
163 VirtIODevice
*vdev
= opaque
;
171 for(i
= 0; i
< VIRTIO_PCI_QUEUE_MAX
; i
++) {
172 vdev
->vq
[i
].vring
.desc
= NULL
;
173 vdev
->vq
[i
].vring
.avail
= NULL
;
174 vdev
->vq
[i
].vring
.used
= NULL
;
175 vdev
->vq
[i
].last_avail_idx
= 0;
180 static void virtio_ioport_write(void *opaque
, uint32_t addr
, uint32_t val
)
182 VirtIODevice
*vdev
= to_virtio_device(opaque
);
188 case VIRTIO_PCI_GUEST_FEATURES
:
189 if (vdev
->set_features
)
190 vdev
->set_features(vdev
, val
);
191 vdev
->features
= val
;
193 case VIRTIO_PCI_QUEUE_PFN
:
194 pa
= (ram_addr_t
)val
<< TARGET_PAGE_BITS
;
195 vdev
->vq
[vdev
->queue_sel
].pfn
= val
;
198 } else if (pa
< (ram_size
- TARGET_PAGE_SIZE
)) {
199 virtqueue_init(&vdev
->vq
[vdev
->queue_sel
], phys_ram_base
+ pa
);
200 /* FIXME if pa == 0, deal with device tear down */
203 case VIRTIO_PCI_QUEUE_SEL
:
204 if (val
< VIRTIO_PCI_QUEUE_MAX
)
205 vdev
->queue_sel
= val
;
207 case VIRTIO_PCI_QUEUE_NOTIFY
:
208 if (val
< VIRTIO_PCI_QUEUE_MAX
&& vdev
->vq
[val
].vring
.desc
)
209 vdev
->vq
[val
].handle_output(vdev
, &vdev
->vq
[val
]);
211 case VIRTIO_PCI_STATUS
:
212 vdev
->status
= val
& 0xFF;
217 static uint32_t virtio_ioport_read(void *opaque
, uint32_t addr
)
219 VirtIODevice
*vdev
= to_virtio_device(opaque
);
220 uint32_t ret
= 0xFFFFFFFF;
225 case VIRTIO_PCI_HOST_FEATURES
:
226 ret
= vdev
->get_features(vdev
);
228 case VIRTIO_PCI_GUEST_FEATURES
:
229 ret
= vdev
->features
;
231 case VIRTIO_PCI_QUEUE_PFN
:
232 ret
= vdev
->vq
[vdev
->queue_sel
].pfn
;
234 case VIRTIO_PCI_QUEUE_NUM
:
235 ret
= vdev
->vq
[vdev
->queue_sel
].vring
.num
;
237 case VIRTIO_PCI_QUEUE_SEL
:
238 ret
= vdev
->queue_sel
;
240 case VIRTIO_PCI_STATUS
:
244 /* reading from the ISR also clears it. */
247 virtio_update_irq(vdev
);
256 static uint32_t virtio_config_readb(void *opaque
, uint32_t addr
)
258 VirtIODevice
*vdev
= opaque
;
261 addr
-= vdev
->addr
+ VIRTIO_PCI_CONFIG
;
262 if (addr
> (vdev
->config_len
- sizeof(val
)))
265 memcpy(&val
, vdev
->config
+ addr
, sizeof(val
));
269 static uint32_t virtio_config_readw(void *opaque
, uint32_t addr
)
271 VirtIODevice
*vdev
= opaque
;
274 addr
-= vdev
->addr
+ VIRTIO_PCI_CONFIG
;
275 if (addr
> (vdev
->config_len
- sizeof(val
)))
278 memcpy(&val
, vdev
->config
+ addr
, sizeof(val
));
282 static uint32_t virtio_config_readl(void *opaque
, uint32_t addr
)
284 VirtIODevice
*vdev
= opaque
;
287 addr
-= vdev
->addr
+ VIRTIO_PCI_CONFIG
;
288 if (addr
> (vdev
->config_len
- sizeof(val
)))
291 memcpy(&val
, vdev
->config
+ addr
, sizeof(val
));
295 static void virtio_config_writeb(void *opaque
, uint32_t addr
, uint32_t data
)
297 VirtIODevice
*vdev
= opaque
;
300 addr
-= vdev
->addr
+ VIRTIO_PCI_CONFIG
;
301 if (addr
> (vdev
->config_len
- sizeof(val
)))
304 memcpy(vdev
->config
+ addr
, &val
, sizeof(val
));
307 static void virtio_config_writew(void *opaque
, uint32_t addr
, uint32_t data
)
309 VirtIODevice
*vdev
= opaque
;
312 addr
-= vdev
->addr
+ VIRTIO_PCI_CONFIG
;
313 if (addr
> (vdev
->config_len
- sizeof(val
)))
316 memcpy(vdev
->config
+ addr
, &val
, sizeof(val
));
319 static void virtio_config_writel(void *opaque
, uint32_t addr
, uint32_t data
)
321 VirtIODevice
*vdev
= opaque
;
324 addr
-= vdev
->addr
+ VIRTIO_PCI_CONFIG
;
325 if (addr
> (vdev
->config_len
- sizeof(val
)))
328 memcpy(vdev
->config
+ addr
, &val
, sizeof(val
));
331 static void virtio_map(PCIDevice
*pci_dev
, int region_num
,
332 uint32_t addr
, uint32_t size
, int type
)
334 VirtIODevice
*vdev
= to_virtio_device(pci_dev
);
338 for (i
= 0; i
< 3; i
++) {
339 register_ioport_write(addr
, 20, 1 << i
, virtio_ioport_write
, vdev
);
340 register_ioport_read(addr
, 20, 1 << i
, virtio_ioport_read
, vdev
);
343 if (vdev
->config_len
) {
344 register_ioport_write(addr
+ 20, vdev
->config_len
, 1,
345 virtio_config_writeb
, vdev
);
346 register_ioport_write(addr
+ 20, vdev
->config_len
, 2,
347 virtio_config_writew
, vdev
);
348 register_ioport_write(addr
+ 20, vdev
->config_len
, 4,
349 virtio_config_writel
, vdev
);
350 register_ioport_read(addr
+ 20, vdev
->config_len
, 1,
351 virtio_config_readb
, vdev
);
352 register_ioport_read(addr
+ 20, vdev
->config_len
, 2,
353 virtio_config_readw
, vdev
);
354 register_ioport_read(addr
+ 20, vdev
->config_len
, 4,
355 virtio_config_readl
, vdev
);
357 vdev
->update_config(vdev
, vdev
->config
);
361 VirtQueue
*virtio_add_queue(VirtIODevice
*vdev
, int queue_size
,
362 void (*handle_output
)(VirtIODevice
*, VirtQueue
*))
366 for (i
= 0; i
< VIRTIO_PCI_QUEUE_MAX
; i
++) {
367 if (vdev
->vq
[i
].vring
.num
== 0)
371 if (i
== VIRTIO_PCI_QUEUE_MAX
|| queue_size
> VIRTQUEUE_MAX_SIZE
)
374 vdev
->vq
[i
].vring
.num
= queue_size
;
375 vdev
->vq
[i
].handle_output
= handle_output
;
376 vdev
->vq
[i
].index
= i
;
381 void virtio_notify(VirtIODevice
*vdev
, VirtQueue
*vq
)
383 /* Always notify when queue is empty */
384 if (vq
->vring
.avail
->idx
!= vq
->last_avail_idx
&&
385 (vq
->vring
.avail
->flags
& VRING_AVAIL_F_NO_INTERRUPT
))
389 virtio_update_irq(vdev
);
392 VirtIODevice
*virtio_init_pci(PCIBus
*bus
, const char *name
,
393 uint16_t vendor
, uint16_t device
,
394 uint16_t subvendor
, uint16_t subdevice
,
395 uint8_t class_code
, uint8_t subclass_code
,
396 uint8_t pif
, size_t config_size
,
403 pci_dev
= pci_register_device(bus
, name
, struct_size
,
405 vdev
= to_virtio_device(pci_dev
);
410 memset(vdev
->vq
, 0, sizeof(vdev
->vq
));
412 config
= pci_dev
->config
;
413 config
[0x00] = vendor
& 0xFF;
414 config
[0x01] = (vendor
>> 8) & 0xFF;
415 config
[0x02] = device
& 0xFF;
416 config
[0x03] = (device
>> 8) & 0xFF;
419 config
[0x0a] = subclass_code
;
420 config
[0x0b] = class_code
;
423 config
[0x2c] = subvendor
& 0xFF;
424 config
[0x2d] = (subvendor
>> 8) & 0xFF;
425 config
[0x2e] = subdevice
& 0xFF;
426 config
[0x2f] = (subdevice
>> 8) & 0xFF;
431 vdev
->config_len
= config_size
;
432 if (vdev
->config_len
)
433 vdev
->config
= qemu_mallocz(config_size
);
437 pci_register_io_region(pci_dev
, 0, 20 + config_size
, PCI_ADDRESS_SPACE_IO
,
439 qemu_register_reset(virtio_reset
, vdev
);