4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
7 * Copyright IBM Corp. 2007
10 * Anthony Liguori <aliguori@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <linux/module.h>
18 #include <linux/list.h>
19 #include <linux/pci.h>
20 #include <linux/interrupt.h>
21 #include <linux/virtio.h>
22 #include <linux/virtio_config.h>
23 #include <linux/virtio_ring.h>
24 #include <linux/virtio_pci.h>
25 #include <linux/highmem.h>
26 #include <linux/spinlock.h>
28 MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
29 MODULE_DESCRIPTION("virtio-pci");
30 MODULE_LICENSE("GPL");
33 /* Our device structure */
34 struct virtio_pci_device
36 struct virtio_device vdev
;
37 struct pci_dev
*pci_dev
;
39 /* the IO mapping for the PCI config space */
42 /* a list of queues so we can dispatch IRQs */
44 struct list_head virtqueues
;
49 struct msix_entry
*msix_entries
;
50 /* Name strings for interrupts. This size should be enough,
51 * and I'm too lazy to allocate each name separately. */
52 char (*msix_names
)[256];
53 /* Number of available vectors */
54 unsigned msix_vectors
;
55 /* Vectors allocated, excluding per-vq vectors if any */
56 unsigned msix_used_vectors
;
57 /* Whether we have vector per vq */
61 /* Constants for MSI-X */
62 /* Use first vector for configuration changes, second and the rest for
63 * virtqueues Thus, we need at least 2 vectors for MSI. */
65 VP_MSIX_CONFIG_VECTOR
= 0,
66 VP_MSIX_VQ_VECTOR
= 1,
69 struct virtio_pci_vq_info
71 /* the actual virtqueue */
74 /* the number of entries in the queue */
77 /* the index of the queue */
80 /* the virtual address of the ring queue */
83 /* the list node for the virtqueues list */
84 struct list_head node
;
86 /* MSI-X vector (or none) */
90 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
91 static struct pci_device_id virtio_pci_id_table
[] = {
92 { 0x1af4, PCI_ANY_ID
, PCI_ANY_ID
, PCI_ANY_ID
, 0, 0, 0 },
96 MODULE_DEVICE_TABLE(pci
, virtio_pci_id_table
);
98 /* A PCI device has it's own struct device and so does a virtio device so
99 * we create a place for the virtio devices to show up in sysfs. I think it
100 * would make more sense for virtio to not insist on having it's own device. */
101 static struct device
*virtio_pci_root
;
103 /* Convert a generic virtio device to our structure */
104 static struct virtio_pci_device
*to_vp_device(struct virtio_device
*vdev
)
106 return container_of(vdev
, struct virtio_pci_device
, vdev
);
109 /* virtio config->get_features() implementation */
110 static u32
vp_get_features(struct virtio_device
*vdev
)
112 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
114 /* When someone needs more than 32 feature bits, we'll need to
115 * steal a bit to indicate that the rest are somewhere else. */
116 return ioread32(vp_dev
->ioaddr
+ VIRTIO_PCI_HOST_FEATURES
);
119 /* virtio config->finalize_features() implementation */
120 static void vp_finalize_features(struct virtio_device
*vdev
)
122 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
124 /* Give virtio_ring a chance to accept features. */
125 vring_transport_features(vdev
);
127 /* We only support 32 feature bits. */
128 BUILD_BUG_ON(ARRAY_SIZE(vdev
->features
) != 1);
129 iowrite32(vdev
->features
[0], vp_dev
->ioaddr
+VIRTIO_PCI_GUEST_FEATURES
);
132 /* virtio config->get() implementation */
133 static void vp_get(struct virtio_device
*vdev
, unsigned offset
,
134 void *buf
, unsigned len
)
136 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
137 void __iomem
*ioaddr
= vp_dev
->ioaddr
+
138 VIRTIO_PCI_CONFIG(vp_dev
) + offset
;
142 for (i
= 0; i
< len
; i
++)
143 ptr
[i
] = ioread8(ioaddr
+ i
);
146 /* the config->set() implementation. it's symmetric to the config->get()
148 static void vp_set(struct virtio_device
*vdev
, unsigned offset
,
149 const void *buf
, unsigned len
)
151 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
152 void __iomem
*ioaddr
= vp_dev
->ioaddr
+
153 VIRTIO_PCI_CONFIG(vp_dev
) + offset
;
157 for (i
= 0; i
< len
; i
++)
158 iowrite8(ptr
[i
], ioaddr
+ i
);
161 /* config->{get,set}_status() implementations */
162 static u8
vp_get_status(struct virtio_device
*vdev
)
164 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
165 return ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
168 static void vp_set_status(struct virtio_device
*vdev
, u8 status
)
170 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
171 /* We should never be setting status to 0. */
173 iowrite8(status
, vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
176 static void vp_reset(struct virtio_device
*vdev
)
178 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
179 /* 0 status means a reset. */
180 iowrite8(0, vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
183 /* the notify function used when creating a virt queue */
184 static void vp_notify(struct virtqueue
*vq
)
186 struct virtio_pci_device
*vp_dev
= to_vp_device(vq
->vdev
);
187 struct virtio_pci_vq_info
*info
= vq
->priv
;
189 /* we write the queue's selector into the notification register to
190 * signal the other end */
191 iowrite16(info
->queue_index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_NOTIFY
);
194 /* Handle a configuration change: Tell driver if it wants to know. */
195 static irqreturn_t
vp_config_changed(int irq
, void *opaque
)
197 struct virtio_pci_device
*vp_dev
= opaque
;
198 struct virtio_driver
*drv
;
199 drv
= container_of(vp_dev
->vdev
.dev
.driver
,
200 struct virtio_driver
, driver
);
202 if (drv
&& drv
->config_changed
)
203 drv
->config_changed(&vp_dev
->vdev
);
207 /* Notify all virtqueues on an interrupt. */
208 static irqreturn_t
vp_vring_interrupt(int irq
, void *opaque
)
210 struct virtio_pci_device
*vp_dev
= opaque
;
211 struct virtio_pci_vq_info
*info
;
212 irqreturn_t ret
= IRQ_NONE
;
215 spin_lock_irqsave(&vp_dev
->lock
, flags
);
216 list_for_each_entry(info
, &vp_dev
->virtqueues
, node
) {
217 if (vring_interrupt(irq
, info
->vq
) == IRQ_HANDLED
)
220 spin_unlock_irqrestore(&vp_dev
->lock
, flags
);
225 /* A small wrapper to also acknowledge the interrupt when it's handled.
226 * I really need an EIO hook for the vring so I can ack the interrupt once we
227 * know that we'll be handling the IRQ but before we invoke the callback since
228 * the callback may notify the host which results in the host attempting to
229 * raise an interrupt that we would then mask once we acknowledged the
231 static irqreturn_t
vp_interrupt(int irq
, void *opaque
)
233 struct virtio_pci_device
*vp_dev
= opaque
;
236 /* reading the ISR has the effect of also clearing it so it's very
237 * important to save off the value. */
238 isr
= ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_ISR
);
240 /* It's definitely not us if the ISR was not high */
244 /* Configuration change? Tell driver if it wants to know. */
245 if (isr
& VIRTIO_PCI_ISR_CONFIG
)
246 vp_config_changed(irq
, opaque
);
248 return vp_vring_interrupt(irq
, opaque
);
251 static void vp_free_vectors(struct virtio_device
*vdev
)
253 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
256 if (vp_dev
->intx_enabled
) {
257 free_irq(vp_dev
->pci_dev
->irq
, vp_dev
);
258 vp_dev
->intx_enabled
= 0;
261 for (i
= 0; i
< vp_dev
->msix_used_vectors
; ++i
)
262 free_irq(vp_dev
->msix_entries
[i
].vector
, vp_dev
);
264 if (vp_dev
->msix_enabled
) {
265 /* Disable the vector used for configuration */
266 iowrite16(VIRTIO_MSI_NO_VECTOR
,
267 vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
268 /* Flush the write out to device */
269 ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
271 pci_disable_msix(vp_dev
->pci_dev
);
272 vp_dev
->msix_enabled
= 0;
273 vp_dev
->msix_vectors
= 0;
276 vp_dev
->msix_used_vectors
= 0;
277 kfree(vp_dev
->msix_names
);
278 vp_dev
->msix_names
= NULL
;
279 kfree(vp_dev
->msix_entries
);
280 vp_dev
->msix_entries
= NULL
;
283 static int vp_request_vectors(struct virtio_device
*vdev
, int nvectors
,
286 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
287 const char *name
= dev_name(&vp_dev
->vdev
.dev
);
292 /* Can't allocate MSI-X vectors, use regular interrupt */
293 vp_dev
->msix_vectors
= 0;
294 err
= request_irq(vp_dev
->pci_dev
->irq
, vp_interrupt
,
295 IRQF_SHARED
, name
, vp_dev
);
298 vp_dev
->intx_enabled
= 1;
302 vp_dev
->msix_entries
= kmalloc(nvectors
* sizeof *vp_dev
->msix_entries
,
304 if (!vp_dev
->msix_entries
)
306 vp_dev
->msix_names
= kmalloc(nvectors
* sizeof *vp_dev
->msix_names
,
308 if (!vp_dev
->msix_names
)
311 for (i
= 0; i
< nvectors
; ++i
)
312 vp_dev
->msix_entries
[i
].entry
= i
;
314 err
= pci_enable_msix(vp_dev
->pci_dev
, vp_dev
->msix_entries
, nvectors
);
319 vp_dev
->msix_vectors
= nvectors
;
320 vp_dev
->msix_enabled
= 1;
322 /* Set the vector used for configuration */
323 v
= vp_dev
->msix_used_vectors
;
324 snprintf(vp_dev
->msix_names
[v
], sizeof *vp_dev
->msix_names
,
326 err
= request_irq(vp_dev
->msix_entries
[v
].vector
,
327 vp_config_changed
, 0, vp_dev
->msix_names
[v
],
331 ++vp_dev
->msix_used_vectors
;
333 iowrite16(v
, vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
334 /* Verify we had enough resources to assign the vector */
335 v
= ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
336 if (v
== VIRTIO_MSI_NO_VECTOR
) {
341 if (!per_vq_vectors
) {
342 /* Shared vector for all VQs */
343 v
= vp_dev
->msix_used_vectors
;
344 snprintf(vp_dev
->msix_names
[v
], sizeof *vp_dev
->msix_names
,
345 "%s-virtqueues", name
);
346 err
= request_irq(vp_dev
->msix_entries
[v
].vector
,
347 vp_vring_interrupt
, 0, vp_dev
->msix_names
[v
],
351 ++vp_dev
->msix_used_vectors
;
355 vp_free_vectors(vdev
);
359 static struct virtqueue
*vp_find_vq(struct virtio_device
*vdev
, unsigned index
,
360 void (*callback
)(struct virtqueue
*vq
),
364 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
365 struct virtio_pci_vq_info
*info
;
366 struct virtqueue
*vq
;
367 unsigned long flags
, size
;
371 /* Select the queue we're interested in */
372 iowrite16(index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_SEL
);
374 /* Check if queue is either not available or already active. */
375 num
= ioread16(vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_NUM
);
376 if (!num
|| ioread32(vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
))
377 return ERR_PTR(-ENOENT
);
379 /* allocate and fill out our structure the represents an active
381 info
= kmalloc(sizeof(struct virtio_pci_vq_info
), GFP_KERNEL
);
383 return ERR_PTR(-ENOMEM
);
385 info
->queue_index
= index
;
387 info
->vector
= vector
;
389 size
= PAGE_ALIGN(vring_size(num
, VIRTIO_PCI_VRING_ALIGN
));
390 info
->queue
= alloc_pages_exact(size
, GFP_KERNEL
|__GFP_ZERO
);
391 if (info
->queue
== NULL
) {
396 /* activate the queue */
397 iowrite32(virt_to_phys(info
->queue
) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT
,
398 vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
400 /* create the vring */
401 vq
= vring_new_virtqueue(info
->num
, VIRTIO_PCI_VRING_ALIGN
,
402 vdev
, info
->queue
, vp_notify
, callback
, name
);
405 goto out_activate_queue
;
411 if (vector
!= VIRTIO_MSI_NO_VECTOR
) {
412 iowrite16(vector
, vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
413 vector
= ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
414 if (vector
== VIRTIO_MSI_NO_VECTOR
) {
420 spin_lock_irqsave(&vp_dev
->lock
, flags
);
421 list_add(&info
->node
, &vp_dev
->virtqueues
);
422 spin_unlock_irqrestore(&vp_dev
->lock
, flags
);
427 vring_del_virtqueue(vq
);
429 iowrite32(0, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
430 free_pages_exact(info
->queue
, size
);
436 static void vp_del_vq(struct virtqueue
*vq
)
438 struct virtio_pci_device
*vp_dev
= to_vp_device(vq
->vdev
);
439 struct virtio_pci_vq_info
*info
= vq
->priv
;
440 unsigned long flags
, size
;
442 spin_lock_irqsave(&vp_dev
->lock
, flags
);
443 list_del(&info
->node
);
444 spin_unlock_irqrestore(&vp_dev
->lock
, flags
);
446 iowrite16(info
->queue_index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_SEL
);
448 if (vp_dev
->msix_enabled
) {
449 iowrite16(VIRTIO_MSI_NO_VECTOR
,
450 vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
451 /* Flush the write out to device */
452 ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_ISR
);
455 vring_del_virtqueue(vq
);
457 /* Select and deactivate the queue */
458 iowrite32(0, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
460 size
= PAGE_ALIGN(vring_size(info
->num
, VIRTIO_PCI_VRING_ALIGN
));
461 free_pages_exact(info
->queue
, size
);
465 /* the config->del_vqs() implementation */
466 static void vp_del_vqs(struct virtio_device
*vdev
)
468 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
469 struct virtqueue
*vq
, *n
;
470 struct virtio_pci_vq_info
*info
;
472 list_for_each_entry_safe(vq
, n
, &vdev
->vqs
, list
) {
474 if (vp_dev
->per_vq_vectors
)
475 free_irq(vp_dev
->msix_entries
[info
->vector
].vector
, vq
);
478 vp_dev
->per_vq_vectors
= false;
480 vp_free_vectors(vdev
);
483 static int vp_try_to_find_vqs(struct virtio_device
*vdev
, unsigned nvqs
,
484 struct virtqueue
*vqs
[],
485 vq_callback_t
*callbacks
[],
490 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
492 int i
, err
, allocated_vectors
;
494 err
= vp_request_vectors(vdev
, nvectors
, per_vq_vectors
);
498 vp_dev
->per_vq_vectors
= per_vq_vectors
;
499 allocated_vectors
= vp_dev
->msix_used_vectors
;
500 for (i
= 0; i
< nvqs
; ++i
) {
501 if (!callbacks
[i
] || !vp_dev
->msix_enabled
)
502 vector
= VIRTIO_MSI_NO_VECTOR
;
503 else if (vp_dev
->per_vq_vectors
)
504 vector
= allocated_vectors
++;
506 vector
= VP_MSIX_VQ_VECTOR
;
507 vqs
[i
] = vp_find_vq(vdev
, i
, callbacks
[i
], names
[i
], vector
);
508 if (IS_ERR(vqs
[i
])) {
509 err
= PTR_ERR(vqs
[i
]);
512 /* allocate per-vq irq if available and necessary */
513 if (vp_dev
->per_vq_vectors
&& vector
!= VIRTIO_MSI_NO_VECTOR
) {
514 snprintf(vp_dev
->msix_names
[vector
], sizeof *vp_dev
->msix_names
,
515 "%s-%s", dev_name(&vp_dev
->vdev
.dev
), names
[i
]);
516 err
= request_irq(vp_dev
->msix_entries
[vector
].vector
,
518 vp_dev
->msix_names
[vector
], vqs
[i
]);
534 /* the config->find_vqs() implementation */
535 static int vp_find_vqs(struct virtio_device
*vdev
, unsigned nvqs
,
536 struct virtqueue
*vqs
[],
537 vq_callback_t
*callbacks
[],
541 int i
, uninitialized_var(err
);
543 /* How many vectors would we like? */
544 for (i
= 0; i
< nvqs
; ++i
)
548 /* We want at most one vector per queue and one for config changes. */
549 err
= vp_try_to_find_vqs(vdev
, nvqs
, vqs
, callbacks
, names
,
553 /* Fallback to separate vectors for config and a shared for queues. */
554 err
= vp_try_to_find_vqs(vdev
, nvqs
, vqs
, callbacks
, names
,
558 /* Finally fall back to regular interrupts. */
559 err
= vp_try_to_find_vqs(vdev
, nvqs
, vqs
, callbacks
, names
,
564 static struct virtio_config_ops virtio_pci_config_ops
= {
567 .get_status
= vp_get_status
,
568 .set_status
= vp_set_status
,
570 .find_vqs
= vp_find_vqs
,
571 .del_vqs
= vp_del_vqs
,
572 .get_features
= vp_get_features
,
573 .finalize_features
= vp_finalize_features
,
576 static void virtio_pci_release_dev(struct device
*_d
)
578 struct virtio_device
*dev
= container_of(_d
, struct virtio_device
, dev
);
579 struct virtio_pci_device
*vp_dev
= to_vp_device(dev
);
580 struct pci_dev
*pci_dev
= vp_dev
->pci_dev
;
583 pci_set_drvdata(pci_dev
, NULL
);
584 pci_iounmap(pci_dev
, vp_dev
->ioaddr
);
585 pci_release_regions(pci_dev
);
586 pci_disable_device(pci_dev
);
590 /* the PCI probing function */
591 static int __devinit
virtio_pci_probe(struct pci_dev
*pci_dev
,
592 const struct pci_device_id
*id
)
594 struct virtio_pci_device
*vp_dev
;
597 /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
598 if (pci_dev
->device
< 0x1000 || pci_dev
->device
> 0x103f)
601 if (pci_dev
->revision
!= VIRTIO_PCI_ABI_VERSION
) {
602 printk(KERN_ERR
"virtio_pci: expected ABI version %d, got %d\n",
603 VIRTIO_PCI_ABI_VERSION
, pci_dev
->revision
);
607 /* allocate our structure and fill it out */
608 vp_dev
= kzalloc(sizeof(struct virtio_pci_device
), GFP_KERNEL
);
612 vp_dev
->vdev
.dev
.parent
= virtio_pci_root
;
613 vp_dev
->vdev
.dev
.release
= virtio_pci_release_dev
;
614 vp_dev
->vdev
.config
= &virtio_pci_config_ops
;
615 vp_dev
->pci_dev
= pci_dev
;
616 INIT_LIST_HEAD(&vp_dev
->virtqueues
);
617 spin_lock_init(&vp_dev
->lock
);
619 /* enable the device */
620 err
= pci_enable_device(pci_dev
);
624 err
= pci_request_regions(pci_dev
, "virtio-pci");
626 goto out_enable_device
;
628 vp_dev
->ioaddr
= pci_iomap(pci_dev
, 0, 0);
629 if (vp_dev
->ioaddr
== NULL
)
630 goto out_req_regions
;
632 pci_set_drvdata(pci_dev
, vp_dev
);
634 /* we use the subsystem vendor/device id as the virtio vendor/device
635 * id. this allows us to use the same PCI vendor/device id for all
636 * virtio devices and to identify the particular virtio driver by
637 * the subsytem ids */
638 vp_dev
->vdev
.id
.vendor
= pci_dev
->subsystem_vendor
;
639 vp_dev
->vdev
.id
.device
= pci_dev
->subsystem_device
;
641 /* finally register the virtio device */
642 err
= register_virtio_device(&vp_dev
->vdev
);
644 goto out_set_drvdata
;
649 pci_set_drvdata(pci_dev
, NULL
);
650 pci_iounmap(pci_dev
, vp_dev
->ioaddr
);
652 pci_release_regions(pci_dev
);
654 pci_disable_device(pci_dev
);
660 static void __devexit
virtio_pci_remove(struct pci_dev
*pci_dev
)
662 struct virtio_pci_device
*vp_dev
= pci_get_drvdata(pci_dev
);
664 unregister_virtio_device(&vp_dev
->vdev
);
668 static int virtio_pci_suspend(struct pci_dev
*pci_dev
, pm_message_t state
)
670 pci_save_state(pci_dev
);
671 pci_set_power_state(pci_dev
, PCI_D3hot
);
675 static int virtio_pci_resume(struct pci_dev
*pci_dev
)
677 pci_restore_state(pci_dev
);
678 pci_set_power_state(pci_dev
, PCI_D0
);
683 static struct pci_driver virtio_pci_driver
= {
684 .name
= "virtio-pci",
685 .id_table
= virtio_pci_id_table
,
686 .probe
= virtio_pci_probe
,
687 .remove
= virtio_pci_remove
,
689 .suspend
= virtio_pci_suspend
,
690 .resume
= virtio_pci_resume
,
694 static int __init
virtio_pci_init(void)
698 virtio_pci_root
= root_device_register("virtio-pci");
699 if (IS_ERR(virtio_pci_root
))
700 return PTR_ERR(virtio_pci_root
);
702 err
= pci_register_driver(&virtio_pci_driver
);
704 root_device_unregister(virtio_pci_root
);
709 module_init(virtio_pci_init
);
711 static void __exit
virtio_pci_exit(void)
713 pci_unregister_driver(&virtio_pci_driver
);
714 root_device_unregister(virtio_pci_root
);
717 module_exit(virtio_pci_exit
);