/*
 * Virtio PCI driver
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 *
 * Authors:
 *  Anthony Liguori  <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include <linux/module.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
/* Our device structure */
struct virtio_pci_device
{
	struct virtio_device vdev;
	struct pci_dev *pci_dev;

	/* the IO mapping for the PCI config space */
	void __iomem *ioaddr;

	/* a list of queues so we can dispatch IRQs */
	spinlock_t lock;
	struct list_head virtqueues;

	/* MSI-X support */
	int msix_enabled;
	int intx_enabled;
	struct msix_entry *msix_entries;
	/* Name strings for interrupts. This size should be enough,
	 * and I'm too lazy to allocate each name separately. */
	char (*msix_names)[256];
	/* Number of available vectors */
	unsigned msix_vectors;
	/* Vectors allocated, excluding per-vq vectors if any */
	unsigned msix_used_vectors;
	/* Whether we have vector per vq */
	bool per_vq_vectors;
};
/* Constants for MSI-X */
/* Use first vector for configuration changes, second and the rest for
 * virtqueues.  Thus, we need at least 2 vectors for MSI. */
enum {
	VP_MSIX_CONFIG_VECTOR = 0,
	VP_MSIX_VQ_VECTOR = 1,
};
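/*
 * Illustrative vector layout (a sketch, not extra configuration): in
 * shared-vector mode, MSI-X vector 0 carries only configuration-change
 * interrupts and vector 1 is shared by every virtqueue.  In per-vq
 * mode, vector 0 is still the config vector, but each virtqueue with a
 * callback gets its own vector: 1, 2, 3, ...
 */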
struct virtio_pci_vq_info
{
	/* the actual virtqueue */
	struct virtqueue *vq;

	/* the number of entries in the queue */
	int num;

	/* the index of the queue */
	int queue_index;

	/* the virtual address of the ring queue */
	void *queue;

	/* the list node for the virtqueues list */
	struct list_head node;

	/* MSI-X vector (or none) */
	unsigned msix_vector;
};
/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
static struct pci_device_id virtio_pci_id_table[] = {
	{ 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
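/*
 * Note (informational): the PCI ID above only says "some virtio device".
 * Which kind of device it is comes from the PCI subsystem IDs, which
 * virtio_pci_probe() copies into the virtio device ID; by convention
 * subsystem device ID 1 is a network device, 2 a block device, and so
 * on, which is how the right virtio driver ends up binding.
 */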
/* Convert a generic virtio device to our structure */
static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
{
	return container_of(vdev, struct virtio_pci_device, vdev);
}
/* virtio config->get_features() implementation */
static u32 vp_get_features(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* When someone needs more than 32 feature bits, we'll need to
	 * steal a bit to indicate that the rest are somewhere else. */
	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
}
/* virtio config->finalize_features() implementation */
static void vp_finalize_features(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* Give virtio_ring a chance to accept features. */
	vring_transport_features(vdev);

	/* We only support 32 feature bits. */
	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
	iowrite32(vdev->features[0], vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
}
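/*
 * Sketch of the driver-side view (hypothetical example, not part of this
 * file): once features are finalized, a driver on top of this transport
 * tests individual negotiated bits with virtio_has_feature(), e.g.
 *
 *	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
 *		the device exposes a MAC address in config space;
 */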
/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	void __iomem *ioaddr = vp_dev->ioaddr +
				VIRTIO_PCI_CONFIG(vp_dev) + offset;
	u8 *ptr = buf;
	int i;

	for (i = 0; i < len; i++)
		ptr[i] = ioread8(ioaddr + i);
}
/* the config->set() implementation.  it's symmetric to the config->get()
 * implementation */
static void vp_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	void __iomem *ioaddr = vp_dev->ioaddr +
				VIRTIO_PCI_CONFIG(vp_dev) + offset;
	const u8 *ptr = buf;
	int i;

	for (i = 0; i < len; i++)
		iowrite8(ptr[i], ioaddr + i);
}
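/*
 * Sketch of a config-space access from a driver (hypothetical field and
 * offset, purely illustrative): both accessors are byte-wise, so callers
 * can read or write any structure at any offset, e.g.
 *
 *	u8 mac[6];
 *	vdev->config->get(vdev, 0, mac, sizeof(mac));
 *
 * The byte loop keeps the accessors alignment-agnostic at the cost of
 * one port access per byte.
 */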
/* config->{get,set}_status() implementations */
static u8 vp_get_status(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}
static void vp_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	/* We should never be setting status to 0. */
	BUG_ON(status == 0);
	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}
static void vp_reset(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	/* 0 status means a reset. */
	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}
/* the notify function used when creating a virt queue */
static void vp_notify(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_vq_info *info = vq->priv;

	/* we write the queue's selector into the notification register to
	 * signal the other end */
	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
}
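/*
 * Sketch of how this is reached (driver side, using the virtqueue ops of
 * this kernel vintage; the sg/data names are placeholders):
 *
 *	vq->vq_ops->add_buf(vq, sg, out_num, in_num, data);
 *	vq->vq_ops->kick(vq);
 *
 * The kick lands here, and the single 16-bit write of the queue index
 * above is what traps to the host.
 */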
/* Handle a configuration change: Tell driver if it wants to know. */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_driver *drv;
	drv = container_of(vp_dev->vdev.dev.driver,
			   struct virtio_driver, driver);

	if (drv && drv->config_changed)
		drv->config_changed(&vp_dev->vdev);
	return IRQ_HANDLED;
}
/* Notify all virtqueues on an interrupt. */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_pci_vq_info *info;
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_for_each_entry(info, &vp_dev->virtqueues, node) {
		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	return ret;
}
/* A small wrapper to also acknowledge the interrupt when it's handled.
 * I really need an EIO hook for the vring so I can ack the interrupt once we
 * know that we'll be handling the IRQ but before we invoke the callback since
 * the callback may notify the host which results in the host attempting to
 * raise an interrupt that we would then mask once we acknowledged the
 * interrupt. */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	u8 isr;

	/* reading the ISR has the effect of also clearing it so it's very
	 * important to save off the value. */
	isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);

	/* It's definitely not us if the ISR was not high */
	if (!isr)
		return IRQ_NONE;

	/* Configuration change?  Tell driver if it wants to know. */
	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vp_config_changed(irq, opaque);

	return vp_vring_interrupt(irq, opaque);
}
static void vp_free_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled) {
		free_irq(vp_dev->pci_dev->irq, vp_dev);
		vp_dev->intx_enabled = 0;
	}

	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
		free_irq(vp_dev->msix_entries[i].vector, vp_dev);

	if (vp_dev->msix_enabled) {
		/* Disable the vector used for configuration */
		iowrite16(VIRTIO_MSI_NO_VECTOR,
			  vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
		/* Flush the write out to device */
		ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);

		pci_disable_msix(vp_dev->pci_dev);
		vp_dev->msix_enabled = 0;
		vp_dev->msix_vectors = 0;
	}

	vp_dev->msix_used_vectors = 0;
	kfree(vp_dev->msix_names);
	vp_dev->msix_names = NULL;
	kfree(vp_dev->msix_entries);
	vp_dev->msix_entries = NULL;
}
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned i, v;
	int err = -ENOMEM;

	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
				       GFP_KERNEL);
	if (!vp_dev->msix_entries)
		goto error;
	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
				     GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;

	for (i = 0; i < nvectors; ++i)
		vp_dev->msix_entries[i].entry = i;

	/* pci_enable_msix returns positive if we can't get this many. */
	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
	if (err > 0)
		err = -ENOSPC;
	if (err)
		goto error;
	vp_dev->msix_vectors = nvectors;
	vp_dev->msix_enabled = 1;

	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(vp_dev->msix_entries[v].vector,
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;

	iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
	/* Verify we had enough resources to assign the vector */
	v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}

	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(vp_dev->msix_entries[v].vector,
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
	vp_free_vectors(vdev);
	return err;
}
static int vp_request_intx(struct virtio_device *vdev)
{
	int err;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
	if (!err)
		vp_dev->intx_enabled = 1;
	return err;
}
static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
				  void (*callback)(struct virtqueue *vq),
				  const char *name,
				  u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info;
	struct virtqueue *vq;
	unsigned long flags, size;
	u16 num;
	int err;

	/* Select the queue we're interested in */
	iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);

	/* Check if queue is either not available or already active. */
	num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
	if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
		return ERR_PTR(-ENOENT);

	/* allocate and fill out our structure that represents an active
	 * queue */
	info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	info->queue_index = index;
	info->num = num;
	info->msix_vector = msix_vec;

	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
	if (info->queue == NULL) {
		err = -ENOMEM;
		goto out_info;
	}

	/* activate the queue */
	iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);

	/* create the vring */
	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
				 vdev, info->queue, vp_notify, callback, name);
	if (!vq) {
		err = -ENOMEM;
		goto out_activate_queue;
	}

	vq->priv = info;
	info->vq = vq;

	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
		iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
		msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
			err = -EBUSY;
			goto out_assign;
		}
	}

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_add(&info->node, &vp_dev->virtqueues);
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	return vq;

out_assign:
	vring_del_virtqueue(vq);
out_activate_queue:
	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
	free_pages_exact(info->queue, size);
out_info:
	kfree(info);
	return ERR_PTR(err);
}
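/*
 * Worked example of the sizing math above (illustrative numbers): for a
 * hypothetical 256-entry ring, vring_size(256, VIRTIO_PCI_VRING_ALIGN)
 * covers the descriptor table (256 * 16 bytes), the available ring,
 * padding up to the 4096-byte ring alignment, then the used ring;
 * PAGE_ALIGN() rounds the sum up to whole pages so alloc_pages_exact()
 * returns physically contiguous, zeroed memory whose page frame number
 * can be programmed into VIRTIO_PCI_QUEUE_PFN.
 */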
static void vp_del_vq(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_vq_info *info = vq->priv;
	unsigned long flags, size;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_del(&info->node);
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);

	if (vp_dev->msix_enabled) {
		iowrite16(VIRTIO_MSI_NO_VECTOR,
			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
		/* Flush the write out to device */
		ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
	}

	vring_del_virtqueue(vq);

	/* Select and deactivate the queue */
	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);

	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
	free_pages_exact(info->queue, size);
	kfree(info);
}
/* the config->del_vqs() implementation */
static void vp_del_vqs(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtqueue *vq, *n;
	struct virtio_pci_vq_info *info;

	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
		info = vq->priv;
		if (vp_dev->per_vq_vectors &&
			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
				 vq);
		vp_del_vq(vq);
	}
	vp_dev->per_vq_vectors = false;

	vp_free_vectors(vdev);
}
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
			      struct virtqueue *vqs[],
			      vq_callback_t *callbacks[],
			      const char *names[],
			      bool use_msix,
			      bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u16 msix_vec;
	int i, err, nvectors, allocated_vectors;

	if (!use_msix) {
		/* Old style: one normal interrupt for change and all vqs. */
		err = vp_request_intx(vdev);
		if (err)
			goto error_request;
	} else {
		if (per_vq_vectors) {
			/* Best option: one for change interrupt, one per vq. */
			nvectors = 1;
			for (i = 0; i < nvqs; ++i)
				if (callbacks[i])
					++nvectors;
		} else {
			/* Second best: one for change, shared for all vqs. */
			nvectors = 2;
		}

		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
		if (err)
			goto error_request;
	}

	vp_dev->per_vq_vectors = per_vq_vectors;
	allocated_vectors = vp_dev->msix_used_vectors;
	for (i = 0; i < nvqs; ++i) {
		if (!callbacks[i] || !vp_dev->msix_enabled)
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;
		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto error_find;
		}
		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;
		/* allocate per-vq irq if available and necessary */
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err) {
			vp_del_vq(vqs[i]);
			goto error_find;
		}
	}
	return 0;

error_find:
	vp_del_vqs(vdev);

error_request:
	return err;
}
/* the config->find_vqs() implementation */
static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[],
		       vq_callback_t *callbacks[],
		       const char *names[])
{
	int err;

	/* Try MSI-X with one vector per queue. */
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
	if (!err)
		return 0;
	/* Fallback: MSI-X with one vector for config, one shared for queues. */
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
				 true, false);
	if (!err)
		return 0;
	/* Finally fall back to regular interrupts. */
	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
				  false, false);
}
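/*
 * Sketch of the driver-side call (hypothetical two-queue device; the
 * callback and name values are placeholders):
 *
 *	struct virtqueue *vqs[2];
 *	vq_callback_t *callbacks[] = { rx_done, tx_done };
 *	const char *names[] = { "rx", "tx" };
 *	int err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
 *
 * The three-step fallback above means a driver transparently gets
 * per-queue MSI-X when vectors are plentiful, one shared MSI-X vector
 * when they are scarce, and a single INTx line on hosts without MSI-X.
 */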
static struct virtio_config_ops virtio_pci_config_ops = {
	.get		= vp_get,
	.set		= vp_set,
	.get_status	= vp_get_status,
	.set_status	= vp_set_status,
	.reset		= vp_reset,
	.find_vqs	= vp_find_vqs,
	.del_vqs	= vp_del_vqs,
	.get_features	= vp_get_features,
	.finalize_features = vp_finalize_features,
};
static void virtio_pci_release_dev(struct device *_d)
{
	struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
	struct virtio_pci_device *vp_dev = to_vp_device(dev);
	struct pci_dev *pci_dev = vp_dev->pci_dev;

	vp_del_vqs(dev);
	pci_set_drvdata(pci_dev, NULL);
	pci_iounmap(pci_dev, vp_dev->ioaddr);
	pci_release_regions(pci_dev);
	pci_disable_device(pci_dev);
	kfree(vp_dev);
}
/* the PCI probing function */
static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
				      const struct pci_device_id *id)
{
	struct virtio_pci_device *vp_dev;
	int err;

	/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
	if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
		return -ENODEV;

	if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
		printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
		       VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
		return -ENODEV;
	}

	/* allocate our structure and fill it out */
	vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
	if (vp_dev == NULL)
		return -ENOMEM;

	vp_dev->vdev.dev.parent = &pci_dev->dev;
	vp_dev->vdev.dev.release = virtio_pci_release_dev;
	vp_dev->vdev.config = &virtio_pci_config_ops;
	vp_dev->pci_dev = pci_dev;
	INIT_LIST_HEAD(&vp_dev->virtqueues);
	spin_lock_init(&vp_dev->lock);

	/* enable the device */
	err = pci_enable_device(pci_dev);
	if (err)
		goto out;

	err = pci_request_regions(pci_dev, "virtio-pci");
	if (err)
		goto out_enable_device;

	vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
	if (vp_dev->ioaddr == NULL)
		goto out_req_regions;

	pci_set_drvdata(pci_dev, vp_dev);

	/* we use the subsystem vendor/device id as the virtio vendor/device
	 * id.  this allows us to use the same PCI vendor/device id for all
	 * virtio devices and to identify the particular virtio driver by
	 * the subsystem ids */
	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
	vp_dev->vdev.id.device = pci_dev->subsystem_device;

	/* finally register the virtio device */
	err = register_virtio_device(&vp_dev->vdev);
	if (err)
		goto out_set_drvdata;

	return 0;

out_set_drvdata:
	pci_set_drvdata(pci_dev, NULL);
	pci_iounmap(pci_dev, vp_dev->ioaddr);
out_req_regions:
	pci_release_regions(pci_dev);
out_enable_device:
	pci_disable_device(pci_dev);
out:
	kfree(vp_dev);
	return err;
}
static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)
{
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);

	unregister_virtio_device(&vp_dev->vdev);
}
#ifdef CONFIG_PM
static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state)
{
	pci_save_state(pci_dev);
	pci_set_power_state(pci_dev, PCI_D3hot);
	return 0;
}

static int virtio_pci_resume(struct pci_dev *pci_dev)
{
	pci_restore_state(pci_dev);
	pci_set_power_state(pci_dev, PCI_D0);
	return 0;
}
#endif
static struct pci_driver virtio_pci_driver = {
	.name		= "virtio-pci",
	.id_table	= virtio_pci_id_table,
	.probe		= virtio_pci_probe,
	.remove		= __devexit_p(virtio_pci_remove),
#ifdef CONFIG_PM
	.suspend	= virtio_pci_suspend,
	.resume		= virtio_pci_resume,
#endif
};
static int __init virtio_pci_init(void)
{
	return pci_register_driver(&virtio_pci_driver);
}

module_init(virtio_pci_init);

static void __exit virtio_pci_exit(void)
{
	pci_unregister_driver(&virtio_pci_driver);
}

module_exit(virtio_pci_exit);