4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
7 * Copyright IBM Corp. 2007
10 * Anthony Liguori <aliguori@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <linux/module.h>
18 #include <linux/list.h>
19 #include <linux/pci.h>
20 #include <linux/interrupt.h>
21 #include <linux/virtio.h>
22 #include <linux/virtio_config.h>
23 #include <linux/virtio_ring.h>
24 #include <linux/virtio_pci.h>
25 #include <linux/highmem.h>
26 #include <linux/spinlock.h>
28 MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
29 MODULE_DESCRIPTION("virtio-pci");
30 MODULE_LICENSE("GPL");
33 /* Our device structure */
34 struct virtio_pci_device
36 struct virtio_device vdev
;
37 struct pci_dev
*pci_dev
;
39 /* the IO mapping for the PCI config space */
42 /* a list of queues so we can dispatch IRQs */
44 struct list_head virtqueues
;
49 struct msix_entry
*msix_entries
;
50 /* Name strings for interrupts. This size should be enough,
51 * and I'm too lazy to allocate each name separately. */
52 char (*msix_names
)[256];
53 /* Number of available vectors */
54 unsigned msix_vectors
;
55 /* Vectors allocated, excluding per-vq vectors if any */
56 unsigned msix_used_vectors
;
57 /* Whether we have vector per vq */
61 /* Constants for MSI-X */
62 /* Use first vector for configuration changes, second and the rest for
63 * virtqueues. Thus, we need at least 2 vectors for MSI-X. */
65 VP_MSIX_CONFIG_VECTOR
= 0,
66 VP_MSIX_VQ_VECTOR
= 1,
69 struct virtio_pci_vq_info
71 /* the actual virtqueue */
74 /* the number of entries in the queue */
77 /* the index of the queue */
80 /* the virtual address of the ring queue */
83 /* the list node for the virtqueues list */
84 struct list_head node
;
86 /* MSI-X vector (or none) */
90 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
91 static struct pci_device_id virtio_pci_id_table
[] = {
92 { 0x1af4, PCI_ANY_ID
, PCI_ANY_ID
, PCI_ANY_ID
, 0, 0, 0 },
96 MODULE_DEVICE_TABLE(pci
, virtio_pci_id_table
);
98 /* A PCI device has its own struct device and so does a virtio device so
99 * we create a place for the virtio devices to show up in sysfs. I think it
100 * would make more sense for virtio to not insist on having its own device. */
101 static struct device
*virtio_pci_root
;
103 /* Convert a generic virtio device to our structure */
104 static struct virtio_pci_device
*to_vp_device(struct virtio_device
*vdev
)
106 return container_of(vdev
, struct virtio_pci_device
, vdev
);
109 /* virtio config->get_features() implementation */
110 static u32
vp_get_features(struct virtio_device
*vdev
)
112 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
114 /* When someone needs more than 32 feature bits, we'll need to
115 * steal a bit to indicate that the rest are somewhere else. */
116 return ioread32(vp_dev
->ioaddr
+ VIRTIO_PCI_HOST_FEATURES
);
119 /* virtio config->finalize_features() implementation */
120 static void vp_finalize_features(struct virtio_device
*vdev
)
122 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
124 /* Give virtio_ring a chance to accept features. */
125 vring_transport_features(vdev
);
127 /* We only support 32 feature bits. */
128 BUILD_BUG_ON(ARRAY_SIZE(vdev
->features
) != 1);
129 iowrite32(vdev
->features
[0], vp_dev
->ioaddr
+VIRTIO_PCI_GUEST_FEATURES
);
132 /* virtio config->get() implementation */
133 static void vp_get(struct virtio_device
*vdev
, unsigned offset
,
134 void *buf
, unsigned len
)
136 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
137 void __iomem
*ioaddr
= vp_dev
->ioaddr
+
138 VIRTIO_PCI_CONFIG(vp_dev
) + offset
;
142 for (i
= 0; i
< len
; i
++)
143 ptr
[i
] = ioread8(ioaddr
+ i
);
146 /* the config->set() implementation. it's symmetric to the config->get()
148 static void vp_set(struct virtio_device
*vdev
, unsigned offset
,
149 const void *buf
, unsigned len
)
151 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
152 void __iomem
*ioaddr
= vp_dev
->ioaddr
+
153 VIRTIO_PCI_CONFIG(vp_dev
) + offset
;
157 for (i
= 0; i
< len
; i
++)
158 iowrite8(ptr
[i
], ioaddr
+ i
);
161 /* config->{get,set}_status() implementations */
162 static u8
vp_get_status(struct virtio_device
*vdev
)
164 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
165 return ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
/* Write the given status byte to the device's VIRTIO_PCI_STATUS register. */
168 static void vp_set_status(struct virtio_device
*vdev
, u8 status
)
170 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
171 /* We should never be setting status to 0. */
/* NOTE(review): the statement that followed the comment above (original
 * line 172) is not present here; presumably it asserted status != 0 --
 * confirm against the upstream file before relying on that. */
173 iowrite8(status
, vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
176 static void vp_reset(struct virtio_device
*vdev
)
178 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
179 /* 0 status means a reset. */
180 iowrite8(0, vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
183 /* the notify function used when creating a virt queue */
184 static void vp_notify(struct virtqueue
*vq
)
186 struct virtio_pci_device
*vp_dev
= to_vp_device(vq
->vdev
);
187 struct virtio_pci_vq_info
*info
= vq
->priv
;
189 /* we write the queue's selector into the notification register to
190 * signal the other end */
191 iowrite16(info
->queue_index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_NOTIFY
);
194 /* Handle a configuration change: Tell driver if it wants to know. */
195 static irqreturn_t
vp_config_changed(int irq
, void *opaque
)
197 struct virtio_pci_device
*vp_dev
= opaque
;
198 struct virtio_driver
*drv
;
199 drv
= container_of(vp_dev
->vdev
.dev
.driver
,
200 struct virtio_driver
, driver
);
202 if (drv
&& drv
->config_changed
)
203 drv
->config_changed(&vp_dev
->vdev
);
207 /* Notify all virtqueues on an interrupt. */
208 static irqreturn_t
vp_vring_interrupt(int irq
, void *opaque
)
210 struct virtio_pci_device
*vp_dev
= opaque
;
211 struct virtio_pci_vq_info
*info
;
212 irqreturn_t ret
= IRQ_NONE
;
215 spin_lock_irqsave(&vp_dev
->lock
, flags
);
216 list_for_each_entry(info
, &vp_dev
->virtqueues
, node
) {
217 if (vring_interrupt(irq
, info
->vq
) == IRQ_HANDLED
)
220 spin_unlock_irqrestore(&vp_dev
->lock
, flags
);
225 /* A small wrapper to also acknowledge the interrupt when it's handled.
226 * I really need an EIO hook for the vring so I can ack the interrupt once we
227 * know that we'll be handling the IRQ but before we invoke the callback since
228 * the callback may notify the host which results in the host attempting to
229 * raise an interrupt that we would then mask once we acknowledged the
231 static irqreturn_t
vp_interrupt(int irq
, void *opaque
)
233 struct virtio_pci_device
*vp_dev
= opaque
;
236 /* reading the ISR has the effect of also clearing it so it's very
237 * important to save off the value. */
238 isr
= ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_ISR
);
240 /* It's definitely not us if the ISR was not high */
244 /* Configuration change? Tell driver if it wants to know. */
245 if (isr
& VIRTIO_PCI_ISR_CONFIG
)
246 vp_config_changed(irq
, opaque
);
248 return vp_vring_interrupt(irq
, opaque
);
251 static void vp_free_vectors(struct virtio_device
*vdev
)
253 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
256 if (vp_dev
->intx_enabled
) {
257 free_irq(vp_dev
->pci_dev
->irq
, vp_dev
);
258 vp_dev
->intx_enabled
= 0;
261 for (i
= 0; i
< vp_dev
->msix_used_vectors
; ++i
)
262 free_irq(vp_dev
->msix_entries
[i
].vector
, vp_dev
);
264 if (vp_dev
->msix_enabled
) {
265 /* Disable the vector used for configuration */
266 iowrite16(VIRTIO_MSI_NO_VECTOR
,
267 vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
268 /* Flush the write out to device */
269 ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
271 pci_disable_msix(vp_dev
->pci_dev
);
272 vp_dev
->msix_enabled
= 0;
273 vp_dev
->msix_vectors
= 0;
276 vp_dev
->msix_used_vectors
= 0;
277 kfree(vp_dev
->msix_names
);
278 vp_dev
->msix_names
= NULL
;
279 kfree(vp_dev
->msix_entries
);
280 vp_dev
->msix_entries
= NULL
;
283 static int vp_request_msix_vectors(struct virtio_device
*vdev
, int nvectors
,
286 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
287 const char *name
= dev_name(&vp_dev
->vdev
.dev
);
291 vp_dev
->msix_entries
= kmalloc(nvectors
* sizeof *vp_dev
->msix_entries
,
293 if (!vp_dev
->msix_entries
)
295 vp_dev
->msix_names
= kmalloc(nvectors
* sizeof *vp_dev
->msix_names
,
297 if (!vp_dev
->msix_names
)
300 for (i
= 0; i
< nvectors
; ++i
)
301 vp_dev
->msix_entries
[i
].entry
= i
;
303 /* pci_enable_msix returns positive if we can't get this many. */
304 err
= pci_enable_msix(vp_dev
->pci_dev
, vp_dev
->msix_entries
, nvectors
);
309 vp_dev
->msix_vectors
= nvectors
;
310 vp_dev
->msix_enabled
= 1;
312 /* Set the vector used for configuration */
313 v
= vp_dev
->msix_used_vectors
;
314 snprintf(vp_dev
->msix_names
[v
], sizeof *vp_dev
->msix_names
,
316 err
= request_irq(vp_dev
->msix_entries
[v
].vector
,
317 vp_config_changed
, 0, vp_dev
->msix_names
[v
],
321 ++vp_dev
->msix_used_vectors
;
323 iowrite16(v
, vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
324 /* Verify we had enough resources to assign the vector */
325 v
= ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
326 if (v
== VIRTIO_MSI_NO_VECTOR
) {
331 if (!per_vq_vectors
) {
332 /* Shared vector for all VQs */
333 v
= vp_dev
->msix_used_vectors
;
334 snprintf(vp_dev
->msix_names
[v
], sizeof *vp_dev
->msix_names
,
335 "%s-virtqueues", name
);
336 err
= request_irq(vp_dev
->msix_entries
[v
].vector
,
337 vp_vring_interrupt
, 0, vp_dev
->msix_names
[v
],
341 ++vp_dev
->msix_used_vectors
;
345 vp_free_vectors(vdev
);
349 static int vp_request_intx(struct virtio_device
*vdev
)
352 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
354 err
= request_irq(vp_dev
->pci_dev
->irq
, vp_interrupt
,
355 IRQF_SHARED
, dev_name(&vdev
->dev
), vp_dev
);
357 vp_dev
->intx_enabled
= 1;
361 static struct virtqueue
*setup_vq(struct virtio_device
*vdev
, unsigned index
,
362 void (*callback
)(struct virtqueue
*vq
),
366 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
367 struct virtio_pci_vq_info
*info
;
368 struct virtqueue
*vq
;
369 unsigned long flags
, size
;
373 /* Select the queue we're interested in */
374 iowrite16(index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_SEL
);
376 /* Check if queue is either not available or already active. */
377 num
= ioread16(vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_NUM
);
378 if (!num
|| ioread32(vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
))
379 return ERR_PTR(-ENOENT
);
381 /* allocate and fill out our structure the represents an active
383 info
= kmalloc(sizeof(struct virtio_pci_vq_info
), GFP_KERNEL
);
385 return ERR_PTR(-ENOMEM
);
387 info
->queue_index
= index
;
389 info
->msix_vector
= msix_vec
;
391 size
= PAGE_ALIGN(vring_size(num
, VIRTIO_PCI_VRING_ALIGN
));
392 info
->queue
= alloc_pages_exact(size
, GFP_KERNEL
|__GFP_ZERO
);
393 if (info
->queue
== NULL
) {
398 /* activate the queue */
399 iowrite32(virt_to_phys(info
->queue
) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT
,
400 vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
402 /* create the vring */
403 vq
= vring_new_virtqueue(info
->num
, VIRTIO_PCI_VRING_ALIGN
,
404 vdev
, info
->queue
, vp_notify
, callback
, name
);
407 goto out_activate_queue
;
413 if (msix_vec
!= VIRTIO_MSI_NO_VECTOR
) {
414 iowrite16(msix_vec
, vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
415 msix_vec
= ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
416 if (msix_vec
== VIRTIO_MSI_NO_VECTOR
) {
422 spin_lock_irqsave(&vp_dev
->lock
, flags
);
423 list_add(&info
->node
, &vp_dev
->virtqueues
);
424 spin_unlock_irqrestore(&vp_dev
->lock
, flags
);
429 vring_del_virtqueue(vq
);
431 iowrite32(0, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
432 free_pages_exact(info
->queue
, size
);
438 static void vp_del_vq(struct virtqueue
*vq
)
440 struct virtio_pci_device
*vp_dev
= to_vp_device(vq
->vdev
);
441 struct virtio_pci_vq_info
*info
= vq
->priv
;
442 unsigned long flags
, size
;
444 spin_lock_irqsave(&vp_dev
->lock
, flags
);
445 list_del(&info
->node
);
446 spin_unlock_irqrestore(&vp_dev
->lock
, flags
);
448 iowrite16(info
->queue_index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_SEL
);
450 if (vp_dev
->msix_enabled
) {
451 iowrite16(VIRTIO_MSI_NO_VECTOR
,
452 vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
453 /* Flush the write out to device */
454 ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_ISR
);
457 vring_del_virtqueue(vq
);
459 /* Select and deactivate the queue */
460 iowrite32(0, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
462 size
= PAGE_ALIGN(vring_size(info
->num
, VIRTIO_PCI_VRING_ALIGN
));
463 free_pages_exact(info
->queue
, size
);
467 /* the config->del_vqs() implementation */
468 static void vp_del_vqs(struct virtio_device
*vdev
)
470 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
471 struct virtqueue
*vq
, *n
;
472 struct virtio_pci_vq_info
*info
;
474 list_for_each_entry_safe(vq
, n
, &vdev
->vqs
, list
) {
476 if (vp_dev
->per_vq_vectors
)
477 free_irq(vp_dev
->msix_entries
[info
->msix_vector
].vector
,
481 vp_dev
->per_vq_vectors
= false;
483 vp_free_vectors(vdev
);
486 static int vp_try_to_find_vqs(struct virtio_device
*vdev
, unsigned nvqs
,
487 struct virtqueue
*vqs
[],
488 vq_callback_t
*callbacks
[],
493 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
495 int i
, err
, nvectors
, allocated_vectors
;
498 /* Old style: one normal interrupt for change and all vqs. */
499 err
= vp_request_intx(vdev
);
503 if (per_vq_vectors
) {
504 /* Best option: one for change interrupt, one per vq. */
506 for (i
= 0; i
< nvqs
; ++i
)
510 /* Second best: one for change, shared for all vqs. */
514 err
= vp_request_msix_vectors(vdev
, nvectors
, per_vq_vectors
);
519 vp_dev
->per_vq_vectors
= per_vq_vectors
;
520 allocated_vectors
= vp_dev
->msix_used_vectors
;
521 for (i
= 0; i
< nvqs
; ++i
) {
522 if (!callbacks
[i
] || !vp_dev
->msix_enabled
)
523 msix_vec
= VIRTIO_MSI_NO_VECTOR
;
524 else if (vp_dev
->per_vq_vectors
)
525 msix_vec
= allocated_vectors
++;
527 msix_vec
= VP_MSIX_VQ_VECTOR
;
528 vqs
[i
] = setup_vq(vdev
, i
, callbacks
[i
], names
[i
], msix_vec
);
529 if (IS_ERR(vqs
[i
])) {
530 err
= PTR_ERR(vqs
[i
]);
534 if (!vp_dev
->per_vq_vectors
|| msix_vec
== VIRTIO_MSI_NO_VECTOR
)
537 /* allocate per-vq irq if available and necessary */
538 snprintf(vp_dev
->msix_names
[msix_vec
],
539 sizeof *vp_dev
->msix_names
,
541 dev_name(&vp_dev
->vdev
.dev
), names
[i
]);
542 err
= request_irq(vp_dev
->msix_entries
[msix_vec
].vector
,
544 vp_dev
->msix_names
[msix_vec
],
560 /* the config->find_vqs() implementation */
561 static int vp_find_vqs(struct virtio_device
*vdev
, unsigned nvqs
,
562 struct virtqueue
*vqs
[],
563 vq_callback_t
*callbacks
[],
568 /* Try MSI-X with one vector per queue. */
569 err
= vp_try_to_find_vqs(vdev
, nvqs
, vqs
, callbacks
, names
, true, true);
572 /* Fallback: MSI-X with one vector for config, one shared for queues. */
573 err
= vp_try_to_find_vqs(vdev
, nvqs
, vqs
, callbacks
, names
,
577 /* Finally fall back to regular interrupts. */
578 return vp_try_to_find_vqs(vdev
, nvqs
, vqs
, callbacks
, names
,
582 static struct virtio_config_ops virtio_pci_config_ops
= {
585 .get_status
= vp_get_status
,
586 .set_status
= vp_set_status
,
588 .find_vqs
= vp_find_vqs
,
589 .del_vqs
= vp_del_vqs
,
590 .get_features
= vp_get_features
,
591 .finalize_features
= vp_finalize_features
,
594 static void virtio_pci_release_dev(struct device
*_d
)
596 struct virtio_device
*dev
= container_of(_d
, struct virtio_device
, dev
);
597 struct virtio_pci_device
*vp_dev
= to_vp_device(dev
);
598 struct pci_dev
*pci_dev
= vp_dev
->pci_dev
;
601 pci_set_drvdata(pci_dev
, NULL
);
602 pci_iounmap(pci_dev
, vp_dev
->ioaddr
);
603 pci_release_regions(pci_dev
);
604 pci_disable_device(pci_dev
);
608 /* the PCI probing function */
/* Visible steps: check the device id range and ABI revision, allocate and
 * initialise a struct virtio_pci_device, enable the PCI device, request its
 * regions, map BAR 0, store the structure as PCI driver data, copy the
 * subsystem ids into the virtio id and register the virtio device.  The
 * calls at the end undo those steps in reverse order; the labels they sit
 * under are not visible here. */
609 static int __devinit
virtio_pci_probe(struct pci_dev
*pci_dev
,
610 const struct pci_device_id
*id
)
612 struct virtio_pci_device
*vp_dev
;
615 /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
616 if (pci_dev
->device
< 0x1000 || pci_dev
->device
> 0x103f)
619 if (pci_dev
->revision
!= VIRTIO_PCI_ABI_VERSION
) {
620 printk(KERN_ERR
"virtio_pci: expected ABI version %d, got %d\n",
621 VIRTIO_PCI_ABI_VERSION
, pci_dev
->revision
);
625 /* allocate our structure and fill it out */
626 vp_dev
= kzalloc(sizeof(struct virtio_pci_device
), GFP_KERNEL
);
630 vp_dev
->vdev
.dev
.parent
= virtio_pci_root
;
631 vp_dev
->vdev
.dev
.release
= virtio_pci_release_dev
;
632 vp_dev
->vdev
.config
= &virtio_pci_config_ops
;
633 vp_dev
->pci_dev
= pci_dev
;
634 INIT_LIST_HEAD(&vp_dev
->virtqueues
);
635 spin_lock_init(&vp_dev
->lock
);
637 /* enable the device */
638 err
= pci_enable_device(pci_dev
);
642 err
= pci_request_regions(pci_dev
, "virtio-pci");
644 goto out_enable_device
;
/* NOTE(review): no assignment to err is visible between the
 * pci_request_regions() call above and the goto below.  If that call
 * succeeded, err is presumably still 0 here, so a failed pci_iomap() may be
 * reported to the caller as success -- confirm, and set an error code
 * (e.g. -ENOMEM) before jumping to the cleanup path. */
646 vp_dev
->ioaddr
= pci_iomap(pci_dev
, 0, 0);
647 if (vp_dev
->ioaddr
== NULL
)
648 goto out_req_regions
;
650 pci_set_drvdata(pci_dev
, vp_dev
);
652 /* we use the subsystem vendor/device id as the virtio vendor/device
653 * id. this allows us to use the same PCI vendor/device id for all
654 * virtio devices and to identify the particular virtio driver by
655 * the subsystem ids */
656 vp_dev
->vdev
.id
.vendor
= pci_dev
->subsystem_vendor
;
657 vp_dev
->vdev
.id
.device
= pci_dev
->subsystem_device
;
659 /* finally register the virtio device */
660 err
= register_virtio_device(&vp_dev
->vdev
);
662 goto out_set_drvdata
;
/* Cleanup: each call below reverses one of the setup steps above. */
667 pci_set_drvdata(pci_dev
, NULL
);
668 pci_iounmap(pci_dev
, vp_dev
->ioaddr
);
670 pci_release_regions(pci_dev
);
672 pci_disable_device(pci_dev
);
678 static void __devexit
virtio_pci_remove(struct pci_dev
*pci_dev
)
680 struct virtio_pci_device
*vp_dev
= pci_get_drvdata(pci_dev
);
682 unregister_virtio_device(&vp_dev
->vdev
);
686 static int virtio_pci_suspend(struct pci_dev
*pci_dev
, pm_message_t state
)
688 pci_save_state(pci_dev
);
689 pci_set_power_state(pci_dev
, PCI_D3hot
);
693 static int virtio_pci_resume(struct pci_dev
*pci_dev
)
695 pci_restore_state(pci_dev
);
696 pci_set_power_state(pci_dev
, PCI_D0
);
701 static struct pci_driver virtio_pci_driver
= {
702 .name
= "virtio-pci",
703 .id_table
= virtio_pci_id_table
,
704 .probe
= virtio_pci_probe
,
705 .remove
= virtio_pci_remove
,
707 .suspend
= virtio_pci_suspend
,
708 .resume
= virtio_pci_resume
,
712 static int __init
virtio_pci_init(void)
716 virtio_pci_root
= root_device_register("virtio-pci");
717 if (IS_ERR(virtio_pci_root
))
718 return PTR_ERR(virtio_pci_root
);
720 err
= pci_register_driver(&virtio_pci_driver
);
722 root_device_unregister(virtio_pci_root
);
727 module_init(virtio_pci_init
);
729 static void __exit
virtio_pci_exit(void)
731 pci_unregister_driver(&virtio_pci_driver
);
732 root_device_unregister(virtio_pci_root
);
735 module_exit(virtio_pci_exit
);