4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
7 * Copyright IBM Corp. 2007
10 * Anthony Liguori <aliguori@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <linux/module.h>
18 #include <linux/list.h>
19 #include <linux/pci.h>
20 #include <linux/interrupt.h>
21 #include <linux/virtio.h>
22 #include <linux/virtio_config.h>
23 #include <linux/virtio_ring.h>
24 #include <linux/virtio_pci.h>
25 #include <linux/highmem.h>
26 #include <linux/spinlock.h>
28 MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
29 MODULE_DESCRIPTION("virtio-pci");
30 MODULE_LICENSE("GPL");
33 /* Our device structure */
34 struct virtio_pci_device
36 struct virtio_device vdev
;
37 struct pci_dev
*pci_dev
;
39 /* the IO mapping for the PCI config space */
42 /* a list of queues so we can dispatch IRQs */
44 struct list_head virtqueues
;
49 struct msix_entry
*msix_entries
;
50 /* Name strings for interrupts. This size should be enough,
51 * and I'm too lazy to allocate each name separately. */
52 char (*msix_names
)[256];
53 /* Number of available vectors */
54 unsigned msix_vectors
;
55 /* Vectors allocated */
56 unsigned msix_used_vectors
;
/*
 * Constants for MSI-X: the first vector is used for configuration changes,
 * the second and the rest for virtqueues.  Thus we need at least two
 * vectors for MSI-X.
 */
enum {
	VP_MSIX_CONFIG_VECTOR = 0,
	VP_MSIX_VQ_VECTOR = 1,
};
67 struct virtio_pci_vq_info
69 /* the actual virtqueue */
72 /* the number of entries in the queue */
75 /* the index of the queue */
78 /* the virtual address of the ring queue */
81 /* the list node for the virtqueues list */
82 struct list_head node
;
84 /* MSI-X vector (or none) */
88 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
89 static struct pci_device_id virtio_pci_id_table
[] = {
90 { 0x1af4, PCI_ANY_ID
, PCI_ANY_ID
, PCI_ANY_ID
, 0, 0, 0 },
94 MODULE_DEVICE_TABLE(pci
, virtio_pci_id_table
);
/*
 * A PCI device has its own struct device and so does a virtio device, so
 * we create a place for the virtio devices to show up in sysfs.  It would
 * arguably make more sense for virtio not to insist on having its own
 * device.
 */
static struct device *virtio_pci_root;
101 /* Convert a generic virtio device to our structure */
102 static struct virtio_pci_device
*to_vp_device(struct virtio_device
*vdev
)
104 return container_of(vdev
, struct virtio_pci_device
, vdev
);
107 /* virtio config->get_features() implementation */
108 static u32
vp_get_features(struct virtio_device
*vdev
)
110 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
112 /* When someone needs more than 32 feature bits, we'll need to
113 * steal a bit to indicate that the rest are somewhere else. */
114 return ioread32(vp_dev
->ioaddr
+ VIRTIO_PCI_HOST_FEATURES
);
117 /* virtio config->finalize_features() implementation */
118 static void vp_finalize_features(struct virtio_device
*vdev
)
120 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
122 /* Give virtio_ring a chance to accept features. */
123 vring_transport_features(vdev
);
125 /* We only support 32 feature bits. */
126 BUILD_BUG_ON(ARRAY_SIZE(vdev
->features
) != 1);
127 iowrite32(vdev
->features
[0], vp_dev
->ioaddr
+VIRTIO_PCI_GUEST_FEATURES
);
130 /* virtio config->get() implementation */
131 static void vp_get(struct virtio_device
*vdev
, unsigned offset
,
132 void *buf
, unsigned len
)
134 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
135 void __iomem
*ioaddr
= vp_dev
->ioaddr
+
136 VIRTIO_PCI_CONFIG(vp_dev
) + offset
;
140 for (i
= 0; i
< len
; i
++)
141 ptr
[i
] = ioread8(ioaddr
+ i
);
144 /* the config->set() implementation. it's symmetric to the config->get()
146 static void vp_set(struct virtio_device
*vdev
, unsigned offset
,
147 const void *buf
, unsigned len
)
149 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
150 void __iomem
*ioaddr
= vp_dev
->ioaddr
+
151 VIRTIO_PCI_CONFIG(vp_dev
) + offset
;
155 for (i
= 0; i
< len
; i
++)
156 iowrite8(ptr
[i
], ioaddr
+ i
);
159 /* config->{get,set}_status() implementations */
160 static u8
vp_get_status(struct virtio_device
*vdev
)
162 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
163 return ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
166 static void vp_set_status(struct virtio_device
*vdev
, u8 status
)
168 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
169 /* We should never be setting status to 0. */
171 iowrite8(status
, vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
174 static void vp_reset(struct virtio_device
*vdev
)
176 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
177 /* 0 status means a reset. */
178 iowrite8(0, vp_dev
->ioaddr
+ VIRTIO_PCI_STATUS
);
181 /* the notify function used when creating a virt queue */
182 static void vp_notify(struct virtqueue
*vq
)
184 struct virtio_pci_device
*vp_dev
= to_vp_device(vq
->vdev
);
185 struct virtio_pci_vq_info
*info
= vq
->priv
;
187 /* we write the queue's selector into the notification register to
188 * signal the other end */
189 iowrite16(info
->queue_index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_NOTIFY
);
192 /* Handle a configuration change: Tell driver if it wants to know. */
193 static irqreturn_t
vp_config_changed(int irq
, void *opaque
)
195 struct virtio_pci_device
*vp_dev
= opaque
;
196 struct virtio_driver
*drv
;
197 drv
= container_of(vp_dev
->vdev
.dev
.driver
,
198 struct virtio_driver
, driver
);
200 if (drv
&& drv
->config_changed
)
201 drv
->config_changed(&vp_dev
->vdev
);
205 /* Notify all virtqueues on an interrupt. */
206 static irqreturn_t
vp_vring_interrupt(int irq
, void *opaque
)
208 struct virtio_pci_device
*vp_dev
= opaque
;
209 struct virtio_pci_vq_info
*info
;
210 irqreturn_t ret
= IRQ_NONE
;
213 spin_lock_irqsave(&vp_dev
->lock
, flags
);
214 list_for_each_entry(info
, &vp_dev
->virtqueues
, node
) {
215 if (vring_interrupt(irq
, info
->vq
) == IRQ_HANDLED
)
218 spin_unlock_irqrestore(&vp_dev
->lock
, flags
);
223 /* A small wrapper to also acknowledge the interrupt when it's handled.
224 * I really need an EIO hook for the vring so I can ack the interrupt once we
225 * know that we'll be handling the IRQ but before we invoke the callback since
226 * the callback may notify the host which results in the host attempting to
227 * raise an interrupt that we would then mask once we acknowledged the
229 static irqreturn_t
vp_interrupt(int irq
, void *opaque
)
231 struct virtio_pci_device
*vp_dev
= opaque
;
234 /* reading the ISR has the effect of also clearing it so it's very
235 * important to save off the value. */
236 isr
= ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_ISR
);
238 /* It's definitely not us if the ISR was not high */
242 /* Configuration change? Tell driver if it wants to know. */
243 if (isr
& VIRTIO_PCI_ISR_CONFIG
)
244 vp_config_changed(irq
, opaque
);
246 return vp_vring_interrupt(irq
, opaque
);
249 static void vp_free_vectors(struct virtio_device
*vdev
)
251 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
254 if (vp_dev
->intx_enabled
) {
255 free_irq(vp_dev
->pci_dev
->irq
, vp_dev
);
256 vp_dev
->intx_enabled
= 0;
259 for (i
= 0; i
< vp_dev
->msix_used_vectors
; ++i
)
260 free_irq(vp_dev
->msix_entries
[i
].vector
, vp_dev
);
261 vp_dev
->msix_used_vectors
= 0;
263 if (vp_dev
->msix_enabled
) {
264 /* Disable the vector used for configuration */
265 iowrite16(VIRTIO_MSI_NO_VECTOR
,
266 vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
267 /* Flush the write out to device */
268 ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
270 vp_dev
->msix_enabled
= 0;
271 pci_disable_msix(vp_dev
->pci_dev
);
275 static int vp_enable_msix(struct pci_dev
*dev
, struct msix_entry
*entries
,
276 int *options
, int noptions
)
279 for (i
= 0; i
< noptions
; ++i
)
280 if (!pci_enable_msix(dev
, entries
, options
[i
]))
285 static int vp_request_vectors(struct virtio_device
*vdev
, unsigned max_vqs
)
287 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
288 const char *name
= dev_name(&vp_dev
->vdev
.dev
);
291 /* We want at most one vector per queue and one for config changes.
292 * Fallback to separate vectors for config and a shared for queues.
293 * Finally fall back to regular interrupts. */
294 int options
[] = { max_vqs
+ 1, 2 };
295 int nvectors
= max(options
[0], options
[1]);
297 vp_dev
->msix_entries
= kmalloc(nvectors
* sizeof *vp_dev
->msix_entries
,
299 if (!vp_dev
->msix_entries
)
301 vp_dev
->msix_names
= kmalloc(nvectors
* sizeof *vp_dev
->msix_names
,
303 if (!vp_dev
->msix_names
)
306 for (i
= 0; i
< nvectors
; ++i
)
307 vp_dev
->msix_entries
[i
].entry
= i
;
309 err
= vp_enable_msix(vp_dev
->pci_dev
, vp_dev
->msix_entries
,
310 options
, ARRAY_SIZE(options
));
312 /* Can't allocate enough MSI-X vectors, use regular interrupt */
313 vp_dev
->msix_vectors
= 0;
314 err
= request_irq(vp_dev
->pci_dev
->irq
, vp_interrupt
,
315 IRQF_SHARED
, name
, vp_dev
);
318 vp_dev
->intx_enabled
= 1;
320 vp_dev
->msix_vectors
= err
;
321 vp_dev
->msix_enabled
= 1;
323 /* Set the vector used for configuration */
324 v
= vp_dev
->msix_used_vectors
;
325 snprintf(vp_dev
->msix_names
[v
], sizeof *vp_dev
->msix_names
,
327 err
= request_irq(vp_dev
->msix_entries
[v
].vector
,
328 vp_config_changed
, 0, vp_dev
->msix_names
[v
],
332 ++vp_dev
->msix_used_vectors
;
334 iowrite16(v
, vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
335 /* Verify we had enough resources to assign the vector */
336 v
= ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_CONFIG_VECTOR
);
337 if (v
== VIRTIO_MSI_NO_VECTOR
) {
343 if (vp_dev
->msix_vectors
&& vp_dev
->msix_vectors
!= max_vqs
+ 1) {
344 /* Shared vector for all VQs */
345 v
= vp_dev
->msix_used_vectors
;
346 snprintf(vp_dev
->msix_names
[v
], sizeof *vp_dev
->msix_names
,
347 "%s-virtqueues", name
);
348 err
= request_irq(vp_dev
->msix_entries
[v
].vector
,
349 vp_vring_interrupt
, 0, vp_dev
->msix_names
[v
],
353 ++vp_dev
->msix_used_vectors
;
357 vp_free_vectors(vdev
);
358 kfree(vp_dev
->msix_names
);
360 kfree(vp_dev
->msix_entries
);
365 static struct virtqueue
*vp_find_vq(struct virtio_device
*vdev
, unsigned index
,
366 void (*callback
)(struct virtqueue
*vq
),
369 struct virtio_pci_device
*vp_dev
= to_vp_device(vdev
);
370 struct virtio_pci_vq_info
*info
;
371 struct virtqueue
*vq
;
372 unsigned long flags
, size
;
376 /* Select the queue we're interested in */
377 iowrite16(index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_SEL
);
379 /* Check if queue is either not available or already active. */
380 num
= ioread16(vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_NUM
);
381 if (!num
|| ioread32(vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
))
382 return ERR_PTR(-ENOENT
);
384 /* allocate and fill out our structure the represents an active
386 info
= kmalloc(sizeof(struct virtio_pci_vq_info
), GFP_KERNEL
);
388 return ERR_PTR(-ENOMEM
);
390 info
->queue_index
= index
;
392 info
->vector
= VIRTIO_MSI_NO_VECTOR
;
394 size
= PAGE_ALIGN(vring_size(num
, VIRTIO_PCI_VRING_ALIGN
));
395 info
->queue
= alloc_pages_exact(size
, GFP_KERNEL
|__GFP_ZERO
);
396 if (info
->queue
== NULL
) {
401 /* activate the queue */
402 iowrite32(virt_to_phys(info
->queue
) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT
,
403 vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
405 /* create the vring */
406 vq
= vring_new_virtqueue(info
->num
, VIRTIO_PCI_VRING_ALIGN
,
407 vdev
, info
->queue
, vp_notify
, callback
, name
);
410 goto out_activate_queue
;
416 /* allocate per-vq vector if available and necessary */
417 if (callback
&& vp_dev
->msix_used_vectors
< vp_dev
->msix_vectors
) {
418 vector
= vp_dev
->msix_used_vectors
;
419 snprintf(vp_dev
->msix_names
[vector
], sizeof *vp_dev
->msix_names
,
420 "%s-%s", dev_name(&vp_dev
->vdev
.dev
), name
);
421 err
= request_irq(vp_dev
->msix_entries
[vector
].vector
,
423 vp_dev
->msix_names
[vector
], vq
);
425 goto out_request_irq
;
426 info
->vector
= vector
;
427 ++vp_dev
->msix_used_vectors
;
429 vector
= VP_MSIX_VQ_VECTOR
;
431 if (callback
&& vp_dev
->msix_enabled
) {
432 iowrite16(vector
, vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
433 vector
= ioread16(vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
434 if (vector
== VIRTIO_MSI_NO_VECTOR
) {
440 spin_lock_irqsave(&vp_dev
->lock
, flags
);
441 list_add(&info
->node
, &vp_dev
->virtqueues
);
442 spin_unlock_irqrestore(&vp_dev
->lock
, flags
);
447 if (info
->vector
!= VIRTIO_MSI_NO_VECTOR
) {
448 free_irq(vp_dev
->msix_entries
[info
->vector
].vector
, vq
);
449 --vp_dev
->msix_used_vectors
;
452 vring_del_virtqueue(vq
);
454 iowrite32(0, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
455 free_pages_exact(info
->queue
, size
);
461 static void vp_del_vq(struct virtqueue
*vq
)
463 struct virtio_pci_device
*vp_dev
= to_vp_device(vq
->vdev
);
464 struct virtio_pci_vq_info
*info
= vq
->priv
;
467 iowrite16(info
->queue_index
, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_SEL
);
469 if (info
->vector
!= VIRTIO_MSI_NO_VECTOR
)
470 free_irq(vp_dev
->msix_entries
[info
->vector
].vector
, vq
);
472 if (vp_dev
->msix_enabled
) {
473 iowrite16(VIRTIO_MSI_NO_VECTOR
,
474 vp_dev
->ioaddr
+ VIRTIO_MSI_QUEUE_VECTOR
);
475 /* Flush the write out to device */
476 ioread8(vp_dev
->ioaddr
+ VIRTIO_PCI_ISR
);
479 vring_del_virtqueue(vq
);
481 /* Select and deactivate the queue */
482 iowrite32(0, vp_dev
->ioaddr
+ VIRTIO_PCI_QUEUE_PFN
);
484 size
= PAGE_ALIGN(vring_size(info
->num
, VIRTIO_PCI_VRING_ALIGN
));
485 free_pages_exact(info
->queue
, size
);
489 /* the config->del_vqs() implementation */
490 static void vp_del_vqs(struct virtio_device
*vdev
)
492 struct virtqueue
*vq
, *n
;
494 list_for_each_entry_safe(vq
, n
, &vdev
->vqs
, list
)
497 vp_free_vectors(vdev
);
500 /* the config->find_vqs() implementation */
501 static int vp_find_vqs(struct virtio_device
*vdev
, unsigned nvqs
,
502 struct virtqueue
*vqs
[],
503 vq_callback_t
*callbacks
[],
509 /* How many vectors would we like? */
510 for (i
= 0; i
< nvqs
; ++i
)
514 err
= vp_request_vectors(vdev
, vectors
);
518 for (i
= 0; i
< nvqs
; ++i
) {
519 vqs
[i
] = vp_find_vq(vdev
, i
, callbacks
[i
], names
[i
]);
529 return PTR_ERR(vqs
[i
]);
532 static struct virtio_config_ops virtio_pci_config_ops
= {
535 .get_status
= vp_get_status
,
536 .set_status
= vp_set_status
,
538 .find_vqs
= vp_find_vqs
,
539 .del_vqs
= vp_del_vqs
,
540 .get_features
= vp_get_features
,
541 .finalize_features
= vp_finalize_features
,
544 static void virtio_pci_release_dev(struct device
*_d
)
546 struct virtio_device
*dev
= container_of(_d
, struct virtio_device
, dev
);
547 struct virtio_pci_device
*vp_dev
= to_vp_device(dev
);
548 struct pci_dev
*pci_dev
= vp_dev
->pci_dev
;
551 pci_set_drvdata(pci_dev
, NULL
);
552 pci_iounmap(pci_dev
, vp_dev
->ioaddr
);
553 pci_release_regions(pci_dev
);
554 pci_disable_device(pci_dev
);
558 /* the PCI probing function */
559 static int __devinit
virtio_pci_probe(struct pci_dev
*pci_dev
,
560 const struct pci_device_id
*id
)
562 struct virtio_pci_device
*vp_dev
;
565 /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
566 if (pci_dev
->device
< 0x1000 || pci_dev
->device
> 0x103f)
569 if (pci_dev
->revision
!= VIRTIO_PCI_ABI_VERSION
) {
570 printk(KERN_ERR
"virtio_pci: expected ABI version %d, got %d\n",
571 VIRTIO_PCI_ABI_VERSION
, pci_dev
->revision
);
575 /* allocate our structure and fill it out */
576 vp_dev
= kzalloc(sizeof(struct virtio_pci_device
), GFP_KERNEL
);
580 vp_dev
->vdev
.dev
.parent
= virtio_pci_root
;
581 vp_dev
->vdev
.dev
.release
= virtio_pci_release_dev
;
582 vp_dev
->vdev
.config
= &virtio_pci_config_ops
;
583 vp_dev
->pci_dev
= pci_dev
;
584 INIT_LIST_HEAD(&vp_dev
->virtqueues
);
585 spin_lock_init(&vp_dev
->lock
);
587 /* enable the device */
588 err
= pci_enable_device(pci_dev
);
592 err
= pci_request_regions(pci_dev
, "virtio-pci");
594 goto out_enable_device
;
596 vp_dev
->ioaddr
= pci_iomap(pci_dev
, 0, 0);
597 if (vp_dev
->ioaddr
== NULL
)
598 goto out_req_regions
;
600 pci_set_drvdata(pci_dev
, vp_dev
);
602 /* we use the subsystem vendor/device id as the virtio vendor/device
603 * id. this allows us to use the same PCI vendor/device id for all
604 * virtio devices and to identify the particular virtio driver by
605 * the subsytem ids */
606 vp_dev
->vdev
.id
.vendor
= pci_dev
->subsystem_vendor
;
607 vp_dev
->vdev
.id
.device
= pci_dev
->subsystem_device
;
609 /* finally register the virtio device */
610 err
= register_virtio_device(&vp_dev
->vdev
);
612 goto out_set_drvdata
;
617 pci_set_drvdata(pci_dev
, NULL
);
618 pci_iounmap(pci_dev
, vp_dev
->ioaddr
);
620 pci_release_regions(pci_dev
);
622 pci_disable_device(pci_dev
);
628 static void __devexit
virtio_pci_remove(struct pci_dev
*pci_dev
)
630 struct virtio_pci_device
*vp_dev
= pci_get_drvdata(pci_dev
);
632 unregister_virtio_device(&vp_dev
->vdev
);
636 static int virtio_pci_suspend(struct pci_dev
*pci_dev
, pm_message_t state
)
638 pci_save_state(pci_dev
);
639 pci_set_power_state(pci_dev
, PCI_D3hot
);
643 static int virtio_pci_resume(struct pci_dev
*pci_dev
)
645 pci_restore_state(pci_dev
);
646 pci_set_power_state(pci_dev
, PCI_D0
);
651 static struct pci_driver virtio_pci_driver
= {
652 .name
= "virtio-pci",
653 .id_table
= virtio_pci_id_table
,
654 .probe
= virtio_pci_probe
,
655 .remove
= virtio_pci_remove
,
657 .suspend
= virtio_pci_suspend
,
658 .resume
= virtio_pci_resume
,
662 static int __init
virtio_pci_init(void)
666 virtio_pci_root
= root_device_register("virtio-pci");
667 if (IS_ERR(virtio_pci_root
))
668 return PTR_ERR(virtio_pci_root
);
670 err
= pci_register_driver(&virtio_pci_driver
);
672 device_unregister(virtio_pci_root
);
677 module_init(virtio_pci_init
);
679 static void __exit
virtio_pci_exit(void)
681 pci_unregister_driver(&virtio_pci_driver
);
682 root_device_unregister(virtio_pci_root
);
685 module_exit(virtio_pci_exit
);