[qemu-kvm/markmc.git] hw/device-assignment.c
/*
 * Copyright (c) 2007, Neocleus Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Assign a PCI device from the host to a guest VM.
 *
 * Adapted for KVM by Qumranet.
 *
 * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/io.h>
#include <pci/pci.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "qemu-kvm.h"
#include "hw.h"
#include "pc.h"
#include "sysemu.h"
#include "console.h"
#include "device-assignment.h"
#include "loader.h"
/* From linux/ioport.h */
#define IORESOURCE_IO       0x00000100  /* Resource type */
#define IORESOURCE_MEM      0x00000200
#define IORESOURCE_IRQ      0x00000400
#define IORESOURCE_DMA      0x00000800
#define IORESOURCE_PREFETCH 0x00001000  /* No side effects */

/* #define DEVICE_ASSIGNMENT_DEBUG 1 */

#ifdef DEVICE_ASSIGNMENT_DEBUG
#define DEBUG(fmt, ...)                                       \
    do {                                                      \
        fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);  \
    } while (0)
#else
#define DEBUG(fmt, ...) do { } while(0)
#endif
static void assigned_dev_load_option_rom(AssignedDevice *dev);
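/*
 * Port I/O BARs are not mapped into the guest.  Guest accesses trap into
 * these handlers, which translate the emulated (guest) port range into the
 * host port range of the real BAR and forward the access with direct
 * inX()/outX() calls.
 */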
static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
{
    return region->u.r_baseport + (addr - region->e_physbase);
}

static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
                                       uint32_t value)
{
    AssignedDevRegion *r_access = opaque;
    uint32_t r_pio = guest_to_host_ioport(r_access, addr);

    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
          r_pio, (int)r_access->e_physbase,
          (unsigned long)r_access->u.r_baseport, value);

    outb(value, r_pio);
}

static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
                                       uint32_t value)
{
    AssignedDevRegion *r_access = opaque;
    uint32_t r_pio = guest_to_host_ioport(r_access, addr);

    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
          r_pio, (int)r_access->e_physbase,
          (unsigned long)r_access->u.r_baseport, value);

    outw(value, r_pio);
}

static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
                                       uint32_t value)
{
    AssignedDevRegion *r_access = opaque;
    uint32_t r_pio = guest_to_host_ioport(r_access, addr);

    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
          r_pio, (int)r_access->e_physbase,
          (unsigned long)r_access->u.r_baseport, value);

    outl(value, r_pio);
}
static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
{
    AssignedDevRegion *r_access = opaque;
    uint32_t r_pio = guest_to_host_ioport(r_access, addr);
    uint32_t value;

    value = inb(r_pio);

    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
          r_pio, (int)r_access->e_physbase,
          (unsigned long)r_access->u.r_baseport, value);

    return value;
}
static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
{
    AssignedDevRegion *r_access = opaque;
    uint32_t r_pio = guest_to_host_ioport(r_access, addr);
    uint32_t value;

    value = inw(r_pio);

    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
          r_pio, (int)r_access->e_physbase,
          (unsigned long)r_access->u.r_baseport, value);

    return value;
}

static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
{
    AssignedDevRegion *r_access = opaque;
    uint32_t r_pio = guest_to_host_ioport(r_access, addr);
    uint32_t value;

    value = inl(r_pio);

    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
          r_pio, (int)r_access->e_physbase,
          (unsigned long)r_access->u.r_baseport, value);

    return value;
}
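/*
 * Map callback for MMIO BARs: register the mmap'ed host BAR with KVM at the
 * guest physical address chosen by BAR emulation, tearing down any previous
 * mapping first.  If the BAR contains the MSI-X table, that page is unmapped
 * and replaced with emulated MMIO (mmio_index) so guest writes to the table
 * can be intercepted.
 */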
static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
                                   uint32_t e_phys, uint32_t e_size, int type)
{
    AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
    AssignedDevRegion *region = &r_dev->v_addrs[region_num];
    PCIRegion *real_region = &r_dev->real_device.regions[region_num];
    uint32_t old_ephys = region->e_physbase;
    uint32_t old_esize = region->e_size;
    int first_map = (region->e_size == 0);
    int ret = 0;

    DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
          e_phys, region->u.r_virtbase, type, e_size, region_num);

    region->e_physbase = e_phys;
    region->e_size = e_size;

    if (!first_map)
        kvm_destroy_phys_mem(kvm_context, old_ephys,
                             TARGET_PAGE_ALIGN(old_esize));

    if (e_size > 0) {
        /* deal with MSI-X MMIO page */
        if (real_region->base_addr <= r_dev->msix_table_addr &&
                real_region->base_addr + real_region->size >=
                r_dev->msix_table_addr) {
            int offset = r_dev->msix_table_addr - real_region->base_addr;
            ret = munmap(region->u.r_virtbase + offset, TARGET_PAGE_SIZE);
            if (ret == 0)
                DEBUG("munmap done, virt_base 0x%p\n",
                      region->u.r_virtbase + offset);
            else {
                fprintf(stderr, "%s: fail munmap msix table!\n", __func__);
                exit(1);
            }
            cpu_register_physical_memory(e_phys + offset,
                                         TARGET_PAGE_SIZE, r_dev->mmio_index);
        }
        ret = kvm_register_phys_mem(kvm_context, e_phys,
                                    region->u.r_virtbase,
                                    TARGET_PAGE_ALIGN(e_size), 0);
    }

    if (ret != 0) {
        fprintf(stderr, "%s: Error: create new mapping failed\n", __func__);
        exit(1);
    }
}
static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
                                    uint32_t addr, uint32_t size, int type)
{
    AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
    AssignedDevRegion *region = &r_dev->v_addrs[region_num];
    int first_map = (region->e_size == 0);
    CPUState *env;

    region->e_physbase = addr;
    region->e_size = size;

    DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d region_num=%d \n",
          addr, region->u.r_baseport, type, size, region_num);

    if (first_map) {
        struct ioperm_data *data;

        data = qemu_mallocz(sizeof(struct ioperm_data));
        if (data == NULL) {
            fprintf(stderr, "%s: Out of memory\n", __func__);
            exit(1);
        }

        data->start_port = region->u.r_baseport;
        data->num = region->r_size;
        data->turn_on = 1;

        kvm_add_ioperm_data(data);

        for (env = first_cpu; env; env = env->next_cpu)
            kvm_ioperm(env, data);
    }

    register_ioport_read(addr, size, 1, assigned_dev_ioport_readb,
                         (r_dev->v_addrs + region_num));
    register_ioport_read(addr, size, 2, assigned_dev_ioport_readw,
                         (r_dev->v_addrs + region_num));
    register_ioport_read(addr, size, 4, assigned_dev_ioport_readl,
                         (r_dev->v_addrs + region_num));
    register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb,
                          (r_dev->v_addrs + region_num));
    register_ioport_write(addr, size, 2, assigned_dev_ioport_writew,
                          (r_dev->v_addrs + region_num));
    register_ioport_write(addr, size, 4, assigned_dev_ioport_writel,
                          (r_dev->v_addrs + region_num));
}
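/* Walk the capability list of the real device (via libpci) and return the
 * config space offset of capability 'cap', or 0 if it is not present. */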
static uint8_t pci_find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
{
    int id;
    int max_cap = 48;
    int pos = PCI_CAPABILITY_LIST;
    int status;

    status = pci_read_byte(pci_dev, PCI_STATUS);
    if ((status & PCI_STATUS_CAP_LIST) == 0)
        return 0;

    while (max_cap--) {
        pos = pci_read_byte(pci_dev, pos);
        if (pos < 0x40)
            break;

        pos &= ~3;
        id = pci_read_byte(pci_dev, pos + PCI_CAP_LIST_ID);

        if (id == 0xff)
            break;
        if (id == cap)
            return pos;

        pos += PCI_CAP_LIST_NEXT;
    }
    return 0;
}
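/*
 * Config space write handler: the command register (0x04) is mirrored into
 * the emulated config space and still written to the device below; BARs,
 * the expansion ROM, the capability pointer, interrupt line/pin and the
 * emulated capability area are handled purely by QEMU's PCI emulation;
 * everything else is written through to the real device's config space.
 */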
static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
                                          uint32_t val, int len)
{
    int fd;
    ssize_t ret;
    AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);

    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
          (uint16_t) address, val, len);

    if (address == 0x4) {
        pci_default_write_config(d, address, val, len);
        /* Continue to program the card */
    }

    if ((address >= 0x10 && address <= 0x24) || address == 0x30 ||
        address == 0x34 || address == 0x3c || address == 0x3d ||
        pci_access_cap_config(d, address, len)) {
        /* used for update-mappings (BAR emulation) */
        pci_default_write_config(d, address, val, len);
        return;
    }

    DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
          (uint16_t) address, val, len);

    fd = pci_dev->real_device.config_fd;

again:
    ret = pwrite(fd, &val, len, address);
    if (ret != len) {
        if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
            goto again;

        fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
                __func__, ret, errno);

        exit(1);
    }
}
static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
                                             int len)
{
    uint32_t val = 0;
    int fd;
    ssize_t ret;
    AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);

    if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) ||
        (address >= 0x10 && address <= 0x24) || address == 0x30 ||
        address == 0x34 || address == 0x3c || address == 0x3d ||
        pci_access_cap_config(d, address, len)) {
        val = pci_default_read_config(d, address, len);
        DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
              (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
        return val;
    }

    /* vga specific, remove later */
    if (address == 0xFC)
        goto do_log;

    fd = pci_dev->real_device.config_fd;

again:
    ret = pread(fd, &val, len, address);
    if (ret != len) {
        if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
            goto again;

        fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
                __func__, ret, errno);

        exit(1);
    }

do_log:
    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
          (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);

    if (!pci_dev->cap.available) {
        /* kill the special capabilities */
        if (address == 4 && len == 4)
            val &= ~0x100000;
        else if (address == 6)
            val &= ~0x10;
    }

    return val;
}
static int assigned_dev_register_regions(PCIRegion *io_regions,
                                         unsigned long regions_num,
                                         AssignedDevice *pci_dev)
{
    uint32_t i;
    PCIRegion *cur_region = io_regions;

    for (i = 0; i < regions_num; i++, cur_region++) {
        if (!cur_region->valid)
            continue;
        pci_dev->v_addrs[i].num = i;

        /* handle memory io regions */
        if (cur_region->type & IORESOURCE_MEM) {
            int t = cur_region->type & IORESOURCE_PREFETCH
                ? PCI_ADDRESS_SPACE_MEM_PREFETCH
                : PCI_ADDRESS_SPACE_MEM;
            if (cur_region->size & 0xFFF) {
                fprintf(stderr, "Unable to assign device: PCI region %d "
                        "at address 0x%llx has size 0x%x, "
                        " which is not a multiple of 4K\n",
                        i, (unsigned long long)cur_region->base_addr,
                        cur_region->size);
                return -1;
            }

            /* map physical memory */
            pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
            if (i == PCI_ROM_SLOT) {
                pci_dev->v_addrs[i].u.r_virtbase =
                    mmap(NULL,
                         (cur_region->size + 0xFFF) & 0xFFFFF000,
                         PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE,
                         0, (off_t) 0);
            } else {
                pci_dev->v_addrs[i].u.r_virtbase =
                    mmap(NULL,
                         (cur_region->size + 0xFFF) & 0xFFFFF000,
                         PROT_WRITE | PROT_READ, MAP_SHARED,
                         cur_region->resource_fd, (off_t) 0);
            }

            if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) {
                pci_dev->v_addrs[i].u.r_virtbase = NULL;
                fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
                        "\n", __func__,
                        (uint32_t) (cur_region->base_addr));
                return -1;
            }

            if (i == PCI_ROM_SLOT) {
                memset(pci_dev->v_addrs[i].u.r_virtbase, 0,
                       (cur_region->size + 0xFFF) & 0xFFFFF000);
                mprotect(pci_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
                         (cur_region->size + 0xFFF) & 0xFFFFF000, PROT_READ);
            }

            pci_dev->v_addrs[i].r_size = cur_region->size;
            pci_dev->v_addrs[i].e_size = 0;

            /* add offset */
            pci_dev->v_addrs[i].u.r_virtbase +=
                (cur_region->base_addr & 0xFFF);

            pci_register_bar((PCIDevice *) pci_dev, i,
                             cur_region->size, t,
                             assigned_dev_iomem_map);
            continue;
        }
        /* handle port io regions */
        pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
        pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
        pci_dev->v_addrs[i].r_size = cur_region->size;
        pci_dev->v_addrs[i].e_size = 0;

        pci_register_bar((PCIDevice *) pci_dev, i,
                         cur_region->size, PCI_ADDRESS_SPACE_IO,
                         assigned_dev_ioport_map);

        /* not relevant for port io */
        pci_dev->v_addrs[i].memory_index = 0;
    }

    /* success */
    return 0;
}
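/*
 * Collect the real device's config space and BAR layout from sysfs
 * (/sys/bus/pci/devices/0000:BB:DD.F/{config,resource,resourceN,vendor,
 * device}).  A 'physfn' link marks an SR-IOV virtual function, whose
 * command register then has to be emulated.
 */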
static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
                           uint8_t r_dev, uint8_t r_func)
{
    char dir[128], name[128];
    int fd, r = 0;
    FILE *f;
    unsigned long long start, end, size, flags;
    unsigned long id;
    struct stat statbuf;
    PCIRegion *rp;
    PCIDevRegions *dev = &pci_dev->real_device;

    dev->region_number = 0;

    snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
             r_bus, r_dev, r_func);

    snprintf(name, sizeof(name), "%sconfig", dir);

    fd = open(name, O_RDWR);
    if (fd == -1) {
        fprintf(stderr, "%s: %s: %m\n", __func__, name);
        return 1;
    }
    dev->config_fd = fd;
again:
    r = read(fd, pci_dev->dev.config, sizeof(pci_dev->dev.config));
    if (r < 0) {
        if (errno == EINTR || errno == EAGAIN)
            goto again;
        fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
    }

    snprintf(name, sizeof(name), "%sresource", dir);

    f = fopen(name, "r");
    if (f == NULL) {
        fprintf(stderr, "%s: %s: %m\n", __func__, name);
        return 1;
    }

    for (r = 0; r < PCI_NUM_REGIONS; r++) {
        if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3)
            break;

        rp = dev->regions + r;
        rp->valid = 0;
        size = end - start + 1;
        flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
        if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
            continue;
        if (flags & IORESOURCE_MEM) {
            flags &= ~IORESOURCE_IO;
            if (r != PCI_ROM_SLOT) {
                snprintf(name, sizeof(name), "%sresource%d", dir, r);
                fd = open(name, O_RDWR);
                if (fd == -1)
                    continue;
                rp->resource_fd = fd;
            }
        } else
            flags &= ~IORESOURCE_PREFETCH;

        rp->type = flags;
        rp->valid = 1;
        rp->base_addr = start;
        rp->size = size;
        DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
              r, rp->size, start, rp->type, rp->resource_fd);
    }

    fclose(f);

    /* read and fill vendor ID */
    snprintf(name, sizeof(name), "%svendor", dir);
    f = fopen(name, "r");
    if (f == NULL) {
        fprintf(stderr, "%s: %s: %m\n", __func__, name);
        return 1;
    }
    if (fscanf(f, "%li\n", &id) == 1) {
        pci_dev->dev.config[0] = id & 0xff;
        pci_dev->dev.config[1] = (id & 0xff00) >> 8;
    }
    fclose(f);

    /* read and fill device ID */
    snprintf(name, sizeof(name), "%sdevice", dir);
    f = fopen(name, "r");
    if (f == NULL) {
        fprintf(stderr, "%s: %s: %m\n", __func__, name);
        return 1;
    }
    if (fscanf(f, "%li\n", &id) == 1) {
        pci_dev->dev.config[2] = id & 0xff;
        pci_dev->dev.config[3] = (id & 0xff00) >> 8;
    }
    fclose(f);

    /* dealing with virtual function device */
    snprintf(name, sizeof(name), "%sphysfn/", dir);
    if (!stat(name, &statbuf))
        pci_dev->need_emulate_cmd = 1;
    else
        pci_dev->need_emulate_cmd = 0;

    dev->region_number = r;
    return 0;
}
static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs);

#ifdef KVM_CAP_IRQ_ROUTING
static void free_dev_irq_entries(AssignedDevice *dev)
{
    int i;

    for (i = 0; i < dev->irq_entries_nr; i++)
        kvm_del_routing_entry(kvm_context, &dev->entry[i]);
    free(dev->entry);
    dev->entry = NULL;
    dev->irq_entries_nr = 0;
}
#endif
static void free_assigned_device(AssignedDevice *dev)
{
    if (dev) {
        int i;

        for (i = 0; i < dev->real_device.region_number; i++) {
            PCIRegion *pci_region = &dev->real_device.regions[i];
            AssignedDevRegion *region = &dev->v_addrs[i];

            if (!pci_region->valid)
                continue;

            if (pci_region->type & IORESOURCE_IO) {
                kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
                continue;
            } else if (pci_region->type & IORESOURCE_MEM) {
                if (region->e_size > 0)
                    kvm_destroy_phys_mem(kvm_context, region->e_physbase,
                                         TARGET_PAGE_ALIGN(region->e_size));

                if (region->u.r_virtbase) {
                    int ret = munmap(region->u.r_virtbase,
                                     (pci_region->size + 0xFFF) & 0xFFFFF000);
                    if (ret != 0)
                        fprintf(stderr,
                                "Failed to unmap assigned device region: %s\n",
                                strerror(errno));
                }
            }
        }

        if (dev->real_device.config_fd) {
            close(dev->real_device.config_fd);
            dev->real_device.config_fd = 0;
        }

#ifdef KVM_CAP_IRQ_ROUTING
        free_dev_irq_entries(dev);
#endif
    }
}
static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
{
    return (uint32_t)bus << 8 | (uint32_t)devfn;
}
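/* Hand the host device over to the kvm kernel module, requesting IOMMU
 * protection for its DMA when available and not disabled. */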
static int assign_device(AssignedDevice *dev)
{
    struct kvm_assigned_pci_dev assigned_dev_data;
    int r;

    memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
    assigned_dev_data.assigned_dev_id =
        calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
    assigned_dev_data.busnr = dev->h_busnr;
    assigned_dev_data.devfn = dev->h_devfn;

#ifdef KVM_CAP_IOMMU
    /* Use the IOMMU whenever KVM supports it, unless it was
     * disabled on the command line.
     */
    r = kvm_check_extension(kvm_state, KVM_CAP_IOMMU);
    if (!r)
        dev->use_iommu = 0;
    if (dev->use_iommu)
        assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
#else
    dev->use_iommu = 0;
#endif

    r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
    if (r < 0)
        fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
                dev->dev.qdev.id, strerror(-r));
    return r;
}
static int assign_irq(AssignedDevice *dev)
{
    struct kvm_assigned_irq assigned_irq_data;
    int irq, r = 0;

    /* Interrupt PIN 0 means don't use INTx */
    if (pci_read_byte(dev->pdev, PCI_INTERRUPT_PIN) == 0)
        return 0;

    irq = pci_map_irq(&dev->dev, dev->intpin);
    irq = piix_get_irq(irq);

#ifdef TARGET_IA64
    irq = ipf_map_irq(&dev->dev, irq);
#endif

    if (dev->girq == irq)
        return r;

    memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
    assigned_irq_data.assigned_dev_id =
        calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
    assigned_irq_data.guest_irq = irq;
    assigned_irq_data.host_irq = dev->real_device.irq;
#ifdef KVM_CAP_ASSIGN_DEV_IRQ
    if (dev->irq_requested_type) {
        assigned_irq_data.flags = dev->irq_requested_type;
        r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
        /* -ENXIO means no assigned irq */
        if (r && r != -ENXIO)
            perror("assign_irq: deassign");
    }

    assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX;
    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
        assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_MSI;
    else
        assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_INTX;
#endif

    r = kvm_assign_irq(kvm_context, &assigned_irq_data);
    if (r < 0) {
        fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
                dev->dev.qdev.id, strerror(-r));
        fprintf(stderr, "Perhaps you are assigning a device "
                "that shares an IRQ with another device?\n");
        return r;
    }

    dev->girq = irq;
    dev->irq_requested_type = assigned_irq_data.flags;
    return r;
}
static void deassign_device(AssignedDevice *dev)
{
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
    struct kvm_assigned_pci_dev assigned_dev_data;
    int r;

    memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
    assigned_dev_data.assigned_dev_id =
        calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);

    r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data);
    if (r < 0)
        fprintf(stderr, "Failed to deassign device \"%s\" : %s\n",
                dev->dev.qdev.id, strerror(-r));
#endif
}
#if 0
AssignedDevInfo *get_assigned_device(int pcibus, int slot)
{
    AssignedDevice *assigned_dev = NULL;
    AssignedDevInfo *adev = NULL;

    QLIST_FOREACH(adev, &adev_head, next) {
        assigned_dev = adev->assigned_dev;
        if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
            PCI_SLOT(assigned_dev->dev.devfn) == slot)
            return adev;
    }

    return NULL;
}
#endif
/* The pci config space got updated. Check if irq numbers have changed
 * for our devices
 */
void assigned_dev_update_irqs(void)
{
    AssignedDevice *dev, *next;
    int r;

    dev = QLIST_FIRST(&devs);
    while (dev) {
        next = QLIST_NEXT(dev, next);
        r = assign_irq(dev);
        if (r < 0)
            qdev_unplug(&dev->dev.qdev);
        dev = next;
    }
}
#ifdef KVM_CAP_IRQ_ROUTING

#ifdef KVM_CAP_DEVICE_MSI
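/*
 * Called when the guest writes the MSI enable bit of the emulated MSI
 * capability: any previously assigned IRQ is torn down, and if MSI is being
 * enabled, a routing entry is built from the guest-programmed address/data
 * pair and the host MSI is bound to it.
 */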
static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
{
    struct kvm_assigned_irq assigned_irq_data;
    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
    uint8_t ctrl_byte = pci_dev->config[ctrl_pos];
    int r;

    memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
    assigned_irq_data.assigned_dev_id =
        calc_assigned_dev_id(assigned_dev->h_busnr,
                             (uint8_t)assigned_dev->h_devfn);

    if (assigned_dev->irq_requested_type) {
        assigned_irq_data.flags = assigned_dev->irq_requested_type;
        free_dev_irq_entries(assigned_dev);
        r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
        /* -ENXIO means no assigned irq */
        if (r && r != -ENXIO)
            perror("assigned_dev_update_msi: deassign irq");
    }

    if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
        assigned_dev->entry = calloc(1, sizeof(struct kvm_irq_routing_entry));
        if (!assigned_dev->entry) {
            perror("assigned_dev_update_msi: ");
            return;
        }
        assigned_dev->entry->u.msi.address_lo =
            *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
                          PCI_MSI_ADDRESS_LO);
        assigned_dev->entry->u.msi.address_hi = 0;
        assigned_dev->entry->u.msi.data = *(uint16_t *)(pci_dev->config +
                pci_dev->cap.start + PCI_MSI_DATA_32);
        assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
        r = kvm_get_irq_route_gsi(kvm_context);
        if (r < 0) {
            perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
            return;
        }
        assigned_dev->entry->gsi = r;

        kvm_add_routing_entry(kvm_context, assigned_dev->entry);
        if (kvm_commit_irq_routes(kvm_context) < 0) {
            perror("assigned_dev_update_msi: kvm_commit_irq_routes");
            assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED;
            return;
        }
        assigned_dev->irq_entries_nr = 1;

        assigned_irq_data.guest_irq = assigned_dev->entry->gsi;
        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
        if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0)
            perror("assigned_dev_enable_msi: assign irq");

        assigned_dev->irq_requested_type = assigned_irq_data.flags;
    }
}
#endif
#ifdef KVM_CAP_DEVICE_MSIX
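/*
 * Translate the emulated MSI-X table into KVM routing entries: every table
 * entry with a non-zero message data field gets a GSI and is bound to the
 * corresponding host MSI-X vector.
 */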
static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
{
    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
    u16 entries_nr = 0, entries_max_nr;
    int pos = 0, i, r = 0;
    u32 msg_addr, msg_upper_addr, msg_data, msg_ctrl;
    struct kvm_assigned_msix_nr msix_nr;
    struct kvm_assigned_msix_entry msix_entry;
    void *va = adev->msix_table_page;

    if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
        pos = pci_dev->cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH;
    else
        pos = pci_dev->cap.start;

    entries_max_nr = pci_dev->config[pos + 2];
    entries_max_nr &= PCI_MSIX_TABSIZE;
    entries_max_nr += 1;

    /* Count the usable entries so we know how many to allocate */
    for (i = 0; i < entries_max_nr; i++) {
        memcpy(&msg_ctrl, va + i * 16 + 12, 4);
        memcpy(&msg_data, va + i * 16 + 8, 4);
        /* Ignore an unused entry even if it is unmasked */
        if (msg_data == 0)
            continue;
        entries_nr++;
    }

    if (entries_nr == 0) {
        fprintf(stderr, "MSI-X entry number is zero!\n");
        return -EINVAL;
    }
    msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr,
                                                   (uint8_t)adev->h_devfn);
    msix_nr.entry_nr = entries_nr;
    r = kvm_assign_set_msix_nr(kvm_context, &msix_nr);
    if (r != 0) {
        fprintf(stderr, "fail to set MSI-X entry number for MSIX! %s\n",
                strerror(-r));
        return r;
    }

    free_dev_irq_entries(adev);
    adev->irq_entries_nr = entries_nr;
    adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
    if (!adev->entry) {
        perror("assigned_dev_update_msix_mmio: ");
        return -errno;
    }

    msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
    entries_nr = 0;
    for (i = 0; i < entries_max_nr; i++) {
        if (entries_nr >= msix_nr.entry_nr)
            break;
        memcpy(&msg_ctrl, va + i * 16 + 12, 4);
        memcpy(&msg_data, va + i * 16 + 8, 4);
        if (msg_data == 0)
            continue;

        memcpy(&msg_addr, va + i * 16, 4);
        memcpy(&msg_upper_addr, va + i * 16 + 4, 4);

        r = kvm_get_irq_route_gsi(kvm_context);
        if (r < 0)
            return r;

        adev->entry[entries_nr].gsi = r;
        adev->entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
        adev->entry[entries_nr].flags = 0;
        adev->entry[entries_nr].u.msi.address_lo = msg_addr;
        adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
        adev->entry[entries_nr].u.msi.data = msg_data;
        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
        kvm_add_routing_entry(kvm_context, &adev->entry[entries_nr]);

        msix_entry.gsi = adev->entry[entries_nr].gsi;
        msix_entry.entry = i;
        r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
        if (r) {
            fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
            break;
        }
        DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
              msix_entry.gsi, msix_entry.entry);
        entries_nr++;
    }

    if (r == 0 && kvm_commit_irq_routes(kvm_context) < 0) {
        perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
        return -EINVAL;
    }

    return r;
}
static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
{
    struct kvm_assigned_irq assigned_irq_data;
    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
    uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
    int r;

    memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
    assigned_irq_data.assigned_dev_id =
        calc_assigned_dev_id(assigned_dev->h_busnr,
                             (uint8_t)assigned_dev->h_devfn);

    if (assigned_dev->irq_requested_type) {
        assigned_irq_data.flags = assigned_dev->irq_requested_type;
        free_dev_irq_entries(assigned_dev);
        r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
        /* -ENXIO means no assigned irq */
        if (r && r != -ENXIO)
            perror("assigned_dev_update_msix: deassign irq");
    }
    assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX | KVM_DEV_IRQ_GUEST_MSIX;

    if (*ctrl_word & PCI_MSIX_ENABLE) {
        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
            perror("assigned_dev_update_msix_mmio");
            return;
        }
        if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
            perror("assigned_dev_enable_msix: assign irq");
            return;
        }
        assigned_dev->irq_requested_type = assigned_irq_data.flags;
    }
}
#endif
#endif
static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
                                                 uint32_t val, int len)
{
    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
    unsigned int pos = pci_dev->cap.start, ctrl_pos;

    pci_default_cap_write_config(pci_dev, address, val, len);
#ifdef KVM_CAP_IRQ_ROUTING
#ifdef KVM_CAP_DEVICE_MSI
    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
        ctrl_pos = pos + PCI_MSI_FLAGS;
        if (address <= ctrl_pos && address + len > ctrl_pos)
            assigned_dev_update_msi(pci_dev, ctrl_pos);
        pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
    }
#endif
#ifdef KVM_CAP_DEVICE_MSIX
    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
        ctrl_pos = pos + 3;
        if (address <= ctrl_pos && address + len > ctrl_pos) {
            ctrl_pos--; /* control is word long */
            assigned_dev_update_msix(pci_dev, ctrl_pos);
        }
        pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
    }
#endif
#endif
    return;
}
static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
{
    AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
    PCIRegion *pci_region = dev->real_device.regions;
    int next_cap_pt = 0;

    pci_dev->cap.length = 0;
#ifdef KVM_CAP_IRQ_ROUTING
#ifdef KVM_CAP_DEVICE_MSI
    /* Expose MSI capability
     * MSI capability is the 1st capability in capability config */
    if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSI)) {
        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
        memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length],
               0, PCI_CAPABILITY_CONFIG_MSI_LENGTH);
        pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] =
            PCI_CAP_ID_MSI;
        pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
        next_cap_pt = 1;
    }
#endif
#ifdef KVM_CAP_DEVICE_MSIX
    /* Expose MSI-X capability */
    if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX)) {
        int pos, entry_nr, bar_nr;
        u32 msix_table_entry;
        dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
        memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length],
               0, PCI_CAPABILITY_CONFIG_MSIX_LENGTH);
        pos = pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX);
        entry_nr = pci_read_word(dev->pdev, pos + 2) & PCI_MSIX_TABSIZE;
        pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] = 0x11;
        pci_dev->config[pci_dev->cap.start +
                        pci_dev->cap.length + 2] = entry_nr;
        msix_table_entry = pci_read_long(dev->pdev, pos + PCI_MSIX_TABLE);
        *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
                      pci_dev->cap.length + PCI_MSIX_TABLE) = msix_table_entry;
        *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
                      pci_dev->cap.length + PCI_MSIX_PBA) =
            pci_read_long(dev->pdev, pos + PCI_MSIX_PBA);
        bar_nr = msix_table_entry & PCI_MSIX_BIR;
        msix_table_entry &= ~PCI_MSIX_BIR;
        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
        if (next_cap_pt != 0) {
            pci_dev->config[pci_dev->cap.start + next_cap_pt] =
                pci_dev->cap.start + pci_dev->cap.length;
            next_cap_pt += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
        } else
            next_cap_pt = 1;
        pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
    }
#endif
#endif

    return 0;
}
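/*
 * The MSI-X table lives inside one of the device's MMIO BARs, but the guest
 * must not touch the real table.  Accesses are emulated against a private
 * page so that the guest-programmed vectors can be read back when MSI-X is
 * enabled.
 */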
static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
{
    AssignedDevice *adev = opaque;
    unsigned int offset = addr & 0xfff;
    void *page = adev->msix_table_page;
    uint32_t val = 0;

    memcpy(&val, (void *)((char *)page + offset), 4);

    return val;
}

static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
{
    return ((msix_mmio_readl(opaque, addr & ~3)) >>
            (8 * (addr & 3))) & 0xff;
}

static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
{
    return ((msix_mmio_readl(opaque, addr & ~3)) >>
            (8 * (addr & 3))) & 0xffff;
}

static void msix_mmio_writel(void *opaque,
                             target_phys_addr_t addr, uint32_t val)
{
    AssignedDevice *adev = opaque;
    unsigned int offset = addr & 0xfff;
    void *page = adev->msix_table_page;

    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
          addr, val);
    memcpy((void *)((char *)page + offset), &val, 4);
}

static void msix_mmio_writew(void *opaque,
                             target_phys_addr_t addr, uint32_t val)
{
    msix_mmio_writel(opaque, addr & ~3,
                     (val & 0xffff) << (8*(addr & 3)));
}

static void msix_mmio_writeb(void *opaque,
                             target_phys_addr_t addr, uint32_t val)
{
    msix_mmio_writel(opaque, addr & ~3,
                     (val & 0xff) << (8*(addr & 3)));
}

static CPUWriteMemoryFunc *msix_mmio_write[] = {
    msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel
};

static CPUReadMemoryFunc *msix_mmio_read[] = {
    msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
};

static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
{
    dev->msix_table_page = mmap(NULL, 0x1000,
                                PROT_READ|PROT_WRITE,
                                MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
    if (dev->msix_table_page == MAP_FAILED) {
        fprintf(stderr, "fail allocate msix_table_page! %s\n",
                strerror(errno));
        return -EFAULT;
    }
    memset(dev->msix_table_page, 0, 0x1000);
    dev->mmio_index = cpu_register_io_memory(
                          msix_mmio_read, msix_mmio_write, dev);
    return 0;
}
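/*
 * qdev init function for "pci-assign": look up the host device in sysfs,
 * expose its BARs, set up capability (MSI/MSI-X) emulation, assign the
 * device and its IRQ to the guest through KVM, and finally try to load the
 * device's option ROM.
 */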
static int assigned_initfn(struct PCIDevice *pci_dev)
{
    AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
    struct pci_access *pacc;
    uint8_t e_device, e_intx;
    int r;

    if (!dev->host.bus && !dev->host.dev && !dev->host.func) {
        qemu_error("pci-assign: error: no host device specified\n");
        goto out;
    }

    if (get_real_device(dev, dev->host.bus, dev->host.dev, dev->host.func)) {
        qemu_error("pci-assign: Error: Couldn't get real device (%s)!\n",
                   dev->dev.qdev.id);
        goto out;
    }

    /* handle real device's MMIO/PIO BARs */
    if (assigned_dev_register_regions(dev->real_device.regions,
                                      dev->real_device.region_number,
                                      dev))
        goto out;

    /* handle interrupt routing */
    e_device = (dev->dev.devfn >> 3) & 0x1f;
    e_intx = dev->dev.config[0x3d] - 1;
    dev->intpin = e_intx;
    dev->run = 0;
    dev->girq = 0;
    dev->h_busnr = dev->host.bus;
    dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func);

    pacc = pci_alloc();
    pci_init(pacc);
    dev->pdev = pci_get_dev(pacc, 0, dev->host.bus, dev->host.dev, dev->host.func);

    if (pci_enable_capability_support(pci_dev, 0, NULL,
                    assigned_device_pci_cap_write_config,
                    assigned_device_pci_cap_init) < 0)
        goto assigned_out;

    /* assign device to guest */
    r = assign_device(dev);
    if (r < 0)
        goto assigned_out;

    /* assign irq for the device */
    r = assign_irq(dev);
    if (r < 0)
        goto assigned_out;

    /* intercept MSI-X entry page in the MMIO */
    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
        if (assigned_dev_register_msix_mmio(dev))
            goto assigned_out;

    assigned_dev_load_option_rom(dev);
    return 0;

assigned_out:
    deassign_device(dev);
out:
    free_assigned_device(dev);
    return -1;
}
static int assigned_exitfn(struct PCIDevice *pci_dev)
{
    AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);

    deassign_device(dev);
    free_assigned_device(dev);
    return 0;
}
static int parse_hostaddr(DeviceState *dev, Property *prop, const char *str)
{
    PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
    int rc;

    rc = pci_parse_host_devaddr(str, &ptr->bus, &ptr->dev, &ptr->func);
    if (rc != 0)
        return -1;
    return 0;
}

static int print_hostaddr(DeviceState *dev, Property *prop, char *dest, size_t len)
{
    PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);

    return snprintf(dest, len, "%02x:%02x.%x", ptr->bus, ptr->dev, ptr->func);
}

PropertyInfo qdev_prop_hostaddr = {
    .name  = "pci-hostaddr",
    .type  = -1,
    .size  = sizeof(PCIHostDevice),
    .parse = parse_hostaddr,
    .print = print_hostaddr,
};
static PCIDeviceInfo assign_info = {
    .qdev.name    = "pci-assign",
    .qdev.desc    = "pass through host pci devices to the guest",
    .qdev.size    = sizeof(AssignedDevice),
    .init         = assigned_initfn,
    .exit         = assigned_exitfn,
    .config_read  = assigned_dev_pci_read_config,
    .config_write = assigned_dev_pci_write_config,
    .qdev.props   = (Property[]) {
        DEFINE_PROP("host", AssignedDevice, host, qdev_prop_hostaddr, PCIHostDevice),
        DEFINE_PROP_UINT32("iommu", AssignedDevice, use_iommu, 1),
        DEFINE_PROP_END_OF_LIST(),
    },
};

static void assign_register_devices(void)
{
    pci_qdev_register(&assign_info);
}

device_init(assign_register_devices)
/*
 * Syntax to assign device:
 *
 * -pcidevice host=bus:dev.func[,dma=none][,name=Foo]
 *
 * Example:
 * -pcidevice host=00:13.0,dma=none
 *
 * dma can currently only be 'none' to disable iommu support.
 */
QemuOpts *add_assigned_device(const char *arg)
{
    QemuOpts *opts = NULL;
    char host[64], id[64], dma[8];
    int r;

    r = get_param_value(host, sizeof(host), "host", arg);
    if (!r)
        goto bad;
    r = get_param_value(id, sizeof(id), "id", arg);
    if (!r)
        r = get_param_value(id, sizeof(id), "name", arg);
    if (!r)
        r = get_param_value(id, sizeof(id), "host", arg);

    opts = qemu_opts_create(&qemu_device_opts, id, 0);
    if (!opts)
        goto bad;
    qemu_opt_set(opts, "driver", "pci-assign");
    qemu_opt_set(opts, "host", host);

#ifdef KVM_CAP_IOMMU
    r = get_param_value(dma, sizeof(dma), "dma", arg);
    if (r && !strncmp(dma, "none", 4))
        qemu_opt_set(opts, "iommu", "0");
#endif
    qemu_opts_print(opts, NULL);
    return opts;

bad:
    fprintf(stderr, "pcidevice argument parse error; "
            "please check the help text for usage\n");
    if (opts)
        qemu_opts_del(opts);
    return NULL;
}
void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices)
{
    QemuOpts *opts;
    int i;

    for (i = 0; i < n_devices; i++) {
        opts = add_assigned_device(devices[i]);
        if (opts == NULL) {
            fprintf(stderr, "Could not add assigned device %s\n", devices[i]);
            exit(1);
        }
        /* generic code will call qdev_device_add() for the device */
    }
}
/* Option ROM header */
struct option_rom_header {
    uint8_t signature[2];
    uint8_t rom_size;
    uint32_t entry_point;
    uint8_t reserved[17];
    uint16_t pci_header_offset;
    uint16_t expansion_header_offset;
} __attribute__ ((packed));

/* Option ROM PCI data structure */
struct option_rom_pci_header {
    uint8_t signature[4];
    uint16_t vendor_id;
    uint16_t device_id;
    uint16_t vital_product_data_offset;
    uint16_t structure_length;
    uint8_t structure_revision;
    uint8_t class_code[3];
    uint16_t image_length;
    uint16_t image_revision;
    uint8_t code_type;
    uint8_t indicator;
    uint16_t reserved;
} __attribute__ ((packed));
/*
 * Scan the list of Option ROMs at 'roms'.  If a suitable Option ROM (one
 * with a valid signature, checksum and PCI data structure) is found,
 * register it with the ROM loader so it gets copied into guest RAM.
 */
#define OPTION_ROM_ALIGN(x) (((x) + 2047) & ~2047)
static void scan_option_rom(const char *name, uint8_t devfn, void *roms)
{
    int i, size;
    uint8_t csum;
    struct option_rom_header *rom;
    struct option_rom_pci_header *pcih;

    rom = roms;

    for ( ; ; ) {
        /* Invalid signature means we're out of option ROMs. */
        if (strncmp((char *)rom->signature, "\x55\xaa", 2) ||
            (rom->rom_size == 0))
            break;

        size = rom->rom_size * 512;
        /* Invalid checksum means we're out of option ROMs. */
        csum = 0;
        for (i = 0; i < size; i++)
            csum += ((uint8_t *)rom)[i];
        if (csum != 0)
            break;

        /* Check the PCI header (if any) for a match. */
        pcih = (struct option_rom_pci_header *)
                ((char *)rom + rom->pci_header_offset);
        if ((rom->pci_header_offset != 0) &&
            !strncmp((char *)pcih->signature, "PCIR", 4))
            goto found;

        rom = (struct option_rom_header *)((char *)rom + size);
    }
    return;

found:
    rom_add_blob(name ? name : "assigned device", rom, size,
                 PC_ROM_MIN_OPTION, PC_ROM_MAX, PC_ROM_ALIGN);
    return;
}
/*
 * Check whether the assigned device has an option ROM and, if so, load the
 * ROM data into RAM.  If an error occurs while loading the option ROM, it is
 * simply ignored and the device is assigned without it.
 */
static void assigned_dev_load_option_rom(AssignedDevice *dev)
{
    int size, len;
    void *buf;
    FILE *fp;
    uint8_t i = 1;
    char rom_file[64];

    snprintf(rom_file, sizeof(rom_file),
             "/sys/bus/pci/devices/0000:%02x:%02x.%01x/rom",
             dev->host.bus, dev->host.dev, dev->host.func);

    if (access(rom_file, F_OK))
        return;

    /* Write something to the ROM file to enable it */
    fp = fopen(rom_file, "wb");
    if (fp == NULL)
        return;
    len = fwrite(&i, 1, 1, fp);
    fclose(fp);
    if (len != 1)
        return;

    /* The file has to be closed and reopened, otherwise it won't work */
    fp = fopen(rom_file, "rb");
    if (fp == NULL)
        return;

    fseek(fp, 0, SEEK_END);
    size = ftell(fp);
    fseek(fp, 0, SEEK_SET);

    buf = malloc(size);
    if (buf == NULL) {
        fclose(fp);
        return;
    }

    /* A short read means the ROM could not be read completely */
    if (fread(buf, size, 1, fp) != 1 || ferror(fp)) {
        free(buf);
        fclose(fp);
        return;
    }
    fclose(fp);

    /* Copy ROM contents into the space backing the ROM BAR */
    if (dev->v_addrs[PCI_ROM_SLOT].r_size >= size &&
        dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase) {
        mprotect(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
                 size, PROT_READ | PROT_WRITE);
        memcpy(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
               buf, size);
        mprotect(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
                 size, PROT_READ);
    }

    if (!dev->dev.qdev.hotplugged) {
        /* Scan the buffer for suitable ROMs and register them */
        scan_option_rom(dev->dev.qdev.id, dev->dev.devfn, buf);
    }
    free(buf);
}