Fix 32-bit overflow in parallels image support
[qemu-kvm/fedora.git] / hw / device-assignment.c
blob75db5462511e04c383bb156dd576b785a91224dd
1 /*
2 * Copyright (c) 2007, Neocleus Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 * Assign a PCI device from the host to a guest VM.
20 * Adapted for KVM by Qumranet.
22 * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
23 * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
24 * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
25 * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
26 * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <sys/io.h>
31 #include <pci/pci.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include "qemu-kvm.h"
35 #include "hw.h"
36 #include "pc.h"
37 #include "sysemu.h"
38 #include "console.h"
39 #include "device-assignment.h"
41 /* From linux/ioport.h */
42 #define IORESOURCE_IO 0x00000100 /* Resource type */
43 #define IORESOURCE_MEM 0x00000200
44 #define IORESOURCE_IRQ 0x00000400
45 #define IORESOURCE_DMA 0x00000800
46 #define IORESOURCE_PREFETCH 0x00001000 /* No side effects */
48 /* #define DEVICE_ASSIGNMENT_DEBUG 1 */
#ifdef DEVICE_ASSIGNMENT_DEBUG
/* Verbose tracing: prefix every message with the calling function's name. */
#define DEBUG(fmt, ...)                                             \
    do {                                                            \
        fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);        \
    } while (0)
#else
/* Tracing disabled: expand to a well-formed empty statement. */
#define DEBUG(fmt, ...) do { } while (0)
#endif
59 static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
61 return region->u.r_baseport + (addr - region->e_physbase);
64 static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
65 uint32_t value)
67 AssignedDevRegion *r_access = opaque;
68 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
70 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
71 r_pio, (int)r_access->e_physbase,
72 (unsigned long)r_access->u.r_baseport, value);
74 outb(value, r_pio);
77 static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
78 uint32_t value)
80 AssignedDevRegion *r_access = opaque;
81 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
83 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
84 r_pio, (int)r_access->e_physbase,
85 (unsigned long)r_access->u.r_baseport, value);
87 outw(value, r_pio);
90 static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
91 uint32_t value)
93 AssignedDevRegion *r_access = opaque;
94 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
96 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
97 r_pio, (int)r_access->e_physbase,
98 (unsigned long)r_access->u.r_baseport, value);
100 outl(value, r_pio);
103 static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
105 AssignedDevRegion *r_access = opaque;
106 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
107 uint32_t value;
109 value = inb(r_pio);
111 DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
112 r_pio, (int)r_access->e_physbase,
113 (unsigned long)r_access->u.r_baseport, value);
115 return value;
118 static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
120 AssignedDevRegion *r_access = opaque;
121 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
122 uint32_t value;
124 value = inw(r_pio);
126 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
127 r_pio, (int)r_access->e_physbase,
128 (unsigned long)r_access->u.r_baseport, value);
130 return value;
133 static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
135 AssignedDevRegion *r_access = opaque;
136 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
137 uint32_t value;
139 value = inl(r_pio);
141 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
142 r_pio, (int)r_access->e_physbase,
143 (unsigned long)r_access->u.r_baseport, value);
145 return value;
148 static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
149 uint32_t e_phys, uint32_t e_size, int type)
151 AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
152 AssignedDevRegion *region = &r_dev->v_addrs[region_num];
153 PCIRegion *real_region = &r_dev->real_device.regions[region_num];
154 uint32_t old_ephys = region->e_physbase;
155 uint32_t old_esize = region->e_size;
156 int first_map = (region->e_size == 0);
157 int ret = 0;
159 DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
160 e_phys, region->u.r_virtbase, type, e_size, region_num);
162 region->e_physbase = e_phys;
163 region->e_size = e_size;
165 if (!first_map)
166 kvm_destroy_phys_mem(kvm_context, old_ephys,
167 TARGET_PAGE_ALIGN(old_esize));
169 if (e_size > 0) {
170 /* deal with MSI-X MMIO page */
171 if (real_region->base_addr <= r_dev->msix_table_addr &&
172 real_region->base_addr + real_region->size >=
173 r_dev->msix_table_addr) {
174 int offset = r_dev->msix_table_addr - real_region->base_addr;
175 ret = munmap(region->u.r_virtbase + offset, TARGET_PAGE_SIZE);
176 if (ret == 0)
177 DEBUG("munmap done, virt_base 0x%p\n",
178 region->u.r_virtbase + offset);
179 else {
180 fprintf(stderr, "%s: fail munmap msix table!\n", __func__);
181 exit(1);
183 cpu_register_physical_memory(e_phys + offset,
184 TARGET_PAGE_SIZE, r_dev->mmio_index);
186 ret = kvm_register_phys_mem(kvm_context, e_phys,
187 region->u.r_virtbase,
188 TARGET_PAGE_ALIGN(e_size), 0);
191 if (ret != 0) {
192 fprintf(stderr, "%s: Error: create new mapping failed\n", __func__);
193 exit(1);
197 static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
198 uint32_t addr, uint32_t size, int type)
200 AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
201 AssignedDevRegion *region = &r_dev->v_addrs[region_num];
202 int first_map = (region->e_size == 0);
203 CPUState *env;
205 region->e_physbase = addr;
206 region->e_size = size;
208 DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d region_num=%d \n",
209 addr, region->u.r_baseport, type, size, region_num);
211 if (first_map) {
212 struct ioperm_data *data;
214 data = qemu_mallocz(sizeof(struct ioperm_data));
215 if (data == NULL) {
216 fprintf(stderr, "%s: Out of memory\n", __func__);
217 exit(1);
220 data->start_port = region->u.r_baseport;
221 data->num = region->r_size;
222 data->turn_on = 1;
224 kvm_add_ioperm_data(data);
226 for (env = first_cpu; env; env = env->next_cpu)
227 kvm_ioperm(env, data);
230 register_ioport_read(addr, size, 1, assigned_dev_ioport_readb,
231 (r_dev->v_addrs + region_num));
232 register_ioport_read(addr, size, 2, assigned_dev_ioport_readw,
233 (r_dev->v_addrs + region_num));
234 register_ioport_read(addr, size, 4, assigned_dev_ioport_readl,
235 (r_dev->v_addrs + region_num));
236 register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb,
237 (r_dev->v_addrs + region_num));
238 register_ioport_write(addr, size, 2, assigned_dev_ioport_writew,
239 (r_dev->v_addrs + region_num));
240 register_ioport_write(addr, size, 4, assigned_dev_ioport_writel,
241 (r_dev->v_addrs + region_num));
244 static uint8_t pci_find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
246 int id;
247 int max_cap = 48;
248 int pos = PCI_CAPABILITY_LIST;
249 int status;
251 status = pci_read_byte(pci_dev, PCI_STATUS);
252 if ((status & PCI_STATUS_CAP_LIST) == 0)
253 return 0;
255 while (max_cap--) {
256 pos = pci_read_byte(pci_dev, pos);
257 if (pos < 0x40)
258 break;
260 pos &= ~3;
261 id = pci_read_byte(pci_dev, pos + PCI_CAP_LIST_ID);
263 if (id == 0xff)
264 break;
265 if (id == cap)
266 return pos;
268 pos += PCI_CAP_LIST_NEXT;
270 return 0;
273 static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
274 uint32_t val, int len)
276 int fd;
277 ssize_t ret;
278 AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
280 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
281 ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
282 (uint16_t) address, val, len);
284 if (address == 0x4) {
285 pci_default_write_config(d, address, val, len);
286 /* Continue to program the card */
289 if ((address >= 0x10 && address <= 0x24) || address == 0x30 ||
290 address == 0x34 || address == 0x3c || address == 0x3d ||
291 pci_access_cap_config(d, address, len)) {
292 /* used for update-mappings (BAR emulation) */
293 pci_default_write_config(d, address, val, len);
294 return;
297 DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
298 ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
299 (uint16_t) address, val, len);
301 fd = pci_dev->real_device.config_fd;
303 again:
304 ret = pwrite(fd, &val, len, address);
305 if (ret != len) {
306 if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
307 goto again;
309 fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
310 __func__, ret, errno);
312 exit(1);
316 static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
317 int len)
319 uint32_t val = 0;
320 int fd;
321 ssize_t ret;
322 AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
324 if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) ||
325 (address >= 0x10 && address <= 0x24) || address == 0x30 ||
326 address == 0x34 || address == 0x3c || address == 0x3d ||
327 pci_access_cap_config(d, address, len)) {
328 val = pci_default_read_config(d, address, len);
329 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
330 (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
331 return val;
334 /* vga specific, remove later */
335 if (address == 0xFC)
336 goto do_log;
338 fd = pci_dev->real_device.config_fd;
340 again:
341 ret = pread(fd, &val, len, address);
342 if (ret != len) {
343 if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
344 goto again;
346 fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
347 __func__, ret, errno);
349 exit(1);
352 do_log:
353 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
354 (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
356 if (!pci_dev->cap.available) {
357 /* kill the special capabilities */
358 if (address == 4 && len == 4)
359 val &= ~0x100000;
360 else if (address == 6)
361 val &= ~0x10;
364 return val;
367 static int assigned_dev_register_regions(PCIRegion *io_regions,
368 unsigned long regions_num,
369 AssignedDevice *pci_dev)
371 uint32_t i;
372 PCIRegion *cur_region = io_regions;
374 for (i = 0; i < regions_num; i++, cur_region++) {
375 if (!cur_region->valid)
376 continue;
377 pci_dev->v_addrs[i].num = i;
379 /* handle memory io regions */
380 if (cur_region->type & IORESOURCE_MEM) {
381 int t = cur_region->type & IORESOURCE_PREFETCH
382 ? PCI_ADDRESS_SPACE_MEM_PREFETCH
383 : PCI_ADDRESS_SPACE_MEM;
385 /* map physical memory */
386 pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
387 if (i == PCI_ROM_SLOT) {
388 pci_dev->v_addrs[i].u.r_virtbase =
389 mmap(NULL,
390 (cur_region->size + 0xFFF) & 0xFFFFF000,
391 PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE,
392 0, (off_t) 0);
394 } else {
395 pci_dev->v_addrs[i].u.r_virtbase =
396 mmap(NULL,
397 (cur_region->size + 0xFFF) & 0xFFFFF000,
398 PROT_WRITE | PROT_READ, MAP_SHARED,
399 cur_region->resource_fd, (off_t) 0);
402 if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) {
403 pci_dev->v_addrs[i].u.r_virtbase = NULL;
404 fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
405 "\n", __func__,
406 (uint32_t) (cur_region->base_addr));
407 return -1;
410 if (i == PCI_ROM_SLOT) {
411 memset(pci_dev->v_addrs[i].u.r_virtbase, 0,
412 (cur_region->size + 0xFFF) & 0xFFFFF000);
413 mprotect(pci_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
414 (cur_region->size + 0xFFF) & 0xFFFFF000, PROT_READ);
417 pci_dev->v_addrs[i].r_size = cur_region->size;
418 pci_dev->v_addrs[i].e_size = 0;
420 /* add offset */
421 pci_dev->v_addrs[i].u.r_virtbase +=
422 (cur_region->base_addr & 0xFFF);
424 pci_register_bar((PCIDevice *) pci_dev, i,
425 cur_region->size, t,
426 assigned_dev_iomem_map);
427 continue;
429 /* handle port io regions */
430 pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
431 pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
432 pci_dev->v_addrs[i].r_size = cur_region->size;
433 pci_dev->v_addrs[i].e_size = 0;
435 pci_register_bar((PCIDevice *) pci_dev, i,
436 cur_region->size, PCI_ADDRESS_SPACE_IO,
437 assigned_dev_ioport_map);
439 /* not relevant for port io */
440 pci_dev->v_addrs[i].memory_index = 0;
443 /* success */
444 return 0;
447 static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
448 uint8_t r_dev, uint8_t r_func)
450 char dir[128], name[128];
451 int fd, r = 0;
452 FILE *f;
453 unsigned long long start, end, size, flags;
454 unsigned long id;
455 struct stat statbuf;
456 PCIRegion *rp;
457 PCIDevRegions *dev = &pci_dev->real_device;
459 dev->region_number = 0;
461 snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
462 r_bus, r_dev, r_func);
464 snprintf(name, sizeof(name), "%sconfig", dir);
466 fd = open(name, O_RDWR);
467 if (fd == -1) {
468 fprintf(stderr, "%s: %s: %m\n", __func__, name);
469 return 1;
471 dev->config_fd = fd;
472 again:
473 r = read(fd, pci_dev->dev.config, sizeof(pci_dev->dev.config));
474 if (r < 0) {
475 if (errno == EINTR || errno == EAGAIN)
476 goto again;
477 fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
480 snprintf(name, sizeof(name), "%sresource", dir);
482 f = fopen(name, "r");
483 if (f == NULL) {
484 fprintf(stderr, "%s: %s: %m\n", __func__, name);
485 return 1;
488 for (r = 0; r < PCI_NUM_REGIONS; r++) {
489 if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3)
490 break;
492 rp = dev->regions + r;
493 rp->valid = 0;
494 size = end - start + 1;
495 flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
496 if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
497 continue;
498 if (flags & IORESOURCE_MEM) {
499 flags &= ~IORESOURCE_IO;
500 if (r != PCI_ROM_SLOT) {
501 snprintf(name, sizeof(name), "%sresource%d", dir, r);
502 fd = open(name, O_RDWR);
503 if (fd == -1)
504 continue;
505 rp->resource_fd = fd;
507 } else
508 flags &= ~IORESOURCE_PREFETCH;
510 rp->type = flags;
511 rp->valid = 1;
512 rp->base_addr = start;
513 rp->size = size;
514 DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
515 r, rp->size, start, rp->type, rp->resource_fd);
518 fclose(f);
520 /* read and fill device ID */
521 snprintf(name, sizeof(name), "%svendor", dir);
522 f = fopen(name, "r");
523 if (f == NULL) {
524 fprintf(stderr, "%s: %s: %m\n", __func__, name);
525 return 1;
527 if (fscanf(f, "%li\n", &id) == 1) {
528 pci_dev->dev.config[0] = id & 0xff;
529 pci_dev->dev.config[1] = (id & 0xff00) >> 8;
531 fclose(f);
533 /* read and fill vendor ID */
534 snprintf(name, sizeof(name), "%sdevice", dir);
535 f = fopen(name, "r");
536 if (f == NULL) {
537 fprintf(stderr, "%s: %s: %m\n", __func__, name);
538 return 1;
540 if (fscanf(f, "%li\n", &id) == 1) {
541 pci_dev->dev.config[2] = id & 0xff;
542 pci_dev->dev.config[3] = (id & 0xff00) >> 8;
544 fclose(f);
546 /* dealing with virtual function device */
547 snprintf(name, sizeof(name), "%sphysfn/", dir);
548 if (!stat(name, &statbuf))
549 pci_dev->need_emulate_cmd = 1;
550 else
551 pci_dev->need_emulate_cmd = 0;
553 dev->region_number = r;
554 return 0;
557 static LIST_HEAD(, AssignedDevInfo) adev_head;
#ifdef KVM_CAP_IRQ_ROUTING
/* Remove every GSI routing entry this device installed, then release
 * the entry array and reset the count. */
static void free_dev_irq_entries(AssignedDevice *dev)
{
    int i;

    for (i = 0; i < dev->irq_entries_nr; i++)
        kvm_del_routing_entry(kvm_context, &dev->entry[i]);

    free(dev->entry);
    dev->entry = NULL;
    dev->irq_entries_nr = 0;
}
#endif
572 static void free_assigned_device(AssignedDevInfo *adev)
574 AssignedDevice *dev = adev->assigned_dev;
576 if (dev) {
577 int i;
579 for (i = 0; i < dev->real_device.region_number; i++) {
580 PCIRegion *pci_region = &dev->real_device.regions[i];
581 AssignedDevRegion *region = &dev->v_addrs[i];
583 if (!pci_region->valid)
584 continue;
586 if (pci_region->type & IORESOURCE_IO) {
587 kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
588 continue;
589 } else if (pci_region->type & IORESOURCE_MEM) {
590 if (region->e_size > 0)
591 kvm_destroy_phys_mem(kvm_context, region->e_physbase,
592 TARGET_PAGE_ALIGN(region->e_size));
594 if (region->u.r_virtbase) {
595 int ret = munmap(region->u.r_virtbase,
596 (pci_region->size + 0xFFF) & 0xFFFFF000);
597 if (ret != 0)
598 fprintf(stderr,
599 "Failed to unmap assigned device region: %s\n",
600 strerror(errno));
605 if (dev->real_device.config_fd) {
606 close(dev->real_device.config_fd);
607 dev->real_device.config_fd = 0;
610 pci_unregister_device(&dev->dev, 1);
611 #ifdef KVM_CAP_IRQ_ROUTING
612 free_dev_irq_entries(dev);
613 #endif
614 adev->assigned_dev = dev = NULL;
617 LIST_REMOVE(adev, next);
618 qemu_free(adev);
/*
 * Pack a host PCI bus number and devfn into the device id used by the
 * KVM device-assignment ioctls: bus in bits 15:8, devfn in bits 7:0.
 */
static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
{
    return ((uint32_t)bus << 8) | (uint32_t)devfn;
}
626 static int assign_device(AssignedDevInfo *adev)
628 struct kvm_assigned_pci_dev assigned_dev_data;
629 AssignedDevice *dev = adev->assigned_dev;
630 int r;
632 memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
633 assigned_dev_data.assigned_dev_id =
634 calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
635 assigned_dev_data.busnr = dev->h_busnr;
636 assigned_dev_data.devfn = dev->h_devfn;
638 #ifdef KVM_CAP_IOMMU
639 /* We always enable the IOMMU if present
640 * (or when not disabled on the command line)
642 r = kvm_check_extension(kvm_state, KVM_CAP_IOMMU);
643 if (r && !adev->disable_iommu)
644 assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
645 #endif
647 r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
648 if (r < 0)
649 fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
650 adev->name, strerror(-r));
651 return r;
654 static int assign_irq(AssignedDevInfo *adev)
656 struct kvm_assigned_irq assigned_irq_data;
657 AssignedDevice *dev = adev->assigned_dev;
658 int irq, r = 0;
660 /* Interrupt PIN 0 means don't use INTx */
661 if (pci_read_byte(dev->pdev, PCI_INTERRUPT_PIN) == 0)
662 return 0;
664 irq = pci_map_irq(&dev->dev, dev->intpin);
665 irq = piix_get_irq(irq);
667 #ifdef TARGET_IA64
668 irq = ipf_map_irq(&dev->dev, irq);
669 #endif
671 if (dev->girq == irq)
672 return r;
674 memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
675 assigned_irq_data.assigned_dev_id =
676 calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
677 assigned_irq_data.guest_irq = irq;
678 assigned_irq_data.host_irq = dev->real_device.irq;
679 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
680 if (dev->irq_requested_type) {
681 assigned_irq_data.flags = dev->irq_requested_type;
682 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
683 /* -ENXIO means no assigned irq */
684 if (r && r != -ENXIO)
685 perror("assign_irq: deassign");
688 assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX;
689 if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
690 assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_MSI;
691 else
692 assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_INTX;
693 #endif
695 r = kvm_assign_irq(kvm_context, &assigned_irq_data);
696 if (r < 0) {
697 fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
698 adev->name, strerror(-r));
699 fprintf(stderr, "Perhaps you are assigning a device "
700 "that shares an IRQ with another device?\n");
701 return r;
704 dev->girq = irq;
705 dev->irq_requested_type = assigned_irq_data.flags;
706 return r;
709 static void deassign_device(AssignedDevInfo *adev)
711 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
712 struct kvm_assigned_pci_dev assigned_dev_data;
713 AssignedDevice *dev = adev->assigned_dev;
714 int r;
716 memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
717 assigned_dev_data.assigned_dev_id =
718 calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
720 r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data);
721 if (r < 0)
722 fprintf(stderr, "Failed to deassign device \"%s\" : %s\n",
723 adev->name, strerror(-r));
724 #endif
727 void remove_assigned_device(AssignedDevInfo *adev)
729 deassign_device(adev);
730 free_assigned_device(adev);
733 AssignedDevInfo *get_assigned_device(int pcibus, int slot)
735 AssignedDevice *assigned_dev = NULL;
736 AssignedDevInfo *adev = NULL;
738 LIST_FOREACH(adev, &adev_head, next) {
739 assigned_dev = adev->assigned_dev;
740 if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
741 PCI_SLOT(assigned_dev->dev.devfn) == slot)
742 return adev;
745 return NULL;
748 /* The pci config space got updated. Check if irq numbers have changed
749 * for our devices
751 void assigned_dev_update_irqs()
753 AssignedDevInfo *adev;
755 adev = LIST_FIRST(&adev_head);
756 while (adev) {
757 AssignedDevInfo *next = LIST_NEXT(adev, next);
758 int r;
760 r = assign_irq(adev);
761 if (r < 0)
762 remove_assigned_device(adev);
764 adev = next;
768 #ifdef KVM_CAP_IRQ_ROUTING
770 #ifdef KVM_CAP_DEVICE_MSI
771 static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
773 struct kvm_assigned_irq assigned_irq_data;
774 AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
775 uint8_t ctrl_byte = pci_dev->config[ctrl_pos];
776 int r;
778 memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
779 assigned_irq_data.assigned_dev_id =
780 calc_assigned_dev_id(assigned_dev->h_busnr,
781 (uint8_t)assigned_dev->h_devfn);
783 if (assigned_dev->irq_requested_type) {
784 assigned_irq_data.flags = assigned_dev->irq_requested_type;
785 free_dev_irq_entries(assigned_dev);
786 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
787 /* -ENXIO means no assigned irq */
788 if (r && r != -ENXIO)
789 perror("assigned_dev_update_msi: deassign irq");
792 if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
793 assigned_dev->entry = calloc(1, sizeof(struct kvm_irq_routing_entry));
794 if (!assigned_dev->entry) {
795 perror("assigned_dev_update_msi: ");
796 return;
798 assigned_dev->entry->u.msi.address_lo =
799 *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
800 PCI_MSI_ADDRESS_LO);
801 assigned_dev->entry->u.msi.address_hi = 0;
802 assigned_dev->entry->u.msi.data = *(uint16_t *)(pci_dev->config +
803 pci_dev->cap.start + PCI_MSI_DATA_32);
804 assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
805 r = kvm_get_irq_route_gsi(kvm_context);
806 if (r < 0) {
807 perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
808 return;
810 assigned_dev->entry->gsi = r;
812 kvm_add_routing_entry(kvm_context, assigned_dev->entry);
813 if (kvm_commit_irq_routes(kvm_context) < 0) {
814 perror("assigned_dev_update_msi: kvm_commit_irq_routes");
815 assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED;
816 return;
818 assigned_dev->irq_entries_nr = 1;
820 assigned_irq_data.guest_irq = assigned_dev->entry->gsi;
821 assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
822 if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0)
823 perror("assigned_dev_enable_msi: assign irq");
825 assigned_dev->irq_requested_type = assigned_irq_data.flags;
828 #endif
830 #ifdef KVM_CAP_DEVICE_MSIX
831 static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
833 AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
834 u16 entries_nr = 0, entries_max_nr;
835 int pos = 0, i, r = 0;
836 u32 msg_addr, msg_upper_addr, msg_data, msg_ctrl;
837 struct kvm_assigned_msix_nr msix_nr;
838 struct kvm_assigned_msix_entry msix_entry;
839 void *va = adev->msix_table_page;
841 if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
842 pos = pci_dev->cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH;
843 else
844 pos = pci_dev->cap.start;
846 entries_max_nr = pci_dev->config[pos + 2];
847 entries_max_nr &= PCI_MSIX_TABSIZE;
848 entries_max_nr += 1;
850 /* Get the usable entry number for allocating */
851 for (i = 0; i < entries_max_nr; i++) {
852 memcpy(&msg_ctrl, va + i * 16 + 12, 4);
853 memcpy(&msg_data, va + i * 16 + 8, 4);
854 /* Ignore unused entry even it's unmasked */
855 if (msg_data == 0)
856 continue;
857 entries_nr ++;
860 if (entries_nr == 0) {
861 fprintf(stderr, "MSI-X entry number is zero!\n");
862 return -EINVAL;
864 msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr,
865 (uint8_t)adev->h_devfn);
866 msix_nr.entry_nr = entries_nr;
867 r = kvm_assign_set_msix_nr(kvm_context, &msix_nr);
868 if (r != 0) {
869 fprintf(stderr, "fail to set MSI-X entry number for MSIX! %s\n",
870 strerror(-r));
871 return r;
874 free_dev_irq_entries(adev);
875 adev->irq_entries_nr = entries_nr;
876 adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
877 if (!adev->entry) {
878 perror("assigned_dev_update_msix_mmio: ");
879 return -errno;
882 msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
883 entries_nr = 0;
884 for (i = 0; i < entries_max_nr; i++) {
885 if (entries_nr >= msix_nr.entry_nr)
886 break;
887 memcpy(&msg_ctrl, va + i * 16 + 12, 4);
888 memcpy(&msg_data, va + i * 16 + 8, 4);
889 if (msg_data == 0)
890 continue;
892 memcpy(&msg_addr, va + i * 16, 4);
893 memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
895 r = kvm_get_irq_route_gsi(kvm_context);
896 if (r < 0)
897 return r;
899 adev->entry[entries_nr].gsi = r;
900 adev->entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
901 adev->entry[entries_nr].flags = 0;
902 adev->entry[entries_nr].u.msi.address_lo = msg_addr;
903 adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
904 adev->entry[entries_nr].u.msi.data = msg_data;
905 DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
906 kvm_add_routing_entry(kvm_context, &adev->entry[entries_nr]);
908 msix_entry.gsi = adev->entry[entries_nr].gsi;
909 msix_entry.entry = i;
910 r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
911 if (r) {
912 fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
913 break;
915 DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
916 msix_entry.gsi, msix_entry.entry);
917 entries_nr ++;
920 if (r == 0 && kvm_commit_irq_routes(kvm_context) < 0) {
921 perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
922 return -EINVAL;
925 return r;
928 static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
930 struct kvm_assigned_irq assigned_irq_data;
931 AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
932 uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
933 int r;
935 memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
936 assigned_irq_data.assigned_dev_id =
937 calc_assigned_dev_id(assigned_dev->h_busnr,
938 (uint8_t)assigned_dev->h_devfn);
940 if (assigned_dev->irq_requested_type) {
941 assigned_irq_data.flags = assigned_dev->irq_requested_type;
942 free_dev_irq_entries(assigned_dev);
943 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
944 /* -ENXIO means no assigned irq */
945 if (r && r != -ENXIO)
946 perror("assigned_dev_update_msix: deassign irq");
948 assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX | KVM_DEV_IRQ_GUEST_MSIX;
950 if (*ctrl_word & PCI_MSIX_ENABLE) {
951 if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
952 perror("assigned_dev_update_msix_mmio");
953 return;
955 if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
956 perror("assigned_dev_enable_msix: assign irq");
957 return;
959 assigned_dev->irq_requested_type = assigned_irq_data.flags;
962 #endif
963 #endif
965 static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
966 uint32_t val, int len)
968 AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
969 unsigned int pos = pci_dev->cap.start, ctrl_pos;
971 pci_default_cap_write_config(pci_dev, address, val, len);
972 #ifdef KVM_CAP_IRQ_ROUTING
973 #ifdef KVM_CAP_DEVICE_MSI
974 if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
975 ctrl_pos = pos + PCI_MSI_FLAGS;
976 if (address <= ctrl_pos && address + len > ctrl_pos)
977 assigned_dev_update_msi(pci_dev, ctrl_pos);
978 pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
980 #endif
981 #ifdef KVM_CAP_DEVICE_MSIX
982 if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
983 ctrl_pos = pos + 3;
984 if (address <= ctrl_pos && address + len > ctrl_pos) {
985 ctrl_pos--; /* control is word long */
986 assigned_dev_update_msix(pci_dev, ctrl_pos);
988 pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
990 #endif
991 #endif
992 return;
995 static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
997 AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
998 PCIRegion *pci_region = dev->real_device.regions;
999 int next_cap_pt = 0;
1001 pci_dev->cap.length = 0;
1002 #ifdef KVM_CAP_IRQ_ROUTING
1003 #ifdef KVM_CAP_DEVICE_MSI
1004 /* Expose MSI capability
1005 * MSI capability is the 1st capability in capability config */
1006 if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSI)) {
1007 dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
1008 memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length],
1009 0, PCI_CAPABILITY_CONFIG_MSI_LENGTH);
1010 pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] =
1011 PCI_CAP_ID_MSI;
1012 pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
1013 next_cap_pt = 1;
1015 #endif
1016 #ifdef KVM_CAP_DEVICE_MSIX
1017 /* Expose MSI-X capability */
1018 if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX)) {
1019 int pos, entry_nr, bar_nr;
1020 u32 msix_table_entry;
1021 dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
1022 memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length],
1023 0, PCI_CAPABILITY_CONFIG_MSIX_LENGTH);
1024 pos = pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX);
1025 entry_nr = pci_read_word(dev->pdev, pos + 2) & PCI_MSIX_TABSIZE;
1026 pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] = 0x11;
1027 pci_dev->config[pci_dev->cap.start +
1028 pci_dev->cap.length + 2] = entry_nr;
1029 msix_table_entry = pci_read_long(dev->pdev, pos + PCI_MSIX_TABLE);
1030 *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
1031 pci_dev->cap.length + PCI_MSIX_TABLE) = msix_table_entry;
1032 *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
1033 pci_dev->cap.length + PCI_MSIX_PBA) =
1034 pci_read_long(dev->pdev, pos + PCI_MSIX_PBA);
1035 bar_nr = msix_table_entry & PCI_MSIX_BIR;
1036 msix_table_entry &= ~PCI_MSIX_BIR;
1037 dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
1038 if (next_cap_pt != 0) {
1039 pci_dev->config[pci_dev->cap.start + next_cap_pt] =
1040 pci_dev->cap.start + pci_dev->cap.length;
1041 next_cap_pt += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
1042 } else
1043 next_cap_pt = 1;
1044 pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
1046 #endif
1047 #endif
1049 return 0;
1052 static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
1054 AssignedDevice *adev = opaque;
1055 unsigned int offset = addr & 0xfff;
1056 void *page = adev->msix_table_page;
1057 uint32_t val = 0;
1059 memcpy(&val, (void *)((char *)page + offset), 4);
1061 return val;
1064 static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
1066 return ((msix_mmio_readl(opaque, addr & ~3)) >>
1067 (8 * (addr & 3))) & 0xff;
1070 static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
1072 return ((msix_mmio_readl(opaque, addr & ~3)) >>
1073 (8 * (addr & 3))) & 0xffff;
1076 static void msix_mmio_writel(void *opaque,
1077 target_phys_addr_t addr, uint32_t val)
1079 AssignedDevice *adev = opaque;
1080 unsigned int offset = addr & 0xfff;
1081 void *page = adev->msix_table_page;
1083 DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
1084 addr, val);
1085 memcpy((void *)((char *)page + offset), &val, 4);
1088 static void msix_mmio_writew(void *opaque,
1089 target_phys_addr_t addr, uint32_t val)
1091 msix_mmio_writel(opaque, addr & ~3,
1092 (val & 0xffff) << (8*(addr & 3)));
1095 static void msix_mmio_writeb(void *opaque,
1096 target_phys_addr_t addr, uint32_t val)
1098 msix_mmio_writel(opaque, addr & ~3,
1099 (val & 0xff) << (8*(addr & 3)));
1102 static CPUWriteMemoryFunc *msix_mmio_write[] = {
1103 msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel
1106 static CPUReadMemoryFunc *msix_mmio_read[] = {
1107 msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
1110 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
1112 dev->msix_table_page = mmap(NULL, 0x1000,
1113 PROT_READ|PROT_WRITE,
1114 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
1115 if (dev->msix_table_page == MAP_FAILED) {
1116 fprintf(stderr, "fail allocate msix_table_page! %s\n",
1117 strerror(errno));
1118 return -EFAULT;
1120 memset(dev->msix_table_page, 0, 0x1000);
1121 dev->mmio_index = cpu_register_io_memory(
1122 msix_mmio_read, msix_mmio_write, dev);
1123 return 0;
1126 struct PCIDevice *init_assigned_device(AssignedDevInfo *adev,
1127 const char *devaddr)
1129 PCIBus *bus;
1130 int devfn;
1131 int r;
1132 AssignedDevice *dev;
1133 PCIDevice *pci_dev;
1134 struct pci_access *pacc;
1135 uint8_t e_device, e_intx;
1137 DEBUG("Registering real physical device %s (bus=%x dev=%x func=%x)\n",
1138 adev->name, adev->bus, adev->dev, adev->func);
1140 bus = pci_get_bus_devfn(&devfn, devaddr);
1141 pci_dev = pci_register_device(bus, adev->name,
1142 sizeof(AssignedDevice), devfn, assigned_dev_pci_read_config,
1143 assigned_dev_pci_write_config);
1144 dev = container_of(pci_dev, AssignedDevice, dev);
1146 if (NULL == dev) {
1147 fprintf(stderr, "%s: Error: Couldn't register real device %s\n",
1148 __func__, adev->name);
1149 return NULL;
1152 adev->assigned_dev = dev;
1154 if (get_real_device(dev, adev->bus, adev->dev, adev->func)) {
1155 fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n",
1156 __func__, adev->name);
1157 goto out;
1160 /* handle real device's MMIO/PIO BARs */
1161 if (assigned_dev_register_regions(dev->real_device.regions,
1162 dev->real_device.region_number,
1163 dev))
1164 goto out;
1166 /* handle interrupt routing */
1167 e_device = (dev->dev.devfn >> 3) & 0x1f;
1168 e_intx = dev->dev.config[0x3d] - 1;
1169 dev->intpin = e_intx;
1170 dev->run = 0;
1171 dev->girq = 0;
1172 dev->h_busnr = adev->bus;
1173 dev->h_devfn = PCI_DEVFN(adev->dev, adev->func);
1175 pacc = pci_alloc();
1176 pci_init(pacc);
1177 dev->pdev = pci_get_dev(pacc, 0, adev->bus, adev->dev, adev->func);
1179 if (pci_enable_capability_support(pci_dev, 0, NULL,
1180 assigned_device_pci_cap_write_config,
1181 assigned_device_pci_cap_init) < 0)
1182 goto assigned_out;
1184 /* assign device to guest */
1185 r = assign_device(adev);
1186 if (r < 0)
1187 goto assigned_out;
1189 /* assign irq for the device */
1190 r = assign_irq(adev);
1191 if (r < 0)
1192 goto assigned_out;
1194 /* intercept MSI-X entry page in the MMIO */
1195 if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
1196 if (assigned_dev_register_msix_mmio(dev))
1197 return NULL;
1199 return &dev->dev;
1201 assigned_out:
1202 deassign_device(adev);
1203 out:
1204 free_assigned_device(adev);
1205 return NULL;
1209 * Syntax to assign device:
1211 * -pcidevice host=bus:dev.func[,dma=none][,name=Foo]
1213 * Example:
1214 * -pcidevice host=00:13.0,dma=pvdma
1216 * dma can currently only be 'none' to disable iommu support.
1218 AssignedDevInfo *add_assigned_device(const char *arg)
1220 char device[16];
1221 char dma[6];
1222 int r;
1223 AssignedDevInfo *adev;
1225 adev = qemu_mallocz(sizeof(AssignedDevInfo));
1226 if (adev == NULL) {
1227 fprintf(stderr, "%s: Out of memory\n", __func__);
1228 return NULL;
1230 r = get_param_value(device, sizeof(device), "host", arg);
1231 if (!r)
1232 goto bad;
1234 r = pci_parse_host_devaddr(device, &adev->bus, &adev->dev, &adev->func);
1235 if (r)
1236 goto bad;
1238 r = get_param_value(adev->name, sizeof(adev->name), "name", arg);
1239 if (!r)
1240 snprintf(adev->name, sizeof(adev->name), "%s", device);
1242 #ifdef KVM_CAP_IOMMU
1243 r = get_param_value(dma, sizeof(dma), "dma", arg);
1244 if (r && !strncmp(dma, "none", 4))
1245 adev->disable_iommu = 1;
1246 #endif
1248 LIST_INSERT_HEAD(&adev_head, adev, next);
1249 return adev;
1250 bad:
1251 fprintf(stderr, "pcidevice argument parse error; "
1252 "please check the help text for usage\n");
1253 qemu_free(adev);
1254 return NULL;
1257 void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices)
1259 int i;
1261 for (i = 0; i < n_devices; i++) {
1262 struct AssignedDevInfo *adev;
1264 adev = add_assigned_device(devices[i]);
1265 if (!adev) {
1266 fprintf(stderr, "Could not add assigned device %s\n", devices[i]);
1267 exit(1);
1270 if (!init_assigned_device(adev, NULL)) {
1271 fprintf(stderr, "Failed to initialize assigned device %s\n",
1272 devices[i]);
1273 exit(1);
/* Option ROM header (PCI firmware spec); packed to match the on-disk
 * layout byte for byte. */
struct option_rom_header {
    uint8_t signature[2];               /* 0x55 0xAA */
    uint8_t rom_size;                   /* image size in 512-byte units */
    uint32_t entry_point;
    uint8_t reserved[17];
    uint16_t pci_header_offset;         /* offset of "PCIR" structure */
    uint16_t expansion_header_offset;
} __attribute__ ((packed));
/* Option ROM PCI data structure ("PCIR"); packed to match the on-disk
 * layout byte for byte. */
struct option_rom_pci_header {
    uint8_t signature[4];               /* "PCIR" */
    uint16_t vendor_id;
    uint16_t device_id;
    uint16_t vital_product_data_offset;
    uint16_t structure_length;
    uint8_t structure_revision;
    uint8_t class_code[3];
    uint16_t image_length;              /* in 512-byte units */
    uint16_t image_revision;
    uint8_t code_type;
    uint8_t indicator;                  /* bit 7 set on last image */
    uint16_t reserved;
} __attribute__ ((packed));
1305 * Scan the list of Option ROMs at roms. If a suitable Option ROM is found,
1306 * allocate a ram space and copy it there. Then return its size aligned to
1307 * both 2KB and target page size.
1309 #define OPTION_ROM_ALIGN(x) (((x) + 2047) & ~2047)
1310 static int scan_option_rom(uint8_t devfn, void *roms, ram_addr_t offset)
1312 int i, size, total_size;
1313 uint8_t csum;
1314 ram_addr_t addr;
1315 struct option_rom_header *rom;
1316 struct option_rom_pci_header *pcih;
1318 rom = roms;
1320 for ( ; ; ) {
1321 /* Invalid signature means we're out of option ROMs. */
1322 if (strncmp((char *)rom->signature, "\x55\xaa", 2) ||
1323 (rom->rom_size == 0))
1324 break;
1326 size = rom->rom_size * 512;
1327 /* Invalid checksum means we're out of option ROMs. */
1328 csum = 0;
1329 for (i = 0; i < size; i++)
1330 csum += ((uint8_t *)rom)[i];
1331 if (csum != 0)
1332 break;
1334 /* Check the PCI header (if any) for a match. */
1335 pcih = (struct option_rom_pci_header *)
1336 ((char *)rom + rom->pci_header_offset);
1337 if ((rom->pci_header_offset != 0) &&
1338 !strncmp((char *)pcih->signature, "PCIR", 4))
1339 goto found;
1341 rom = (struct option_rom_header *)((char *)rom + size);
1344 return 0;
1346 found:
1347 /* The size should be both 2K-aligned and page-aligned */
1348 total_size = (TARGET_PAGE_SIZE < 2048)
1349 ? OPTION_ROM_ALIGN(size + 1)
1350 : TARGET_PAGE_ALIGN(size + 1);
1352 /* Size of all available ram space is 0x10000 (0xd0000 to 0xe0000) */
1353 if ((offset + total_size) > 0x10000u) {
1354 fprintf(stderr, "Option ROM size %x exceeds available space\n", size);
1355 return 0;
1358 addr = qemu_ram_alloc(total_size);
1359 cpu_register_physical_memory(0xd0000 + offset, total_size, addr | IO_MEM_ROM);
1361 /* Write ROM data and devfn to phys_addr */
1362 cpu_physical_memory_write_rom(0xd0000 + offset, (uint8_t *)rom, size);
1363 cpu_physical_memory_write_rom(0xd0000 + offset + size, &devfn, 1);
1365 return total_size;
1369 * Scan the assigned devices for the devices that have an option ROM, and then
1370 * load the corresponding ROM data to RAM. If an error occurs while loading an
1371 * option ROM, we just ignore that option ROM and continue with the next one.
1373 ram_addr_t assigned_dev_load_option_roms(ram_addr_t rom_base_offset)
1375 ram_addr_t offset = rom_base_offset;
1376 AssignedDevInfo *adev;
1378 LIST_FOREACH(adev, &adev_head, next) {
1379 int size, len;
1380 void *buf;
1381 FILE *fp;
1382 uint8_t i = 1;
1383 char rom_file[64];
1385 snprintf(rom_file, sizeof(rom_file),
1386 "/sys/bus/pci/devices/0000:%02x:%02x.%01x/rom",
1387 adev->bus, adev->dev, adev->func);
1389 if (access(rom_file, F_OK))
1390 continue;
1392 /* Write something to the ROM file to enable it */
1393 fp = fopen(rom_file, "wb");
1394 if (fp == NULL)
1395 continue;
1396 len = fwrite(&i, 1, 1, fp);
1397 fclose(fp);
1398 if (len != 1)
1399 continue;
1401 /* The file has to be closed and reopened, otherwise it won't work */
1402 fp = fopen(rom_file, "rb");
1403 if (fp == NULL)
1404 continue;
1406 fseek(fp, 0, SEEK_END);
1407 size = ftell(fp);
1408 fseek(fp, 0, SEEK_SET);
1410 buf = malloc(size);
1411 if (buf == NULL) {
1412 fclose(fp);
1413 continue;
1416 fread(buf, size, 1, fp);
1417 if (!feof(fp) || ferror(fp)) {
1418 free(buf);
1419 fclose(fp);
1420 continue;
1423 /* Copy ROM contents into the space backing the ROM BAR */
1424 if (adev->assigned_dev->v_addrs[PCI_ROM_SLOT].r_size >= size &&
1425 adev->assigned_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase) {
1426 mprotect(adev->assigned_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
1427 size, PROT_READ | PROT_WRITE);
1428 memcpy(adev->assigned_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
1429 buf, size);
1430 mprotect(adev->assigned_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
1431 size, PROT_READ);
1434 /* Scan the buffer for suitable ROMs and increase the offset */
1435 offset += scan_option_rom(adev->assigned_dev->dev.devfn, buf, offset);
1437 free(buf);
1438 fclose(fp);
1441 return offset;