Merge commit '60e0df25e415b00cf35c4d214eaba9dc19aaa9e6' into upstream-merge
[qemu/qemu-dev-zwu.git] / hw/device-assignment.c
blob 0f7f5aa8452bcd76bf741bab3308767aba8a804b
1 /*
2 * Copyright (c) 2007, Neocleus Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 * Assign a PCI device from the host to a guest VM.
20 * Adapted for KVM by Qumranet.
22 * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
23 * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
24 * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
25 * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
26 * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <sys/io.h>
31 #include <sys/types.h>
32 #include <sys/stat.h>
33 #include "qemu-kvm.h"
34 #include "hw.h"
35 #include "pc.h"
36 #include "qemu-error.h"
37 #include "console.h"
38 #include "device-assignment.h"
39 #include "loader.h"
40 #include "monitor.h"
41 #include "range.h"
42 #include <pci/header.h>
43 #include "sysemu.h"
45 /* From linux/ioport.h */
46 #define IORESOURCE_IO 0x00000100 /* Resource type */
47 #define IORESOURCE_MEM 0x00000200
48 #define IORESOURCE_IRQ 0x00000400
49 #define IORESOURCE_DMA 0x00000800
50 #define IORESOURCE_PREFETCH 0x00002000 /* No side effects */
52 /* #define DEVICE_ASSIGNMENT_DEBUG 1 */
54 #ifdef DEVICE_ASSIGNMENT_DEBUG
55 #define DEBUG(fmt, ...) \
56 do { \
57 fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
58 } while (0)
59 #else
60 #define DEBUG(fmt, ...) do { } while(0)
61 #endif
63 static void assigned_dev_load_option_rom(AssignedDevice *dev);
65 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
67 static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
68 uint32_t address,
69 uint32_t val, int len);
71 static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
72 uint32_t address, int len);
74 /* Merge the bits set in mask from mval into val. Both val and mval are
75 * at the same addr offset, pos is the starting offset of the mask. */
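/* Example (illustrative): merging the virtualized interrupt line/pin into a
 * 4-byte read at addr 0x3c with pos = PCI_INTERRUPT_LINE and mask = 0xffff
 * replaces only the low 16 bits of val with the corresponding bits of mval. */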
76 static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr,
77 int len, uint8_t pos, uint32_t mask)
79 if (!ranges_overlap(addr, len, pos, 4)) {
80 return val;
83 if (addr >= pos) {
84 mask >>= (addr - pos) * 8;
85 } else {
86 mask <<= (pos - addr) * 8;
88 mask &= 0xffffffffU >> (4 - len) * 8;
90 val &= ~mask;
91 val |= (mval & mask);
93 return val;
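/* I/O port access helper: if the sysfs resource file supports pread/pwrite we
 * go through it; otherwise we fall back to raw in/out instructions on the host
 * port (which relies on the ioperm setup done in assigned_dev_ioport_map). */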
96 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
97 uint32_t addr, int len, uint32_t *val)
99 uint32_t ret = 0;
100 uint32_t offset = addr - dev_region->e_physbase;
101 int fd = dev_region->region->resource_fd;
103 if (fd >= 0) {
104 if (val) {
105 DEBUG("pwrite val=%x, len=%d, e_phys=%x, offset=%x\n",
106 *val, len, addr, offset);
107 if (pwrite(fd, val, len, offset) != len) {
108 fprintf(stderr, "%s - pwrite failed %s\n",
109 __func__, strerror(errno));
111 } else {
112 if (pread(fd, &ret, len, offset) != len) {
113 fprintf(stderr, "%s - pread failed %s\n",
114 __func__, strerror(errno));
115 ret = (1UL << (len * 8)) - 1;
117 DEBUG("pread ret=%x, len=%d, e_phys=%x, offset=%x\n",
118 ret, len, addr, offset);
120 } else {
121 uint32_t port = offset + dev_region->u.r_baseport;
123 if (val) {
124 DEBUG("out val=%x, len=%d, e_phys=%x, host=%x\n",
125 *val, len, addr, port);
126 switch (len) {
127 case 1:
128 outb(*val, port);
129 break;
130 case 2:
131 outw(*val, port);
132 break;
133 case 4:
134 outl(*val, port);
135 break;
137 } else {
138 switch (len) {
139 case 1:
140 ret = inb(port);
141 break;
142 case 2:
143 ret = inw(port);
144 break;
145 case 4:
146 ret = inl(port);
147 break;
149 DEBUG("in val=%x, len=%d, e_phys=%x, host=%x\n",
150 ret, len, addr, port);
153 return ret;
156 static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
157 uint32_t value)
159 assigned_dev_ioport_rw(opaque, addr, 1, &value);
160 return;
163 static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
164 uint32_t value)
166 assigned_dev_ioport_rw(opaque, addr, 2, &value);
167 return;
170 static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
171 uint32_t value)
173 assigned_dev_ioport_rw(opaque, addr, 4, &value);
174 return;
177 static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
179 return assigned_dev_ioport_rw(opaque, addr, 1, NULL);
182 static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
184 return assigned_dev_ioport_rw(opaque, addr, 2, NULL);
187 static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
189 return assigned_dev_ioport_rw(opaque, addr, 4, NULL);
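/* "Slow" BAR handlers: used for memory BARs whose size is not a multiple of
 * the page size, where the mmap()ed host region cannot be handed to the guest
 * as RAM; each guest access is trapped and serviced through u.r_virtbase. */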
192 static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
194 AssignedDevRegion *d = opaque;
195 uint8_t *in = d->u.r_virtbase + addr;
196 uint32_t r;
198 r = *in;
199 DEBUG("slow_bar_readb addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, r);
201 return r;
204 static uint32_t slow_bar_readw(void *opaque, target_phys_addr_t addr)
206 AssignedDevRegion *d = opaque;
207 uint16_t *in = d->u.r_virtbase + addr;
208 uint32_t r;
210 r = *in;
211 DEBUG("slow_bar_readw addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, r);
213 return r;
216 static uint32_t slow_bar_readl(void *opaque, target_phys_addr_t addr)
218 AssignedDevRegion *d = opaque;
219 uint32_t *in = d->u.r_virtbase + addr;
220 uint32_t r;
222 r = *in;
223 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
225 return r;
228 static void slow_bar_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
230 AssignedDevRegion *d = opaque;
231 uint8_t *out = d->u.r_virtbase + addr;
233 DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val);
234 *out = val;
237 static void slow_bar_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
239 AssignedDevRegion *d = opaque;
240 uint16_t *out = d->u.r_virtbase + addr;
242 DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val);
243 *out = val;
246 static void slow_bar_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
248 AssignedDevRegion *d = opaque;
249 uint32_t *out = d->u.r_virtbase + addr;
251 DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val);
252 *out = val;
255 static CPUWriteMemoryFunc * const slow_bar_write[] = {
256 &slow_bar_writeb,
257 &slow_bar_writew,
258 &slow_bar_writel
261 static CPUReadMemoryFunc * const slow_bar_read[] = {
262 &slow_bar_readb,
263 &slow_bar_readw,
264 &slow_bar_readl
267 static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
268 pcibus_t e_phys, pcibus_t e_size, int type)
270 AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
271 AssignedDevRegion *region = &r_dev->v_addrs[region_num];
272 PCIRegion *real_region = &r_dev->real_device.regions[region_num];
274 DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
275 e_phys, region->u.r_virtbase, type, e_size, region_num);
277 region->e_physbase = e_phys;
278 region->e_size = e_size;
280 if (e_size > 0) {
281 cpu_register_physical_memory(e_phys, e_size, region->memory_index);
283 /* deal with MSI-X MMIO page */
284 if (real_region->base_addr <= r_dev->msix_table_addr &&
285 real_region->base_addr + real_region->size >=
286 r_dev->msix_table_addr) {
287 int offset = r_dev->msix_table_addr - real_region->base_addr;
289 cpu_register_physical_memory(e_phys + offset,
290 TARGET_PAGE_SIZE, r_dev->mmio_index);
295 static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
296 pcibus_t addr, pcibus_t size, int type)
298 AssignedDevice *r_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
299 AssignedDevRegion *region = &r_dev->v_addrs[region_num];
300 int first_map = (region->e_size == 0);
301 CPUState *env;
303 region->e_physbase = addr;
304 region->e_size = size;
306 DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n",
307 addr, region->u.r_baseport, type, size, region_num);
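/* ioperm is only needed for the raw in/out fallback (no usable sysfs resource
 * fd); grant access to the port range once, on the first mapping, for every
 * vcpu. */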
309 if (first_map && region->region->resource_fd < 0) {
310 struct ioperm_data *data;
312 data = qemu_mallocz(sizeof(struct ioperm_data));
313 data->start_port = region->u.r_baseport;
314 data->num = region->r_size;
315 data->turn_on = 1;
317 kvm_add_ioperm_data(data);
319 for (env = first_cpu; env; env = env->next_cpu)
320 kvm_ioperm(env, data);
323 register_ioport_read(addr, size, 1, assigned_dev_ioport_readb,
324 (r_dev->v_addrs + region_num));
325 register_ioport_read(addr, size, 2, assigned_dev_ioport_readw,
326 (r_dev->v_addrs + region_num));
327 register_ioport_read(addr, size, 4, assigned_dev_ioport_readl,
328 (r_dev->v_addrs + region_num));
329 register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb,
330 (r_dev->v_addrs + region_num));
331 register_ioport_write(addr, size, 2, assigned_dev_ioport_writew,
332 (r_dev->v_addrs + region_num));
333 register_ioport_write(addr, size, 4, assigned_dev_ioport_writel,
334 (r_dev->v_addrs + region_num));
337 static uint32_t assigned_dev_pci_read(PCIDevice *d, int pos, int len)
339 AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
340 uint32_t val;
341 ssize_t ret;
342 int fd = pci_dev->real_device.config_fd;
344 again:
345 ret = pread(fd, &val, len, pos);
346 if (ret != len) {
347 if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
348 goto again;
350 fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
351 __func__, ret, errno);
353 exit(1);
356 return val;
359 static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos)
361 return (uint8_t)assigned_dev_pci_read(d, pos, 1);
364 static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len)
366 AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
367 ssize_t ret;
368 int fd = pci_dev->real_device.config_fd;
370 again:
371 ret = pwrite(fd, &val, len, pos);
372 if (ret != len) {
373 if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
374 goto again;
376 fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
377 __func__, ret, errno);
379 exit(1);
382 return;
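/* Walk the standard capability list in the real (host) config space; bounded
 * to 48 capabilities and offsets >= 0x40 to guard against malformed lists. */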
385 static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start)
387 int id;
388 int max_cap = 48;
389 int pos = start ? start : PCI_CAPABILITY_LIST;
390 int status;
392 status = assigned_dev_pci_read_byte(d, PCI_STATUS);
393 if ((status & PCI_STATUS_CAP_LIST) == 0)
394 return 0;
396 while (max_cap--) {
397 pos = assigned_dev_pci_read_byte(d, pos);
398 if (pos < 0x40)
399 break;
401 pos &= ~3;
402 id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID);
404 if (id == 0xff)
405 break;
406 if (id == cap)
407 return pos;
409 pos += PCI_CAP_LIST_NEXT;
411 return 0;
414 static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
415 uint32_t val, int len)
417 int fd;
418 ssize_t ret;
419 AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
421 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
422 ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
423 (uint16_t) address, val, len);
425 if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) {
426 return assigned_device_pci_cap_write_config(d, address, val, len);
429 if (ranges_overlap(address, len, PCI_COMMAND, 2)) {
430 pci_default_write_config(d, address, val, len);
431 /* Continue to program the card */
435 * Catch access to
436 * - base address registers
437 * - ROM base address & capability pointer
438 * - interrupt line & pin
440 if (ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) ||
441 ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) {
442 pci_default_write_config(d, address, val, len);
443 return;
444 } else if (ranges_overlap(address, len, PCI_CAPABILITY_LIST, 1) ||
445 ranges_overlap(address, len, PCI_INTERRUPT_LINE, 2)) {
446 uint32_t real_val;
448 pci_default_write_config(d, address, val, len);
450 /* Ensure that writes to overlapping areas we don't virtualize still
451 * hit the device. */
452 real_val = assigned_dev_pci_read(d, address, len);
453 val = merge_bits(val, real_val, address, len,
454 PCI_CAPABILITY_LIST, 0xff);
455 val = merge_bits(val, real_val, address, len,
456 PCI_INTERRUPT_LINE, 0xffff);
459 DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
460 ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
461 (uint16_t) address, val, len);
463 fd = pci_dev->real_device.config_fd;
465 again:
466 ret = pwrite(fd, &val, len, address);
467 if (ret != len) {
468 if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
469 goto again;
471 fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
472 __func__, ret, errno);
474 exit(1);
478 static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
479 int len)
481 uint32_t val = 0, virt_val;
482 int fd;
483 ssize_t ret;
484 AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
486 if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) {
487 val = assigned_device_pci_cap_read_config(d, address, len);
488 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
489 (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
490 return val;
494 * Catch access to
495 * - vendor & device ID
496 * - base address registers
497 * - ROM base address
499 if (ranges_overlap(address, len, PCI_VENDOR_ID, 4) ||
500 ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) ||
501 ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) {
502 val = pci_default_read_config(d, address, len);
503 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
504 (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
505 return val;
508 fd = pci_dev->real_device.config_fd;
510 again:
511 ret = pread(fd, &val, len, address);
512 if (ret != len) {
513 if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
514 goto again;
516 fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
517 __func__, ret, errno);
519 exit(1);
522 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
523 (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
525 if (pci_dev->emulate_cmd_mask) {
526 val = merge_bits(val, pci_default_read_config(d, address, len),
527 address, len, PCI_COMMAND, pci_dev->emulate_cmd_mask);
531 * Merge bits from virtualized
532 * - capability pointer
533 * - interrupt line & pin
535 virt_val = pci_default_read_config(d, address, len);
536 val = merge_bits(val, virt_val, address, len, PCI_CAPABILITY_LIST, 0xff);
537 val = merge_bits(val, virt_val, address, len, PCI_INTERRUPT_LINE, 0xffff);
539 if (!pci_dev->cap.available) {
540 /* kill the special capabilities */
541 if (address == PCI_COMMAND && len == 4) {
542 val &= ~(PCI_STATUS_CAP_LIST << 16);
543 } else if (address == PCI_STATUS) {
544 val &= ~PCI_STATUS_CAP_LIST;
548 return val;
551 static int assigned_dev_register_regions(PCIRegion *io_regions,
552 unsigned long regions_num,
553 AssignedDevice *pci_dev)
555 uint32_t i;
556 PCIRegion *cur_region = io_regions;
558 for (i = 0; i < regions_num; i++, cur_region++) {
559 if (!cur_region->valid)
560 continue;
561 pci_dev->v_addrs[i].num = i;
563 /* handle memory io regions */
564 if (cur_region->type & IORESOURCE_MEM) {
565 int t = cur_region->type & IORESOURCE_PREFETCH
566 ? PCI_BASE_ADDRESS_MEM_PREFETCH
567 : PCI_BASE_ADDRESS_SPACE_MEMORY;
569 /* map physical memory */
570 pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
571 pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size,
572 PROT_WRITE | PROT_READ,
573 MAP_SHARED,
574 cur_region->resource_fd,
575 (off_t)0);
577 if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) {
578 pci_dev->v_addrs[i].u.r_virtbase = NULL;
579 fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
580 "\n", __func__,
581 (uint32_t) (cur_region->base_addr));
582 return -1;
585 pci_dev->v_addrs[i].r_size = cur_region->size;
586 pci_dev->v_addrs[i].e_size = 0;
588 /* add offset */
589 pci_dev->v_addrs[i].u.r_virtbase +=
590 (cur_region->base_addr & 0xFFF);
592 if (cur_region->size & 0xFFF) {
593 fprintf(stderr, "PCI region %d at address 0x%llx "
594 "has size 0x%x, which is not a multiple of 4K. "
595 "You might experience some performance hit "
596 "due to that.\n",
597 i, (unsigned long long)cur_region->base_addr,
598 cur_region->size);
599 pci_dev->v_addrs[i].memory_index =
600 cpu_register_io_memory(slow_bar_read, slow_bar_write,
601 &pci_dev->v_addrs[i],
602 DEVICE_NATIVE_ENDIAN);
603 } else {
604 void *virtbase = pci_dev->v_addrs[i].u.r_virtbase;
605 char name[32];
606 snprintf(name, sizeof(name), "%s.bar%d",
607 pci_dev->dev.qdev.info->name, i);
608 pci_dev->v_addrs[i].memory_index =
609 qemu_ram_alloc_from_ptr(
610 &pci_dev->dev.qdev,
611 name, cur_region->size,
612 virtbase);
615 pci_register_bar((PCIDevice *) pci_dev, i, cur_region->size, t,
616 assigned_dev_iomem_map);
617 continue;
618 } else {
619 /* handle port io regions */
620 uint32_t val;
621 int ret;
623 /* Test kernel support for ioport resource read/write. Old
624 * kernels return EIO. New kernels only allow 1/2/4 byte reads
625 * so should return EINVAL for a 3 byte read */
626 ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0);
627 if (ret == 3) {
628 fprintf(stderr, "I/O port resource supports 3 byte read?!\n");
629 abort();
630 } else if (errno != EINVAL) {
631 fprintf(stderr, "Using raw in/out ioport access (sysfs - %s)\n",
632 strerror(errno));
633 close(pci_dev->v_addrs[i].region->resource_fd);
634 pci_dev->v_addrs[i].region->resource_fd = -1;
637 pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
638 pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
639 pci_dev->v_addrs[i].r_size = cur_region->size;
640 pci_dev->v_addrs[i].e_size = 0;
642 pci_register_bar((PCIDevice *) pci_dev, i,
643 cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
644 assigned_dev_ioport_map);
646 /* not relevant for port io */
647 pci_dev->v_addrs[i].memory_index = 0;
651 /* success */
652 return 0;
655 static int get_real_id(const char *devpath, const char *idname, uint16_t *val)
657 FILE *f;
658 char name[128];
659 long id;
661 snprintf(name, sizeof(name), "%s%s", devpath, idname);
662 f = fopen(name, "r");
663 if (f == NULL) {
664 fprintf(stderr, "%s: %s: %m\n", __func__, name);
665 return -1;
667 if (fscanf(f, "%li\n", &id) == 1) {
668 *val = id;
669 } else {
670 fclose(f);
return -1;
672 fclose(f);
674 return 0;
677 static int get_real_vendor_id(const char *devpath, uint16_t *val)
679 return get_real_id(devpath, "vendor", val);
682 static int get_real_device_id(const char *devpath, uint16_t *val)
684 return get_real_id(devpath, "device", val);
687 static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg,
688 uint8_t r_bus, uint8_t r_dev, uint8_t r_func)
690 char dir[128], name[128];
691 int fd, r = 0, v;
692 FILE *f;
693 unsigned long long start, end, size, flags;
694 uint16_t id;
695 struct stat statbuf;
696 PCIRegion *rp;
697 PCIDevRegions *dev = &pci_dev->real_device;
699 dev->region_number = 0;
701 snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
702 r_seg, r_bus, r_dev, r_func);
704 snprintf(name, sizeof(name), "%sconfig", dir);
706 if (pci_dev->configfd_name && *pci_dev->configfd_name) {
707 if (qemu_isdigit(pci_dev->configfd_name[0])) {
708 dev->config_fd = strtol(pci_dev->configfd_name, NULL, 0);
709 } else {
710 dev->config_fd = monitor_get_fd(cur_mon, pci_dev->configfd_name);
711 if (dev->config_fd < 0) {
712 fprintf(stderr, "%s: (%s) unknown\n", __func__,
713 pci_dev->configfd_name);
714 return 1;
717 } else {
718 dev->config_fd = open(name, O_RDWR);
720 if (dev->config_fd == -1) {
721 fprintf(stderr, "%s: %s: %m\n", __func__, name);
722 return 1;
725 again:
726 r = read(dev->config_fd, pci_dev->dev.config,
727 pci_config_size(&pci_dev->dev));
728 if (r < 0) {
729 if (errno == EINTR || errno == EAGAIN)
730 goto again;
731 fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
734 /* Clear host resource mapping info. If we choose not to register a
735 * BAR, such as might be the case with the option ROM, we can get
736 * confusing, unwritable, residual addresses from the host here. */
737 memset(&pci_dev->dev.config[PCI_BASE_ADDRESS_0], 0, 24);
738 memset(&pci_dev->dev.config[PCI_ROM_ADDRESS], 0, 4);
740 snprintf(name, sizeof(name), "%sresource", dir);
742 f = fopen(name, "r");
743 if (f == NULL) {
744 fprintf(stderr, "%s: %s: %m\n", __func__, name);
745 return 1;
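/* Each line of the sysfs "resource" file describes one BAR as
 * "<start> <end> <flags>"; size and resource type are derived from these. */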
748 for (r = 0; r < PCI_ROM_SLOT; r++) {
749 if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3)
750 break;
752 rp = dev->regions + r;
753 rp->valid = 0;
754 rp->resource_fd = -1;
755 size = end - start + 1;
756 flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
757 if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
758 continue;
759 if (flags & IORESOURCE_MEM) {
760 flags &= ~IORESOURCE_IO;
761 } else {
762 flags &= ~IORESOURCE_PREFETCH;
764 snprintf(name, sizeof(name), "%sresource%d", dir, r);
765 fd = open(name, O_RDWR);
766 if (fd == -1)
767 continue;
768 rp->resource_fd = fd;
770 rp->type = flags;
771 rp->valid = 1;
772 rp->base_addr = start;
773 rp->size = size;
774 pci_dev->v_addrs[r].region = rp;
775 DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
776 r, rp->size, start, rp->type, rp->resource_fd);
779 fclose(f);
781 /* read and fill vendor ID */
782 v = get_real_vendor_id(dir, &id);
783 if (v) {
784 return 1;
786 pci_dev->dev.config[0] = id & 0xff;
787 pci_dev->dev.config[1] = (id & 0xff00) >> 8;
789 /* read and fill device ID */
790 v = get_real_device_id(dir, &id);
791 if (v) {
792 return 1;
794 pci_dev->dev.config[2] = id & 0xff;
795 pci_dev->dev.config[3] = (id & 0xff00) >> 8;
797 /* virtual function device (physfn link present): emulate the command register */
798 snprintf(name, sizeof(name), "%sphysfn/", dir);
799 if (!stat(name, &statbuf)) {
800 pci_dev->emulate_cmd_mask = 0xffff;
803 dev->region_number = r;
804 return 0;
807 static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs);
809 #ifdef KVM_CAP_IRQ_ROUTING
810 static void free_dev_irq_entries(AssignedDevice *dev)
812 int i;
814 for (i = 0; i < dev->irq_entries_nr; i++)
815 kvm_del_routing_entry(&dev->entry[i]);
816 free(dev->entry);
817 dev->entry = NULL;
818 dev->irq_entries_nr = 0;
820 #endif
822 static void free_assigned_device(AssignedDevice *dev)
824 int i;
826 for (i = 0; i < dev->real_device.region_number; i++) {
827 PCIRegion *pci_region = &dev->real_device.regions[i];
828 AssignedDevRegion *region = &dev->v_addrs[i];
830 if (!pci_region->valid) {
831 continue;
833 if (pci_region->type & IORESOURCE_IO) {
834 if (pci_region->resource_fd < 0) {
835 kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
837 } else if (pci_region->type & IORESOURCE_MEM) {
838 if (region->u.r_virtbase) {
839 if (region->e_size > 0) {
840 cpu_register_physical_memory(region->e_physbase,
841 region->e_size,
842 IO_MEM_UNASSIGNED);
844 if (region->r_size & 0xFFF) {
845 cpu_unregister_io_memory(region->memory_index);
846 } else {
847 qemu_ram_unmap(region->memory_index);
849 if (munmap(region->u.r_virtbase,
850 (pci_region->size + 0xFFF) & 0xFFFFF000)) {
851 fprintf(stderr,
852 "Failed to unmap assigned device region: %s\n",
853 strerror(errno));
857 if (pci_region->resource_fd >= 0) {
858 close(pci_region->resource_fd);
862 if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
863 assigned_dev_unregister_msix_mmio(dev);
865 if (dev->real_device.config_fd >= 0) {
866 close(dev->real_device.config_fd);
869 #ifdef KVM_CAP_IRQ_ROUTING
870 free_dev_irq_entries(dev);
871 #endif
874 static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
876 return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
879 static void assign_failed_examine(AssignedDevice *dev)
881 char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
882 uint16_t vendor_id, device_id;
883 int r;
885 sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
886 dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
888 sprintf(name, "%sdriver", dir);
890 r = readlink(name, driver, sizeof(driver));
891 if ((r <= 0) || r >= sizeof(driver) || !(ns = strrchr(driver, '/'))) {
892 goto fail;
895 ns++;
897 if (get_real_vendor_id(dir, &vendor_id) ||
898 get_real_device_id(dir, &device_id)) {
899 goto fail;
902 fprintf(stderr, "*** The driver '%s' is occupying your device "
903 "%04x:%02x:%02x.%x.\n",
904 ns, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
905 fprintf(stderr, "***\n");
906 fprintf(stderr, "*** You can try the following commands to free it:\n");
907 fprintf(stderr, "***\n");
908 fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
909 "new_id\n", vendor_id, device_id);
910 fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
911 "%s/unbind\n",
912 dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func, ns);
913 fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
914 "pci-stub/bind\n",
915 dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
916 fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
917 "/remove_id\n", vendor_id, device_id);
918 fprintf(stderr, "***\n");
920 return;
922 fail:
923 fprintf(stderr, "Couldn't find out why.\n");
926 static int assign_device(AssignedDevice *dev)
928 struct kvm_assigned_pci_dev assigned_dev_data;
929 int r;
931 #ifdef KVM_CAP_PCI_SEGMENT
932 /* Only pass non-zero PCI segment to capable module */
933 if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) &&
934 dev->h_segnr) {
935 fprintf(stderr, "Can't assign device inside non-zero PCI segment "
936 "as this KVM module doesn't support it.\n");
937 return -ENODEV;
939 #endif
941 memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
942 assigned_dev_data.assigned_dev_id =
943 calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
944 #ifdef KVM_CAP_PCI_SEGMENT
945 assigned_dev_data.segnr = dev->h_segnr;
946 #endif
947 assigned_dev_data.busnr = dev->h_busnr;
948 assigned_dev_data.devfn = dev->h_devfn;
950 #ifdef KVM_CAP_IOMMU
951 /* We always enable the IOMMU unless disabled on the command line */
952 if (dev->features & ASSIGNED_DEVICE_USE_IOMMU_MASK) {
953 if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) {
954 fprintf(stderr, "No IOMMU found. Unable to assign device \"%s\"\n",
955 dev->dev.qdev.id);
956 return -ENODEV;
958 assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
960 #else
961 dev->features &= ~ASSIGNED_DEVICE_USE_IOMMU_MASK;
962 #endif
963 if (!(dev->features & ASSIGNED_DEVICE_USE_IOMMU_MASK)) {
964 fprintf(stderr,
965 "WARNING: Assigning a device without IOMMU protection can "
966 "cause host memory corruption if the device issues DMA write "
967 "requests!\n");
970 r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
971 if (r < 0) {
972 fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
973 dev->dev.qdev.id, strerror(-r));
975 switch (r) {
976 case -EBUSY:
977 assign_failed_examine(dev);
978 break;
979 default:
980 break;
983 return r;
986 static int assign_irq(AssignedDevice *dev)
988 struct kvm_assigned_irq assigned_irq_data;
989 int irq, r = 0;
991 /* Interrupt PIN 0 means don't use INTx */
992 if (assigned_dev_pci_read_byte(&dev->dev, PCI_INTERRUPT_PIN) == 0)
993 return 0;
995 irq = pci_map_irq(&dev->dev, dev->intpin);
996 irq = piix_get_irq(irq);
998 #ifdef TARGET_IA64
999 irq = ipf_map_irq(&dev->dev, irq);
1000 #endif
1002 if (dev->girq == irq)
1003 return r;
1005 memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
1006 assigned_irq_data.assigned_dev_id =
1007 calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
1008 assigned_irq_data.guest_irq = irq;
1009 assigned_irq_data.host_irq = dev->real_device.irq;
1010 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1011 if (dev->irq_requested_type) {
1012 assigned_irq_data.flags = dev->irq_requested_type;
1013 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
1014 /* -ENXIO means no assigned irq */
1015 if (r && r != -ENXIO)
1016 perror("assign_irq: deassign");
1019 assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX;
1020 if (dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK &&
1021 dev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
1022 assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_MSI;
1023 else
1024 assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_INTX;
1025 #endif
1027 r = kvm_assign_irq(kvm_context, &assigned_irq_data);
1028 if (r < 0) {
1029 fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
1030 dev->dev.qdev.id, strerror(-r));
1031 fprintf(stderr, "Perhaps you are assigning a device "
1032 "that shares an IRQ with another device?\n");
1033 return r;
1036 dev->girq = irq;
1037 dev->irq_requested_type = assigned_irq_data.flags;
1038 return r;
1041 static void deassign_device(AssignedDevice *dev)
1043 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1044 struct kvm_assigned_pci_dev assigned_dev_data;
1045 int r;
1047 memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
1048 assigned_dev_data.assigned_dev_id =
1049 calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
1051 r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data);
1052 if (r < 0)
1053 fprintf(stderr, "Failed to deassign device \"%s\" : %s\n",
1054 dev->dev.qdev.id, strerror(-r));
1055 #endif
1058 #if 0
1059 AssignedDevInfo *get_assigned_device(int pcibus, int slot)
1061 AssignedDevice *assigned_dev = NULL;
1062 AssignedDevInfo *adev = NULL;
1064 QLIST_FOREACH(adev, &adev_head, next) {
1065 assigned_dev = adev->assigned_dev;
1066 if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
1067 PCI_SLOT(assigned_dev->dev.devfn) == slot)
1068 return adev;
1071 return NULL;
1073 #endif
1075 /* The pci config space got updated. Check if irq numbers have changed
1076 * for our devices
1078 void assigned_dev_update_irqs(void)
1080 AssignedDevice *dev, *next;
1081 int r;
1083 dev = QLIST_FIRST(&devs);
1084 while (dev) {
1085 next = QLIST_NEXT(dev, next);
1086 r = assign_irq(dev);
1087 if (r < 0)
1088 qdev_unplug(&dev->dev.qdev);
1089 dev = next;
1093 #ifdef KVM_CAP_IRQ_ROUTING
1095 #ifdef KVM_CAP_DEVICE_MSI
1096 static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
1098 struct kvm_assigned_irq assigned_irq_data;
1099 AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1100 uint8_t ctrl_byte = pci_dev->config[ctrl_pos];
1101 int r;
1103 memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
1104 assigned_irq_data.assigned_dev_id =
1105 calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
1106 (uint8_t)assigned_dev->h_devfn);
1108 /* Some guests gratuitously disable MSI even if they're not using it;
1109 * try to catch this by only deassigning irqs if the guest is using
1110 * MSI or intends to start using it. */
1111 if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSI) ||
1112 (ctrl_byte & PCI_MSI_FLAGS_ENABLE)) {
1114 assigned_irq_data.flags = assigned_dev->irq_requested_type;
1115 free_dev_irq_entries(assigned_dev);
1116 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
1117 /* -ENXIO means no assigned irq */
1118 if (r && r != -ENXIO)
1119 perror("assigned_dev_update_msi: deassign irq");
1121 assigned_dev->irq_requested_type = 0;
1124 if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
1125 int pos = ctrl_pos - PCI_MSI_FLAGS;
1126 assigned_dev->entry = qemu_mallocz(sizeof(*(assigned_dev->entry)));
1127 assigned_dev->entry->u.msi.address_lo =
1128 pci_get_long(pci_dev->config + pos + PCI_MSI_ADDRESS_LO);
1129 assigned_dev->entry->u.msi.address_hi = 0;
1130 assigned_dev->entry->u.msi.data =
1131 pci_get_word(pci_dev->config + pos + PCI_MSI_DATA_32);
1132 assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
1133 r = kvm_get_irq_route_gsi();
1134 if (r < 0) {
1135 perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
1136 return;
1138 assigned_dev->entry->gsi = r;
1140 kvm_add_routing_entry(assigned_dev->entry);
1141 if (kvm_commit_irq_routes() < 0) {
1142 perror("assigned_dev_update_msi: kvm_commit_irq_routes");
1143 assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED;
1144 return;
1146 assigned_dev->irq_entries_nr = 1;
1148 assigned_irq_data.guest_irq = assigned_dev->entry->gsi;
1149 assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
1150 if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0)
1151 perror("assigned_dev_enable_msi: assign irq");
1153 assigned_dev->girq = -1;
1154 assigned_dev->irq_requested_type = assigned_irq_data.flags;
1155 } else {
1156 assign_irq(assigned_dev);
1159 #endif
1161 #ifdef KVM_CAP_DEVICE_MSIX
1162 static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
1164 AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1165 uint16_t entries_nr = 0, entries_max_nr;
1166 int pos = 0, i, r = 0;
1167 uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
1168 struct kvm_assigned_msix_nr msix_nr;
1169 struct kvm_assigned_msix_entry msix_entry;
1170 void *va = adev->msix_table_page;
1172 pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
1174 entries_max_nr = *(uint16_t *)(pci_dev->config + pos + 2);
1175 entries_max_nr &= PCI_MSIX_TABSIZE;
1176 entries_max_nr += 1;
1178 /* Count the usable entries before allocating routing entries */
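/* Each MSI-X table entry is 16 bytes: message address low/high, message data
 * and vector control at offsets 0/4/8/12 of the shadow page. */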
1179 for (i = 0; i < entries_max_nr; i++) {
1180 memcpy(&msg_ctrl, va + i * 16 + 12, 4);
1181 memcpy(&msg_data, va + i * 16 + 8, 4);
1182 /* Ignore an unused entry even if it's unmasked */
1183 if (msg_data == 0)
1184 continue;
1185 entries_nr ++;
1188 if (entries_nr == 0) {
1189 fprintf(stderr, "MSI-X entry number is zero!\n");
1190 return -EINVAL;
1192 msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
1193 (uint8_t)adev->h_devfn);
1194 msix_nr.entry_nr = entries_nr;
1195 r = kvm_assign_set_msix_nr(kvm_context, &msix_nr);
1196 if (r != 0) {
1197 fprintf(stderr, "failed to set MSI-X entry number! %s\n",
1198 strerror(-r));
1199 return r;
1202 free_dev_irq_entries(adev);
1203 adev->irq_entries_nr = entries_nr;
1204 adev->entry = qemu_mallocz(entries_nr * sizeof(*(adev->entry)));
1206 msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
1207 entries_nr = 0;
1208 for (i = 0; i < entries_max_nr; i++) {
1209 if (entries_nr >= msix_nr.entry_nr)
1210 break;
1211 memcpy(&msg_ctrl, va + i * 16 + 12, 4);
1212 memcpy(&msg_data, va + i * 16 + 8, 4);
1213 if (msg_data == 0)
1214 continue;
1216 memcpy(&msg_addr, va + i * 16, 4);
1217 memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
1219 r = kvm_get_irq_route_gsi();
1220 if (r < 0)
1221 return r;
1223 adev->entry[entries_nr].gsi = r;
1224 adev->entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
1225 adev->entry[entries_nr].flags = 0;
1226 adev->entry[entries_nr].u.msi.address_lo = msg_addr;
1227 adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
1228 adev->entry[entries_nr].u.msi.data = msg_data;
1229 DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, msg_addr);
1230 kvm_add_routing_entry(&adev->entry[entries_nr]);
1232 msix_entry.gsi = adev->entry[entries_nr].gsi;
1233 msix_entry.entry = i;
1234 r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
1235 if (r) {
1236 fprintf(stderr, "failed to set MSI-X entry! %s\n", strerror(-r));
1237 break;
1239 DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
1240 msix_entry.gsi, msix_entry.entry);
1241 entries_nr ++;
1244 if (r == 0 && kvm_commit_irq_routes() < 0) {
1245 perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
1246 return -EINVAL;
1249 return r;
1252 static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
1254 struct kvm_assigned_irq assigned_irq_data;
1255 AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1256 uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
1257 int r;
1259 memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
1260 assigned_irq_data.assigned_dev_id =
1261 calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
1262 (uint8_t)assigned_dev->h_devfn);
1264 /* Some guests gratuitously disable MSI-X even if they're not using it;
1265 * try to catch this by only deassigning irqs if the guest is using
1266 * MSI-X or intends to start using it. */
1267 if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
1268 (*ctrl_word & PCI_MSIX_ENABLE)) {
1270 assigned_irq_data.flags = assigned_dev->irq_requested_type;
1271 free_dev_irq_entries(assigned_dev);
1272 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
1273 /* -ENXIO means no assigned irq */
1274 if (r && r != -ENXIO)
1275 perror("assigned_dev_update_msix: deassign irq");
1277 assigned_dev->irq_requested_type = 0;
1280 if (*ctrl_word & PCI_MSIX_ENABLE) {
1281 assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
1282 KVM_DEV_IRQ_GUEST_MSIX;
1284 if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
1285 perror("assigned_dev_update_msix_mmio");
1286 return;
1288 if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
1289 perror("assigned_dev_enable_msix: assign irq");
1290 return;
1292 assigned_dev->girq = -1;
1293 assigned_dev->irq_requested_type = assigned_irq_data.flags;
1294 } else {
1295 assign_irq(assigned_dev);
1298 #endif
1299 #endif
1301 /* There can be multiple VNDR capabilities per device; we need to find the
1302 * one that starts closest to the given address without going over. */
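/* Illustrative example: with vendor capabilities starting at 0x40 and 0x60,
 * an access to address 0x64 resolves to the capability at 0x60. */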
1303 static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address)
1305 uint8_t cap, pos;
1307 for (cap = pos = 0;
1308 (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos));
1309 pos += PCI_CAP_LIST_NEXT) {
1310 if (pos <= address) {
1311 cap = MAX(pos, cap);
1314 return cap;
1317 static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
1318 uint32_t address, int len)
1320 uint8_t cap, cap_id = pci_dev->config_map[address];
1321 uint32_t val;
1323 switch (cap_id) {
1325 case PCI_CAP_ID_VPD:
1326 cap = pci_find_capability(pci_dev, cap_id);
1327 val = assigned_dev_pci_read(pci_dev, address, len);
1328 return merge_bits(val, pci_get_long(pci_dev->config + address),
1329 address, len, cap + PCI_CAP_LIST_NEXT, 0xff);
1331 case PCI_CAP_ID_VNDR:
1332 cap = find_vndr_start(pci_dev, address);
1333 val = assigned_dev_pci_read(pci_dev, address, len);
1334 return merge_bits(val, pci_get_long(pci_dev->config + address),
1335 address, len, cap + PCI_CAP_LIST_NEXT, 0xff);
1338 return pci_default_read_config(pci_dev, address, len);
1341 static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
1342 uint32_t address,
1343 uint32_t val, int len)
1345 uint8_t cap_id = pci_dev->config_map[address];
1347 pci_default_write_config(pci_dev, address, val, len);
1348 switch (cap_id) {
1349 #ifdef KVM_CAP_IRQ_ROUTING
1350 case PCI_CAP_ID_MSI:
1351 #ifdef KVM_CAP_DEVICE_MSI
1353 uint8_t cap = pci_find_capability(pci_dev, cap_id);
1354 if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) {
1355 assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS);
1358 #endif
1359 break;
1361 case PCI_CAP_ID_MSIX:
1362 #ifdef KVM_CAP_DEVICE_MSIX
1364 uint8_t cap = pci_find_capability(pci_dev, cap_id);
1365 if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) {
1366 assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS);
1369 #endif
1370 break;
1371 #endif
1373 case PCI_CAP_ID_VPD:
1374 case PCI_CAP_ID_VNDR:
1375 assigned_dev_pci_write(pci_dev, address, val, len);
1376 break;
1380 static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
1382 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1383 PCIRegion *pci_region = dev->real_device.regions;
1384 int ret, pos;
1386 /* Clear initial capabilities pointer and status copied from hw */
1387 pci_set_byte(pci_dev->config + PCI_CAPABILITY_LIST, 0);
1388 pci_set_word(pci_dev->config + PCI_STATUS,
1389 pci_get_word(pci_dev->config + PCI_STATUS) &
1390 ~PCI_STATUS_CAP_LIST);
1392 #ifdef KVM_CAP_IRQ_ROUTING
1393 #ifdef KVM_CAP_DEVICE_MSI
1394 /* Expose MSI capability
1395 * MSI capability is the 1st capability in capability config */
1396 if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0))) {
1397 dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
1398 /* Only 32-bit/no-mask currently supported */
1399 if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10)) < 0) {
1400 return ret;
1403 pci_set_word(pci_dev->config + pos + PCI_MSI_FLAGS,
1404 pci_get_word(pci_dev->config + pos + PCI_MSI_FLAGS) &
1405 PCI_MSI_FLAGS_QMASK);
1406 pci_set_long(pci_dev->config + pos + PCI_MSI_ADDRESS_LO, 0);
1407 pci_set_word(pci_dev->config + pos + PCI_MSI_DATA_32, 0);
1409 /* Set writable fields */
1410 pci_set_word(pci_dev->wmask + pos + PCI_MSI_FLAGS,
1411 PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
1412 pci_set_long(pci_dev->wmask + pos + PCI_MSI_ADDRESS_LO, 0xfffffffc);
1413 pci_set_word(pci_dev->wmask + pos + PCI_MSI_DATA_32, 0xffff);
1415 #endif
1416 #ifdef KVM_CAP_DEVICE_MSIX
1417 /* Expose MSI-X capability */
1418 if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0))) {
1419 int bar_nr;
1420 uint32_t msix_table_entry;
1422 dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
1423 if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos, 12)) < 0) {
1424 return ret;
1427 pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS,
1428 pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
1429 PCI_MSIX_TABSIZE);
1431 /* Only enable and function mask bits are writable */
1432 pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS,
1433 PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
1435 msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
1436 bar_nr = msix_table_entry & PCI_MSIX_BIR;
1437 msix_table_entry &= ~PCI_MSIX_BIR;
1438 dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
1440 #endif
1441 #endif
1443 /* Minimal PM support, nothing writable, device appears to NAK changes */
1444 if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM, 0))) {
1445 uint16_t pmc;
1446 if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos,
1447 PCI_PM_SIZEOF)) < 0) {
1448 return ret;
1451 pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS);
1452 pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI);
1453 pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc);
1455 /* assign_device will bring the device up to D0, so we don't need
1456 * to worry about doing that ourselves here. */
1457 pci_set_word(pci_dev->config + pos + PCI_PM_CTRL,
1458 PCI_PM_CTRL_NO_SOFT_RESET);
1460 pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0);
1461 pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0);
1464 if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP, 0))) {
1465 uint8_t version;
1466 uint16_t type, devctl, lnkcap, lnksta;
1467 uint32_t devcap;
1468 int size = 0x3c; /* version 2 size */
1470 version = pci_get_byte(pci_dev->config + pos + PCI_EXP_FLAGS);
1471 version &= PCI_EXP_FLAGS_VERS;
1472 if (version == 1) {
1473 size = 0x14;
1474 } else if (version > 2) {
1475 fprintf(stderr, "Unsupported PCI express capability version %d\n",
1476 version);
1477 return -EINVAL;
1480 if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_EXP,
1481 pos, size)) < 0) {
1482 return ret;
1485 type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS);
1486 type = (type & PCI_EXP_FLAGS_TYPE) >> 8;
1487 if (type != PCI_EXP_TYPE_ENDPOINT &&
1488 type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) {
1489 fprintf(stderr,
1490 "Device assignment only supports endpoint assignment, "
1491 "device type %d\n", type);
1492 return -EINVAL;
1495 /* capabilities, pass existing read-only copy
1496 * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */
1498 /* device capabilities: hide FLR */
1499 devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP);
1500 devcap &= ~PCI_EXP_DEVCAP_FLR;
1501 pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap);
1503 /* device control: clear all error reporting enable bits, leaving
1504 * only a few host values. Note, these are
1505 * all writable, but not passed to hw.
1507 devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL);
1508 devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) |
1509 PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN;
1510 pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl);
1511 devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME;
1512 pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl);
1514 /* Clear device status */
1515 pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0);
1517 /* Link capabilities: expose link speeds, widths and latencies; clear reporting */
1518 lnkcap = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKCAP);
1519 lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW |
1520 PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL |
1521 PCI_EXP_LNKCAP_L1EL);
1522 pci_set_word(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap);
1523 pci_set_word(pci_dev->wmask + pos + PCI_EXP_LNKCAP,
1524 PCI_EXP_LNKCTL_ASPMC | PCI_EXP_LNKCTL_RCB |
1525 PCI_EXP_LNKCTL_CCC | PCI_EXP_LNKCTL_ES |
1526 PCI_EXP_LNKCTL_CLKREQ_EN | PCI_EXP_LNKCTL_HAWD);
1528 /* Link control, pass existing read-only copy. Should be writable? */
1530 /* Link status, only expose current speed and width */
1531 lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA);
1532 lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
1533 pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta);
1535 if (version >= 2) {
1536 /* Slot capabilities, control, status - not needed for endpoints */
1537 pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0);
1538 pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0);
1539 pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0);
1541 /* Root control, capabilities, status - not needed for endpoints */
1542 pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0);
1543 pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0);
1544 pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0);
1546 /* Device capabilities/control 2, pass existing read-only copy */
1547 /* Link control 2, pass existing read-only copy */
1551 if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX, 0))) {
1552 uint16_t cmd;
1553 uint32_t status;
1555 /* Only expose the minimum, 8 byte capability */
1556 if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8)) < 0) {
1557 return ret;
1560 /* Command register, clear upper bits, including extended modes */
1561 cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD);
1562 cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ |
1563 PCI_X_CMD_MAX_SPLIT);
1564 pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd);
1566 /* Status register, update with emulated PCI bus location, clear
1567 * error bits, leave the rest. */
1568 status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS);
1569 status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN);
1570 status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn;
1571 status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL |
1572 PCI_X_STATUS_SPL_ERR);
1573 pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status);
1576 if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0))) {
1577 /* Direct R/W passthrough */
1578 if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0) {
1579 return ret;
1583 /* Devices can have multiple vendor capabilities, get them all */
1584 for (pos = 0; (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos));
1585 pos += PCI_CAP_LIST_NEXT) {
1586 uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS);
1587 /* Direct R/W passthrough */
1588 if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VNDR,
1589 pos, len)) < 0) {
1590 return ret;
1594 return 0;
1597 static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
1599 AssignedDevice *adev = opaque;
1600 unsigned int offset = addr & 0xfff;
1601 void *page = adev->msix_table_page;
1602 uint32_t val = 0;
1604 memcpy(&val, (void *)((char *)page + offset), 4);
1606 return val;
1609 static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
1611 return ((msix_mmio_readl(opaque, addr & ~3)) >>
1612 (8 * (addr & 3))) & 0xff;
1615 static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
1617 return ((msix_mmio_readl(opaque, addr & ~3)) >>
1618 (8 * (addr & 3))) & 0xffff;
1621 static void msix_mmio_writel(void *opaque,
1622 target_phys_addr_t addr, uint32_t val)
1624 AssignedDevice *adev = opaque;
1625 unsigned int offset = addr & 0xfff;
1626 void *page = adev->msix_table_page;
1628 DEBUG("write to MSI-X entry table mmio offset 0x" TARGET_FMT_plx ", val 0x%x\n",
1629 addr, val);
1630 memcpy((void *)((char *)page + offset), &val, 4);
1633 static void msix_mmio_writew(void *opaque,
1634 target_phys_addr_t addr, uint32_t val)
1636 msix_mmio_writel(opaque, addr & ~3,
1637 (val & 0xffff) << (8*(addr & 3)));
1640 static void msix_mmio_writeb(void *opaque,
1641 target_phys_addr_t addr, uint32_t val)
1643 msix_mmio_writel(opaque, addr & ~3,
1644 (val & 0xff) << (8*(addr & 3)));
1647 static CPUWriteMemoryFunc *msix_mmio_write[] = {
1648 msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel
1651 static CPUReadMemoryFunc *msix_mmio_read[] = {
1652 msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
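/* Back the guest-visible MSI-X table with an anonymous page; guest accesses
 * are trapped by the MMIO handlers above and the page contents are parsed
 * later by assigned_dev_update_msix_mmio(). */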
1655 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
1657 dev->msix_table_page = mmap(NULL, 0x1000,
1658 PROT_READ|PROT_WRITE,
1659 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
1660 if (dev->msix_table_page == MAP_FAILED) {
1661 fprintf(stderr, "failed to allocate msix_table_page! %s\n",
1662 strerror(errno));
1663 return -EFAULT;
1665 memset(dev->msix_table_page, 0, 0x1000);
1666 dev->mmio_index = cpu_register_io_memory(
1667 msix_mmio_read, msix_mmio_write, dev,
1668 DEVICE_NATIVE_ENDIAN);
1669 return 0;
1672 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
1674 if (!dev->msix_table_page)
1675 return;
1677 cpu_unregister_io_memory(dev->mmio_index);
1678 dev->mmio_index = 0;
1680 if (munmap(dev->msix_table_page, 0x1000) == -1) {
1681 fprintf(stderr, "error unmapping msix_table_page! %s\n",
1682 strerror(errno));
1684 dev->msix_table_page = NULL;
1687 static const VMStateDescription vmstate_assigned_device = {
1688 .name = "pci-assign",
1689 .fields = (VMStateField []) {
1690 VMSTATE_END_OF_LIST()
1694 static void reset_assigned_device(DeviceState *dev)
1696 PCIDevice *pci_dev = DO_UPCAST(PCIDevice, qdev, dev);
1697 AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1698 char reset_file[64];
1699 const char reset[] = "1";
1700 int fd, ret;
1702 snprintf(reset_file, sizeof(reset_file),
1703 "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/reset",
1704 adev->host.seg, adev->host.bus, adev->host.dev, adev->host.func);
1707 * Issue a device reset via pci-sysfs. Note that we use write(2) here
1708 * and ignore the return value because some kernels have a bug that
1709 * returns 0 rather than bytes written on success, sending us into an
1710 * infinite retry loop using other write mechanisms.
1712 fd = open(reset_file, O_WRONLY);
1713 if (fd != -1) {
1714 ret = write(fd, reset, strlen(reset));
1715 close(fd);
1719 * When a 0 is written to the command register, the device is logically
1720 * disconnected from the PCI bus. This avoids further DMA transfers.
1722 assigned_dev_pci_write_config(pci_dev, PCI_COMMAND, 0, 2);
1725 static int assigned_initfn(struct PCIDevice *pci_dev)
1727 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1728 uint8_t e_device, e_intx;
1729 int r;
1731 if (!kvm_enabled()) {
1732 error_report("pci-assign: error: requires KVM support");
1733 return -1;
1736 if (!dev->host.seg && !dev->host.bus && !dev->host.dev && !dev->host.func) {
1737 error_report("pci-assign: error: no host device specified");
1738 return -1;
1741 if (get_real_device(dev, dev->host.seg, dev->host.bus,
1742 dev->host.dev, dev->host.func)) {
1743 error_report("pci-assign: Error: Couldn't get real device (%s)!",
1744 dev->dev.qdev.id);
1745 goto out;
1748 /* handle real device's MMIO/PIO BARs */
1749 if (assigned_dev_register_regions(dev->real_device.regions,
1750 dev->real_device.region_number,
1751 dev))
1752 goto out;
1754 /* handle interrupt routing */
1755 e_device = (dev->dev.devfn >> 3) & 0x1f; /* slot number */
1756 e_intx = dev->dev.config[0x3d] - 1; /* PCI_INTERRUPT_PIN (1-based) -> 0-based pin */
1757 dev->intpin = e_intx;
1758 dev->run = 0;
1759 dev->girq = -1;
1760 dev->h_segnr = dev->host.seg;
1761 dev->h_busnr = dev->host.bus;
1762 dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func);
1764 if (assigned_device_pci_cap_init(pci_dev) < 0)
1765 goto out;
1767 /* assign device to guest */
1768 r = assign_device(dev);
1769 if (r < 0)
1770 goto out;
1772 /* assign irq for the device */
1773 r = assign_irq(dev);
1774 if (r < 0)
1775 goto assigned_out;
1777 /* intercept MSI-X entry page in the MMIO */
1778 if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
1779 if (assigned_dev_register_msix_mmio(dev))
1780 goto assigned_out;
1782 assigned_dev_load_option_rom(dev);
1783 QLIST_INSERT_HEAD(&devs, dev, next);
1785 add_boot_device_path(dev->bootindex, &pci_dev->qdev, NULL);
1787 /* Register a vmsd so that we can mark it unmigratable. */
1788 vmstate_register(&dev->dev.qdev, 0, &vmstate_assigned_device, dev);
1789 register_device_unmigratable(&dev->dev.qdev,
1790 vmstate_assigned_device.name, dev);
1792 return 0;
1794 assigned_out:
1795 deassign_device(dev);
1796 out:
1797 free_assigned_device(dev);
1798 return -1;
1801 static int assigned_exitfn(struct PCIDevice *pci_dev)
1803 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1805 vmstate_unregister(&dev->dev.qdev, &vmstate_assigned_device, dev);
1806 QLIST_REMOVE(dev, next);
1807 deassign_device(dev);
1808 free_assigned_device(dev);
1809 return 0;
1812 static int parse_hostaddr(DeviceState *dev, Property *prop, const char *str)
1814 PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
1815 int rc;
1817 rc = pci_parse_host_devaddr(str, &ptr->seg, &ptr->bus, &ptr->dev, &ptr->func);
1818 if (rc != 0)
1819 return -1;
1820 return 0;
1823 static int print_hostaddr(DeviceState *dev, Property *prop, char *dest, size_t len)
1825 PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
1827 return snprintf(dest, len, "%02x:%02x.%x", ptr->bus, ptr->dev, ptr->func);
1830 PropertyInfo qdev_prop_hostaddr = {
1831 .name = "pci-hostaddr",
1832 .type = -1,
1833 .size = sizeof(PCIHostDevice),
1834 .parse = parse_hostaddr,
1835 .print = print_hostaddr,
1838 static PCIDeviceInfo assign_info = {
1839 .qdev.name = "pci-assign",
1840 .qdev.desc = "pass through host pci devices to the guest",
1841 .qdev.size = sizeof(AssignedDevice),
1842 .qdev.reset = reset_assigned_device,
1843 .init = assigned_initfn,
1844 .exit = assigned_exitfn,
1845 .config_read = assigned_dev_pci_read_config,
1846 .config_write = assigned_dev_pci_write_config,
1847 .qdev.props = (Property[]) {
1848 DEFINE_PROP("host", AssignedDevice, host, qdev_prop_hostaddr, PCIHostDevice),
1849 DEFINE_PROP_BIT("iommu", AssignedDevice, features,
1850 ASSIGNED_DEVICE_USE_IOMMU_BIT, true),
1851 DEFINE_PROP_BIT("prefer_msi", AssignedDevice, features,
1852 ASSIGNED_DEVICE_PREFER_MSI_BIT, true),
1853 DEFINE_PROP_INT32("bootindex", AssignedDevice, bootindex, -1),
1854 DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name),
1855 DEFINE_PROP_END_OF_LIST(),
1859 static void assign_register_devices(void)
1861 pci_qdev_register(&assign_info);
1864 device_init(assign_register_devices)
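/* Typical usage (illustrative): assign host device 0000:01:00.0 to the guest
 * with "-device pci-assign,host=01:00.0"; the iommu, prefer_msi, bootindex
 * and configfd options correspond to the properties defined above. */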
1867 * Scan the assigned devices for the devices that have an option ROM, and then
1868 * load the corresponding ROM data to RAM. If an error occurs while loading an
1869 * option ROM, we just ignore that option ROM and continue with the next one.
1871 static void assigned_dev_load_option_rom(AssignedDevice *dev)
1873 char name[32], rom_file[64];
1874 FILE *fp;
1875 uint8_t val;
1876 struct stat st;
1877 void *ptr;
1879 /* If loading ROM from file, pci handles it */
1880 if (dev->dev.romfile || !dev->dev.rom_bar)
1881 return;
1883 snprintf(rom_file, sizeof(rom_file),
1884 "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
1885 dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
1887 if (stat(rom_file, &st)) {
1888 return;
1891 if (access(rom_file, F_OK)) {
1892 fprintf(stderr, "pci-assign: Insufficient privileges for %s\n",
1893 rom_file);
1894 return;
1897 /* Write "1" to the ROM file to enable it */
1898 fp = fopen(rom_file, "r+");
1899 if (fp == NULL) {
1900 return;
1902 val = 1;
1903 if (fwrite(&val, 1, 1, fp) != 1) {
1904 goto close_rom;
1906 fseek(fp, 0, SEEK_SET);
1908 snprintf(name, sizeof(name), "%s.rom", dev->dev.qdev.info->name);
1909 dev->dev.rom_offset = qemu_ram_alloc(&dev->dev.qdev, name, st.st_size);
1910 ptr = qemu_get_ram_ptr(dev->dev.rom_offset);
1911 memset(ptr, 0xff, st.st_size);
1913 if (!fread(ptr, 1, st.st_size, fp)) {
1914 fprintf(stderr, "pci-assign: Cannot read from host %s\n"
1915 "\tDevice option ROM contents are probably invalid "
1916 "(check dmesg).\n\tSkip option ROM probe with rombar=0, "
1917 "or load from file with romfile=\n", rom_file);
1918 qemu_ram_free(dev->dev.rom_offset);
1919 dev->dev.rom_offset = 0;
1920 goto close_rom;
1923 pci_register_bar(&dev->dev, PCI_ROM_SLOT,
1924 st.st_size, 0, pci_map_option_rom);
1925 close_rom:
1926 /* Write "0" to disable ROM */
1927 fseek(fp, 0, SEEK_SET);
1928 val = 0;
1929 if (!fwrite(&val, 1, 1, fp)) {
1930 DEBUG("%s\n", "Failed to disable pci-sysfs rom file");
1932 fclose(fp);