2 * Copyright (c) 2007, Neocleus Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 * Assign a PCI device from the host to a guest VM.
20 * Adapted for KVM by Qumranet.
22 * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
23 * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
24 * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
25 * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
26 * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
31 #include <sys/types.h>
36 #include "qemu-error.h"
38 #include "device-assignment.h"
42 #include <pci/header.h>
45 /* From linux/ioport.h */
46 #define IORESOURCE_IO 0x00000100 /* Resource type */
47 #define IORESOURCE_MEM 0x00000200
48 #define IORESOURCE_IRQ 0x00000400
49 #define IORESOURCE_DMA 0x00000800
50 #define IORESOURCE_PREFETCH 0x00002000 /* No side effects */
52 /* #define DEVICE_ASSIGNMENT_DEBUG 1 */
54 #ifdef DEVICE_ASSIGNMENT_DEBUG
55 #define DEBUG(fmt, ...) \
57 fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
60 #define DEBUG(fmt, ...) do { } while(0)
63 static void assigned_dev_load_option_rom(AssignedDevice
*dev
);
65 static void assigned_dev_unregister_msix_mmio(AssignedDevice
*dev
);
67 static void assigned_device_pci_cap_write_config(PCIDevice
*pci_dev
,
69 uint32_t val
, int len
);
71 static uint32_t assigned_device_pci_cap_read_config(PCIDevice
*pci_dev
,
72 uint32_t address
, int len
);
74 /* Merge the bits set in mask from mval into val. Both val and mval are
75 * at the same addr offset, pos is the starting offset of the mask. */
76 static uint32_t merge_bits(uint32_t val
, uint32_t mval
, uint8_t addr
,
77 int len
, uint8_t pos
, uint32_t mask
)
79 if (!ranges_overlap(addr
, len
, pos
, 4)) {
84 mask
>>= (addr
- pos
) * 8;
86 mask
<<= (pos
- addr
) * 8;
88 mask
&= 0xffffffffU
>> (4 - len
) * 8;
96 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion
*dev_region
,
97 uint32_t addr
, int len
, uint32_t *val
)
100 uint32_t offset
= addr
- dev_region
->e_physbase
;
101 int fd
= dev_region
->region
->resource_fd
;
105 DEBUG("pwrite val=%x, len=%d, e_phys=%x, offset=%x\n",
106 *val
, len
, addr
, offset
);
107 if (pwrite(fd
, val
, len
, offset
) != len
) {
108 fprintf(stderr
, "%s - pwrite failed %s\n",
109 __func__
, strerror(errno
));
112 if (pread(fd
, &ret
, len
, offset
) != len
) {
113 fprintf(stderr
, "%s - pread failed %s\n",
114 __func__
, strerror(errno
));
115 ret
= (1UL << (len
* 8)) - 1;
117 DEBUG("pread ret=%x, len=%d, e_phys=%x, offset=%x\n",
118 ret
, len
, addr
, offset
);
121 uint32_t port
= offset
+ dev_region
->u
.r_baseport
;
124 DEBUG("out val=%x, len=%d, e_phys=%x, host=%x\n",
125 *val
, len
, addr
, port
);
149 DEBUG("in val=%x, len=%d, e_phys=%x, host=%x\n",
150 ret
, len
, addr
, port
);
156 static void assigned_dev_ioport_writeb(void *opaque
, uint32_t addr
,
159 assigned_dev_ioport_rw(opaque
, addr
, 1, &value
);
163 static void assigned_dev_ioport_writew(void *opaque
, uint32_t addr
,
166 assigned_dev_ioport_rw(opaque
, addr
, 2, &value
);
170 static void assigned_dev_ioport_writel(void *opaque
, uint32_t addr
,
173 assigned_dev_ioport_rw(opaque
, addr
, 4, &value
);
177 static uint32_t assigned_dev_ioport_readb(void *opaque
, uint32_t addr
)
179 return assigned_dev_ioport_rw(opaque
, addr
, 1, NULL
);
182 static uint32_t assigned_dev_ioport_readw(void *opaque
, uint32_t addr
)
184 return assigned_dev_ioport_rw(opaque
, addr
, 2, NULL
);
187 static uint32_t assigned_dev_ioport_readl(void *opaque
, uint32_t addr
)
189 return assigned_dev_ioport_rw(opaque
, addr
, 4, NULL
);
192 static uint32_t slow_bar_readb(void *opaque
, target_phys_addr_t addr
)
194 AssignedDevRegion
*d
= opaque
;
195 uint8_t *in
= d
->u
.r_virtbase
+ addr
;
199 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx
" val=0x%08x\n", addr
, r
);
204 static uint32_t slow_bar_readw(void *opaque
, target_phys_addr_t addr
)
206 AssignedDevRegion
*d
= opaque
;
207 uint16_t *in
= d
->u
.r_virtbase
+ addr
;
211 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx
" val=0x%08x\n", addr
, r
);
216 static uint32_t slow_bar_readl(void *opaque
, target_phys_addr_t addr
)
218 AssignedDevRegion
*d
= opaque
;
219 uint32_t *in
= d
->u
.r_virtbase
+ addr
;
223 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx
" val=0x%08x\n", addr
, r
);
228 static void slow_bar_writeb(void *opaque
, target_phys_addr_t addr
, uint32_t val
)
230 AssignedDevRegion
*d
= opaque
;
231 uint8_t *out
= d
->u
.r_virtbase
+ addr
;
233 DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx
" val=0x%02x\n", addr
, val
);
237 static void slow_bar_writew(void *opaque
, target_phys_addr_t addr
, uint32_t val
)
239 AssignedDevRegion
*d
= opaque
;
240 uint16_t *out
= d
->u
.r_virtbase
+ addr
;
242 DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx
" val=0x%04x\n", addr
, val
);
246 static void slow_bar_writel(void *opaque
, target_phys_addr_t addr
, uint32_t val
)
248 AssignedDevRegion
*d
= opaque
;
249 uint32_t *out
= d
->u
.r_virtbase
+ addr
;
251 DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx
" val=0x%08x\n", addr
, val
);
255 static CPUWriteMemoryFunc
* const slow_bar_write
[] = {
261 static CPUReadMemoryFunc
* const slow_bar_read
[] = {
267 static void assigned_dev_iomem_map(PCIDevice
*pci_dev
, int region_num
,
268 pcibus_t e_phys
, pcibus_t e_size
, int type
)
270 AssignedDevice
*r_dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
271 AssignedDevRegion
*region
= &r_dev
->v_addrs
[region_num
];
272 PCIRegion
*real_region
= &r_dev
->real_device
.regions
[region_num
];
274 DEBUG("e_phys=%08" FMT_PCIBUS
" r_virt=%p type=%d len=%08" FMT_PCIBUS
" region_num=%d \n",
275 e_phys
, region
->u
.r_virtbase
, type
, e_size
, region_num
);
277 region
->e_physbase
= e_phys
;
278 region
->e_size
= e_size
;
281 cpu_register_physical_memory(e_phys
, e_size
, region
->memory_index
);
283 /* deal with MSI-X MMIO page */
284 if (real_region
->base_addr
<= r_dev
->msix_table_addr
&&
285 real_region
->base_addr
+ real_region
->size
>=
286 r_dev
->msix_table_addr
) {
287 int offset
= r_dev
->msix_table_addr
- real_region
->base_addr
;
289 cpu_register_physical_memory(e_phys
+ offset
,
290 TARGET_PAGE_SIZE
, r_dev
->mmio_index
);
295 static void assigned_dev_ioport_map(PCIDevice
*pci_dev
, int region_num
,
296 pcibus_t addr
, pcibus_t size
, int type
)
298 AssignedDevice
*r_dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
299 AssignedDevRegion
*region
= &r_dev
->v_addrs
[region_num
];
300 int first_map
= (region
->e_size
== 0);
303 region
->e_physbase
= addr
;
304 region
->e_size
= size
;
306 DEBUG("e_phys=0x%" FMT_PCIBUS
" r_baseport=%x type=0x%x len=%" FMT_PCIBUS
" region_num=%d \n",
307 addr
, region
->u
.r_baseport
, type
, size
, region_num
);
309 if (first_map
&& region
->region
->resource_fd
< 0) {
310 struct ioperm_data
*data
;
312 data
= qemu_mallocz(sizeof(struct ioperm_data
));
313 data
->start_port
= region
->u
.r_baseport
;
314 data
->num
= region
->r_size
;
317 kvm_add_ioperm_data(data
);
319 for (env
= first_cpu
; env
; env
= env
->next_cpu
)
320 kvm_ioperm(env
, data
);
323 register_ioport_read(addr
, size
, 1, assigned_dev_ioport_readb
,
324 (r_dev
->v_addrs
+ region_num
));
325 register_ioport_read(addr
, size
, 2, assigned_dev_ioport_readw
,
326 (r_dev
->v_addrs
+ region_num
));
327 register_ioport_read(addr
, size
, 4, assigned_dev_ioport_readl
,
328 (r_dev
->v_addrs
+ region_num
));
329 register_ioport_write(addr
, size
, 1, assigned_dev_ioport_writeb
,
330 (r_dev
->v_addrs
+ region_num
));
331 register_ioport_write(addr
, size
, 2, assigned_dev_ioport_writew
,
332 (r_dev
->v_addrs
+ region_num
));
333 register_ioport_write(addr
, size
, 4, assigned_dev_ioport_writel
,
334 (r_dev
->v_addrs
+ region_num
));
337 static uint32_t assigned_dev_pci_read(PCIDevice
*d
, int pos
, int len
)
339 AssignedDevice
*pci_dev
= DO_UPCAST(AssignedDevice
, dev
, d
);
342 int fd
= pci_dev
->real_device
.config_fd
;
345 ret
= pread(fd
, &val
, len
, pos
);
347 if ((ret
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
350 fprintf(stderr
, "%s: pread failed, ret = %zd errno = %d\n",
351 __func__
, ret
, errno
);
359 static uint8_t assigned_dev_pci_read_byte(PCIDevice
*d
, int pos
)
361 return (uint8_t)assigned_dev_pci_read(d
, pos
, 1);
364 static void assigned_dev_pci_write(PCIDevice
*d
, int pos
, uint32_t val
, int len
)
366 AssignedDevice
*pci_dev
= DO_UPCAST(AssignedDevice
, dev
, d
);
368 int fd
= pci_dev
->real_device
.config_fd
;
371 ret
= pwrite(fd
, &val
, len
, pos
);
373 if ((ret
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
376 fprintf(stderr
, "%s: pwrite failed, ret = %zd errno = %d\n",
377 __func__
, ret
, errno
);
385 static uint8_t pci_find_cap_offset(PCIDevice
*d
, uint8_t cap
, uint8_t start
)
389 int pos
= start
? start
: PCI_CAPABILITY_LIST
;
392 status
= assigned_dev_pci_read_byte(d
, PCI_STATUS
);
393 if ((status
& PCI_STATUS_CAP_LIST
) == 0)
397 pos
= assigned_dev_pci_read_byte(d
, pos
);
402 id
= assigned_dev_pci_read_byte(d
, pos
+ PCI_CAP_LIST_ID
);
409 pos
+= PCI_CAP_LIST_NEXT
;
414 static void assigned_dev_pci_write_config(PCIDevice
*d
, uint32_t address
,
415 uint32_t val
, int len
)
419 AssignedDevice
*pci_dev
= DO_UPCAST(AssignedDevice
, dev
, d
);
421 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
422 ((d
->devfn
>> 3) & 0x1F), (d
->devfn
& 0x7),
423 (uint16_t) address
, val
, len
);
425 if (address
>= PCI_CONFIG_HEADER_SIZE
&& d
->config_map
[address
]) {
426 return assigned_device_pci_cap_write_config(d
, address
, val
, len
);
429 if (ranges_overlap(address
, len
, PCI_COMMAND
, 2)) {
430 pci_default_write_config(d
, address
, val
, len
);
431 /* Continue to program the card */
436 * - base address registers
437 * - ROM base address & capability pointer
438 * - interrupt line & pin
440 if (ranges_overlap(address
, len
, PCI_BASE_ADDRESS_0
, 24) ||
441 ranges_overlap(address
, len
, PCI_ROM_ADDRESS
, 4)) {
442 pci_default_write_config(d
, address
, val
, len
);
444 } else if (ranges_overlap(address
, len
, PCI_CAPABILITY_LIST
, 1) ||
445 ranges_overlap(address
, len
, PCI_INTERRUPT_LINE
, 2)) {
448 pci_default_write_config(d
, address
, val
, len
);
450 /* Ensure that writes to overlapping areas we don't virtualize still
452 real_val
= assigned_dev_pci_read(d
, address
, len
);
453 val
= merge_bits(val
, real_val
, address
, len
,
454 PCI_CAPABILITY_LIST
, 0xff);
455 val
= merge_bits(val
, real_val
, address
, len
,
456 PCI_INTERRUPT_LINE
, 0xffff);
459 DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
460 ((d
->devfn
>> 3) & 0x1F), (d
->devfn
& 0x7),
461 (uint16_t) address
, val
, len
);
463 fd
= pci_dev
->real_device
.config_fd
;
466 ret
= pwrite(fd
, &val
, len
, address
);
468 if ((ret
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
471 fprintf(stderr
, "%s: pwrite failed, ret = %zd errno = %d\n",
472 __func__
, ret
, errno
);
478 static uint32_t assigned_dev_pci_read_config(PCIDevice
*d
, uint32_t address
,
481 uint32_t val
= 0, virt_val
;
484 AssignedDevice
*pci_dev
= DO_UPCAST(AssignedDevice
, dev
, d
);
486 if (address
>= PCI_CONFIG_HEADER_SIZE
&& d
->config_map
[address
]) {
487 val
= assigned_device_pci_cap_read_config(d
, address
, len
);
488 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
489 (d
->devfn
>> 3) & 0x1F, (d
->devfn
& 0x7), address
, val
, len
);
495 * - vendor & device ID
496 * - base address registers
499 if (ranges_overlap(address
, len
, PCI_VENDOR_ID
, 4) ||
500 ranges_overlap(address
, len
, PCI_BASE_ADDRESS_0
, 24) ||
501 ranges_overlap(address
, len
, PCI_ROM_ADDRESS
, 4)) {
502 val
= pci_default_read_config(d
, address
, len
);
503 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
504 (d
->devfn
>> 3) & 0x1F, (d
->devfn
& 0x7), address
, val
, len
);
508 fd
= pci_dev
->real_device
.config_fd
;
511 ret
= pread(fd
, &val
, len
, address
);
513 if ((ret
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
516 fprintf(stderr
, "%s: pread failed, ret = %zd errno = %d\n",
517 __func__
, ret
, errno
);
522 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
523 (d
->devfn
>> 3) & 0x1F, (d
->devfn
& 0x7), address
, val
, len
);
525 if (pci_dev
->emulate_cmd_mask
) {
526 val
= merge_bits(val
, pci_default_read_config(d
, address
, len
),
527 address
, len
, PCI_COMMAND
, pci_dev
->emulate_cmd_mask
);
531 * Merge bits from virtualized
532 * - capability pointer
533 * - interrupt line & pin
535 virt_val
= pci_default_read_config(d
, address
, len
);
536 val
= merge_bits(val
, virt_val
, address
, len
, PCI_CAPABILITY_LIST
, 0xff);
537 val
= merge_bits(val
, virt_val
, address
, len
, PCI_INTERRUPT_LINE
, 0xffff);
539 if (!pci_dev
->cap
.available
) {
540 /* kill the special capabilities */
541 if (address
== PCI_COMMAND
&& len
== 4) {
542 val
&= ~(PCI_STATUS_CAP_LIST
<< 16);
543 } else if (address
== PCI_STATUS
) {
544 val
&= ~PCI_STATUS_CAP_LIST
;
551 static int assigned_dev_register_regions(PCIRegion
*io_regions
,
552 unsigned long regions_num
,
553 AssignedDevice
*pci_dev
)
556 PCIRegion
*cur_region
= io_regions
;
558 for (i
= 0; i
< regions_num
; i
++, cur_region
++) {
559 if (!cur_region
->valid
)
561 pci_dev
->v_addrs
[i
].num
= i
;
563 /* handle memory io regions */
564 if (cur_region
->type
& IORESOURCE_MEM
) {
565 int t
= cur_region
->type
& IORESOURCE_PREFETCH
566 ? PCI_BASE_ADDRESS_MEM_PREFETCH
567 : PCI_BASE_ADDRESS_SPACE_MEMORY
;
569 /* map physical memory */
570 pci_dev
->v_addrs
[i
].e_physbase
= cur_region
->base_addr
;
571 pci_dev
->v_addrs
[i
].u
.r_virtbase
= mmap(NULL
, cur_region
->size
,
572 PROT_WRITE
| PROT_READ
,
574 cur_region
->resource_fd
,
577 if (pci_dev
->v_addrs
[i
].u
.r_virtbase
== MAP_FAILED
) {
578 pci_dev
->v_addrs
[i
].u
.r_virtbase
= NULL
;
579 fprintf(stderr
, "%s: Error: Couldn't mmap 0x%x!"
581 (uint32_t) (cur_region
->base_addr
));
585 pci_dev
->v_addrs
[i
].r_size
= cur_region
->size
;
586 pci_dev
->v_addrs
[i
].e_size
= 0;
589 pci_dev
->v_addrs
[i
].u
.r_virtbase
+=
590 (cur_region
->base_addr
& 0xFFF);
592 if (cur_region
->size
& 0xFFF) {
593 fprintf(stderr
, "PCI region %d at address 0x%llx "
594 "has size 0x%x, which is not a multiple of 4K. "
595 "You might experience some performance hit "
597 i
, (unsigned long long)cur_region
->base_addr
,
599 pci_dev
->v_addrs
[i
].memory_index
=
600 cpu_register_io_memory(slow_bar_read
, slow_bar_write
,
601 &pci_dev
->v_addrs
[i
],
602 DEVICE_NATIVE_ENDIAN
);
604 void *virtbase
= pci_dev
->v_addrs
[i
].u
.r_virtbase
;
606 snprintf(name
, sizeof(name
), "%s.bar%d",
607 pci_dev
->dev
.qdev
.info
->name
, i
);
608 pci_dev
->v_addrs
[i
].memory_index
=
609 qemu_ram_alloc_from_ptr(
611 name
, cur_region
->size
,
615 pci_register_bar((PCIDevice
*) pci_dev
, i
, cur_region
->size
, t
,
616 assigned_dev_iomem_map
);
619 /* handle port io regions */
623 /* Test kernel support for ioport resource read/write. Old
624 * kernels return EIO. New kernels only allow 1/2/4 byte reads
625 * so should return EINVAL for a 3 byte read */
626 ret
= pread(pci_dev
->v_addrs
[i
].region
->resource_fd
, &val
, 3, 0);
628 fprintf(stderr
, "I/O port resource supports 3 byte read?!\n");
630 } else if (errno
!= EINVAL
) {
631 fprintf(stderr
, "Using raw in/out ioport access (sysfs - %s)\n",
633 close(pci_dev
->v_addrs
[i
].region
->resource_fd
);
634 pci_dev
->v_addrs
[i
].region
->resource_fd
= -1;
637 pci_dev
->v_addrs
[i
].e_physbase
= cur_region
->base_addr
;
638 pci_dev
->v_addrs
[i
].u
.r_baseport
= cur_region
->base_addr
;
639 pci_dev
->v_addrs
[i
].r_size
= cur_region
->size
;
640 pci_dev
->v_addrs
[i
].e_size
= 0;
642 pci_register_bar((PCIDevice
*) pci_dev
, i
,
643 cur_region
->size
, PCI_BASE_ADDRESS_SPACE_IO
,
644 assigned_dev_ioport_map
);
646 /* not relevant for port io */
647 pci_dev
->v_addrs
[i
].memory_index
= 0;
655 static int get_real_id(const char *devpath
, const char *idname
, uint16_t *val
)
661 snprintf(name
, sizeof(name
), "%s%s", devpath
, idname
);
662 f
= fopen(name
, "r");
664 fprintf(stderr
, "%s: %s: %m\n", __func__
, name
);
667 if (fscanf(f
, "%li\n", &id
) == 1) {
/* Read the host device's PCI vendor ID from the sysfs "vendor" attribute
 * under @devpath into *val.  Thin wrapper around get_real_id(). */
static int get_real_vendor_id(const char *devpath, uint16_t *val)
{
    return get_real_id(devpath, "vendor", val);
}
/* Read the host device's PCI device ID from the sysfs "device" attribute
 * under @devpath into *val.  Thin wrapper around get_real_id(). */
static int get_real_device_id(const char *devpath, uint16_t *val)
{
    return get_real_id(devpath, "device", val);
}
687 static int get_real_device(AssignedDevice
*pci_dev
, uint16_t r_seg
,
688 uint8_t r_bus
, uint8_t r_dev
, uint8_t r_func
)
690 char dir
[128], name
[128];
693 unsigned long long start
, end
, size
, flags
;
697 PCIDevRegions
*dev
= &pci_dev
->real_device
;
699 dev
->region_number
= 0;
701 snprintf(dir
, sizeof(dir
), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
702 r_seg
, r_bus
, r_dev
, r_func
);
704 snprintf(name
, sizeof(name
), "%sconfig", dir
);
706 if (pci_dev
->configfd_name
&& *pci_dev
->configfd_name
) {
707 if (qemu_isdigit(pci_dev
->configfd_name
[0])) {
708 dev
->config_fd
= strtol(pci_dev
->configfd_name
, NULL
, 0);
710 dev
->config_fd
= monitor_get_fd(cur_mon
, pci_dev
->configfd_name
);
711 if (dev
->config_fd
< 0) {
712 fprintf(stderr
, "%s: (%s) unkown\n", __func__
,
713 pci_dev
->configfd_name
);
718 dev
->config_fd
= open(name
, O_RDWR
);
720 if (dev
->config_fd
== -1) {
721 fprintf(stderr
, "%s: %s: %m\n", __func__
, name
);
726 r
= read(dev
->config_fd
, pci_dev
->dev
.config
,
727 pci_config_size(&pci_dev
->dev
));
729 if (errno
== EINTR
|| errno
== EAGAIN
)
731 fprintf(stderr
, "%s: read failed, errno = %d\n", __func__
, errno
);
734 /* Clear host resource mapping info. If we choose not to register a
735 * BAR, such as might be the case with the option ROM, we can get
736 * confusing, unwritable, residual addresses from the host here. */
737 memset(&pci_dev
->dev
.config
[PCI_BASE_ADDRESS_0
], 0, 24);
738 memset(&pci_dev
->dev
.config
[PCI_ROM_ADDRESS
], 0, 4);
740 snprintf(name
, sizeof(name
), "%sresource", dir
);
742 f
= fopen(name
, "r");
744 fprintf(stderr
, "%s: %s: %m\n", __func__
, name
);
748 for (r
= 0; r
< PCI_ROM_SLOT
; r
++) {
749 if (fscanf(f
, "%lli %lli %lli\n", &start
, &end
, &flags
) != 3)
752 rp
= dev
->regions
+ r
;
754 rp
->resource_fd
= -1;
755 size
= end
- start
+ 1;
756 flags
&= IORESOURCE_IO
| IORESOURCE_MEM
| IORESOURCE_PREFETCH
;
757 if (size
== 0 || (flags
& ~IORESOURCE_PREFETCH
) == 0)
759 if (flags
& IORESOURCE_MEM
) {
760 flags
&= ~IORESOURCE_IO
;
762 flags
&= ~IORESOURCE_PREFETCH
;
764 snprintf(name
, sizeof(name
), "%sresource%d", dir
, r
);
765 fd
= open(name
, O_RDWR
);
768 rp
->resource_fd
= fd
;
772 rp
->base_addr
= start
;
774 pci_dev
->v_addrs
[r
].region
= rp
;
775 DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
776 r
, rp
->size
, start
, rp
->type
, rp
->resource_fd
);
781 /* read and fill vendor ID */
782 v
= get_real_vendor_id(dir
, &id
);
786 pci_dev
->dev
.config
[0] = id
& 0xff;
787 pci_dev
->dev
.config
[1] = (id
& 0xff00) >> 8;
789 /* read and fill device ID */
790 v
= get_real_device_id(dir
, &id
);
794 pci_dev
->dev
.config
[2] = id
& 0xff;
795 pci_dev
->dev
.config
[3] = (id
& 0xff00) >> 8;
797 /* dealing with virtual function device */
798 snprintf(name
, sizeof(name
), "%sphysfn/", dir
);
799 if (!stat(name
, &statbuf
)) {
800 pci_dev
->emulate_cmd_mask
= 0xffff;
803 dev
->region_number
= r
;
807 static QLIST_HEAD(, AssignedDevice
) devs
= QLIST_HEAD_INITIALIZER(devs
);
809 #ifdef KVM_CAP_IRQ_ROUTING
810 static void free_dev_irq_entries(AssignedDevice
*dev
)
814 for (i
= 0; i
< dev
->irq_entries_nr
; i
++)
815 kvm_del_routing_entry(&dev
->entry
[i
]);
818 dev
->irq_entries_nr
= 0;
822 static void free_assigned_device(AssignedDevice
*dev
)
826 for (i
= 0; i
< dev
->real_device
.region_number
; i
++) {
827 PCIRegion
*pci_region
= &dev
->real_device
.regions
[i
];
828 AssignedDevRegion
*region
= &dev
->v_addrs
[i
];
830 if (!pci_region
->valid
) {
833 if (pci_region
->type
& IORESOURCE_IO
) {
834 if (pci_region
->resource_fd
< 0) {
835 kvm_remove_ioperm_data(region
->u
.r_baseport
, region
->r_size
);
837 } else if (pci_region
->type
& IORESOURCE_MEM
) {
838 if (region
->u
.r_virtbase
) {
839 if (region
->e_size
> 0) {
840 cpu_register_physical_memory(region
->e_physbase
,
844 if (region
->r_size
& 0xFFF) {
845 cpu_unregister_io_memory(region
->memory_index
);
847 qemu_ram_unmap(region
->memory_index
);
849 if (munmap(region
->u
.r_virtbase
,
850 (pci_region
->size
+ 0xFFF) & 0xFFFFF000)) {
852 "Failed to unmap assigned device region: %s\n",
857 if (pci_region
->resource_fd
>= 0) {
858 close(pci_region
->resource_fd
);
862 if (dev
->cap
.available
& ASSIGNED_DEVICE_CAP_MSIX
) {
863 assigned_dev_unregister_msix_mmio(dev
);
865 if (dev
->real_device
.config_fd
>= 0) {
866 close(dev
->real_device
.config_fd
);
869 #ifdef KVM_CAP_IRQ_ROUTING
870 free_dev_irq_entries(dev
);
/* Pack a PCI segment, bus number and devfn into the single 32-bit device
 * identifier that the KVM assigned-device ioctls expect:
 * bits 31..16 = segment, bits 15..8 = bus, bits 7..0 = devfn. */
static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
{
    uint32_t id = (uint32_t)seg << 16;

    id |= (uint32_t)bus << 8;
    id |= devfn;
    return id;
}
879 static void assign_failed_examine(AssignedDevice
*dev
)
881 char name
[PATH_MAX
], dir
[PATH_MAX
], driver
[PATH_MAX
] = {}, *ns
;
882 uint16_t vendor_id
, device_id
;
885 sprintf(dir
, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
886 dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
888 sprintf(name
, "%sdriver", dir
);
890 r
= readlink(name
, driver
, sizeof(driver
));
891 if ((r
<= 0) || r
>= sizeof(driver
) || !(ns
= strrchr(driver
, '/'))) {
897 if (get_real_vendor_id(dir
, &vendor_id
) ||
898 get_real_device_id(dir
, &device_id
)) {
902 fprintf(stderr
, "*** The driver '%s' is occupying your device "
903 "%04x:%02x:%02x.%x.\n",
904 ns
, dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
905 fprintf(stderr
, "***\n");
906 fprintf(stderr
, "*** You can try the following commands to free it:\n");
907 fprintf(stderr
, "***\n");
908 fprintf(stderr
, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
909 "new_id\n", vendor_id
, device_id
);
910 fprintf(stderr
, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
912 dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
, ns
);
913 fprintf(stderr
, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
915 dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
916 fprintf(stderr
, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
917 "/remove_id\n", vendor_id
, device_id
);
918 fprintf(stderr
, "***\n");
923 fprintf(stderr
, "Couldn't find out why.\n");
926 static int assign_device(AssignedDevice
*dev
)
928 struct kvm_assigned_pci_dev assigned_dev_data
;
931 #ifdef KVM_CAP_PCI_SEGMENT
932 /* Only pass non-zero PCI segment to capable module */
933 if (!kvm_check_extension(kvm_state
, KVM_CAP_PCI_SEGMENT
) &&
935 fprintf(stderr
, "Can't assign device inside non-zero PCI segment "
936 "as this KVM module doesn't support it.\n");
941 memset(&assigned_dev_data
, 0, sizeof(assigned_dev_data
));
942 assigned_dev_data
.assigned_dev_id
=
943 calc_assigned_dev_id(dev
->h_segnr
, dev
->h_busnr
, dev
->h_devfn
);
944 #ifdef KVM_CAP_PCI_SEGMENT
945 assigned_dev_data
.segnr
= dev
->h_segnr
;
947 assigned_dev_data
.busnr
= dev
->h_busnr
;
948 assigned_dev_data
.devfn
= dev
->h_devfn
;
951 /* We always enable the IOMMU unless disabled on the command line */
952 if (dev
->features
& ASSIGNED_DEVICE_USE_IOMMU_MASK
) {
953 if (!kvm_check_extension(kvm_state
, KVM_CAP_IOMMU
)) {
954 fprintf(stderr
, "No IOMMU found. Unable to assign device \"%s\"\n",
958 assigned_dev_data
.flags
|= KVM_DEV_ASSIGN_ENABLE_IOMMU
;
961 dev
->features
&= ~ASSIGNED_DEVICE_USE_IOMMU_MASK
;
963 if (!(dev
->features
& ASSIGNED_DEVICE_USE_IOMMU_MASK
)) {
965 "WARNING: Assigning a device without IOMMU protection can "
966 "cause host memory corruption if the device issues DMA write "
970 r
= kvm_assign_pci_device(kvm_context
, &assigned_dev_data
);
972 fprintf(stderr
, "Failed to assign device \"%s\" : %s\n",
973 dev
->dev
.qdev
.id
, strerror(-r
));
977 assign_failed_examine(dev
);
986 static int assign_irq(AssignedDevice
*dev
)
988 struct kvm_assigned_irq assigned_irq_data
;
991 /* Interrupt PIN 0 means don't use INTx */
992 if (assigned_dev_pci_read_byte(&dev
->dev
, PCI_INTERRUPT_PIN
) == 0)
995 irq
= pci_map_irq(&dev
->dev
, dev
->intpin
);
996 irq
= piix_get_irq(irq
);
999 irq
= ipf_map_irq(&dev
->dev
, irq
);
1002 if (dev
->girq
== irq
)
1005 memset(&assigned_irq_data
, 0, sizeof(assigned_irq_data
));
1006 assigned_irq_data
.assigned_dev_id
=
1007 calc_assigned_dev_id(dev
->h_segnr
, dev
->h_busnr
, dev
->h_devfn
);
1008 assigned_irq_data
.guest_irq
= irq
;
1009 assigned_irq_data
.host_irq
= dev
->real_device
.irq
;
1010 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
1011 if (dev
->irq_requested_type
) {
1012 assigned_irq_data
.flags
= dev
->irq_requested_type
;
1013 r
= kvm_deassign_irq(kvm_context
, &assigned_irq_data
);
1014 /* -ENXIO means no assigned irq */
1015 if (r
&& r
!= -ENXIO
)
1016 perror("assign_irq: deassign");
1019 assigned_irq_data
.flags
= KVM_DEV_IRQ_GUEST_INTX
;
1020 if (dev
->features
& ASSIGNED_DEVICE_PREFER_MSI_MASK
&&
1021 dev
->cap
.available
& ASSIGNED_DEVICE_CAP_MSI
)
1022 assigned_irq_data
.flags
|= KVM_DEV_IRQ_HOST_MSI
;
1024 assigned_irq_data
.flags
|= KVM_DEV_IRQ_HOST_INTX
;
1027 r
= kvm_assign_irq(kvm_context
, &assigned_irq_data
);
1029 fprintf(stderr
, "Failed to assign irq for \"%s\": %s\n",
1030 dev
->dev
.qdev
.id
, strerror(-r
));
1031 fprintf(stderr
, "Perhaps you are assigning a device "
1032 "that shares an IRQ with another device?\n");
1037 dev
->irq_requested_type
= assigned_irq_data
.flags
;
1041 static void deassign_device(AssignedDevice
*dev
)
1043 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
1044 struct kvm_assigned_pci_dev assigned_dev_data
;
1047 memset(&assigned_dev_data
, 0, sizeof(assigned_dev_data
));
1048 assigned_dev_data
.assigned_dev_id
=
1049 calc_assigned_dev_id(dev
->h_segnr
, dev
->h_busnr
, dev
->h_devfn
);
1051 r
= kvm_deassign_pci_device(kvm_context
, &assigned_dev_data
);
1053 fprintf(stderr
, "Failed to deassign device \"%s\" : %s\n",
1054 dev
->dev
.qdev
.id
, strerror(-r
));
1059 AssignedDevInfo
*get_assigned_device(int pcibus
, int slot
)
1061 AssignedDevice
*assigned_dev
= NULL
;
1062 AssignedDevInfo
*adev
= NULL
;
1064 QLIST_FOREACH(adev
, &adev_head
, next
) {
1065 assigned_dev
= adev
->assigned_dev
;
1066 if (pci_bus_num(assigned_dev
->dev
.bus
) == pcibus
&&
1067 PCI_SLOT(assigned_dev
->dev
.devfn
) == slot
)
1075 /* The pci config space got updated. Check if irq numbers have changed
1078 void assigned_dev_update_irqs(void)
1080 AssignedDevice
*dev
, *next
;
1083 dev
= QLIST_FIRST(&devs
);
1085 next
= QLIST_NEXT(dev
, next
);
1086 r
= assign_irq(dev
);
1088 qdev_unplug(&dev
->dev
.qdev
);
1093 #ifdef KVM_CAP_IRQ_ROUTING
1095 #ifdef KVM_CAP_DEVICE_MSI
1096 static void assigned_dev_update_msi(PCIDevice
*pci_dev
, unsigned int ctrl_pos
)
1098 struct kvm_assigned_irq assigned_irq_data
;
1099 AssignedDevice
*assigned_dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
1100 uint8_t ctrl_byte
= pci_dev
->config
[ctrl_pos
];
1103 memset(&assigned_irq_data
, 0, sizeof assigned_irq_data
);
1104 assigned_irq_data
.assigned_dev_id
=
1105 calc_assigned_dev_id(assigned_dev
->h_segnr
, assigned_dev
->h_busnr
,
1106 (uint8_t)assigned_dev
->h_devfn
);
1108 /* Some guests gratuitously disable MSI even if they're not using it,
1109 * try to catch this by only deassigning irqs if the guest is using
1110 * MSI or intends to start. */
1111 if ((assigned_dev
->irq_requested_type
& KVM_DEV_IRQ_GUEST_MSI
) ||
1112 (ctrl_byte
& PCI_MSI_FLAGS_ENABLE
)) {
1114 assigned_irq_data
.flags
= assigned_dev
->irq_requested_type
;
1115 free_dev_irq_entries(assigned_dev
);
1116 r
= kvm_deassign_irq(kvm_context
, &assigned_irq_data
);
1117 /* -ENXIO means no assigned irq */
1118 if (r
&& r
!= -ENXIO
)
1119 perror("assigned_dev_update_msi: deassign irq");
1121 assigned_dev
->irq_requested_type
= 0;
1124 if (ctrl_byte
& PCI_MSI_FLAGS_ENABLE
) {
1125 int pos
= ctrl_pos
- PCI_MSI_FLAGS
;
1126 assigned_dev
->entry
= qemu_mallocz(sizeof(*(assigned_dev
->entry
)));
1127 assigned_dev
->entry
->u
.msi
.address_lo
=
1128 pci_get_long(pci_dev
->config
+ pos
+ PCI_MSI_ADDRESS_LO
);
1129 assigned_dev
->entry
->u
.msi
.address_hi
= 0;
1130 assigned_dev
->entry
->u
.msi
.data
=
1131 pci_get_word(pci_dev
->config
+ pos
+ PCI_MSI_DATA_32
);
1132 assigned_dev
->entry
->type
= KVM_IRQ_ROUTING_MSI
;
1133 r
= kvm_get_irq_route_gsi();
1135 perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
1138 assigned_dev
->entry
->gsi
= r
;
1140 kvm_add_routing_entry(assigned_dev
->entry
);
1141 if (kvm_commit_irq_routes() < 0) {
1142 perror("assigned_dev_update_msi: kvm_commit_irq_routes");
1143 assigned_dev
->cap
.state
&= ~ASSIGNED_DEVICE_MSI_ENABLED
;
1146 assigned_dev
->irq_entries_nr
= 1;
1148 assigned_irq_data
.guest_irq
= assigned_dev
->entry
->gsi
;
1149 assigned_irq_data
.flags
= KVM_DEV_IRQ_HOST_MSI
| KVM_DEV_IRQ_GUEST_MSI
;
1150 if (kvm_assign_irq(kvm_context
, &assigned_irq_data
) < 0)
1151 perror("assigned_dev_enable_msi: assign irq");
1153 assigned_dev
->girq
= -1;
1154 assigned_dev
->irq_requested_type
= assigned_irq_data
.flags
;
1156 assign_irq(assigned_dev
);
1161 #ifdef KVM_CAP_DEVICE_MSIX
1162 static int assigned_dev_update_msix_mmio(PCIDevice
*pci_dev
)
1164 AssignedDevice
*adev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
1165 uint16_t entries_nr
= 0, entries_max_nr
;
1166 int pos
= 0, i
, r
= 0;
1167 uint32_t msg_addr
, msg_upper_addr
, msg_data
, msg_ctrl
;
1168 struct kvm_assigned_msix_nr msix_nr
;
1169 struct kvm_assigned_msix_entry msix_entry
;
1170 void *va
= adev
->msix_table_page
;
1172 pos
= pci_find_capability(pci_dev
, PCI_CAP_ID_MSIX
);
1174 entries_max_nr
= *(uint16_t *)(pci_dev
->config
+ pos
+ 2);
1175 entries_max_nr
&= PCI_MSIX_TABSIZE
;
1176 entries_max_nr
+= 1;
1178 /* Get the usable entry number for allocating */
1179 for (i
= 0; i
< entries_max_nr
; i
++) {
1180 memcpy(&msg_ctrl
, va
+ i
* 16 + 12, 4);
1181 memcpy(&msg_data
, va
+ i
* 16 + 8, 4);
1182 /* Ignore unused entry even it's unmasked */
1188 if (entries_nr
== 0) {
1189 fprintf(stderr
, "MSI-X entry number is zero!\n");
1192 msix_nr
.assigned_dev_id
= calc_assigned_dev_id(adev
->h_segnr
, adev
->h_busnr
,
1193 (uint8_t)adev
->h_devfn
);
1194 msix_nr
.entry_nr
= entries_nr
;
1195 r
= kvm_assign_set_msix_nr(kvm_context
, &msix_nr
);
1197 fprintf(stderr
, "fail to set MSI-X entry number for MSIX! %s\n",
1202 free_dev_irq_entries(adev
);
1203 adev
->irq_entries_nr
= entries_nr
;
1204 adev
->entry
= qemu_mallocz(entries_nr
* sizeof(*(adev
->entry
)));
1206 msix_entry
.assigned_dev_id
= msix_nr
.assigned_dev_id
;
1208 for (i
= 0; i
< entries_max_nr
; i
++) {
1209 if (entries_nr
>= msix_nr
.entry_nr
)
1211 memcpy(&msg_ctrl
, va
+ i
* 16 + 12, 4);
1212 memcpy(&msg_data
, va
+ i
* 16 + 8, 4);
1216 memcpy(&msg_addr
, va
+ i
* 16, 4);
1217 memcpy(&msg_upper_addr
, va
+ i
* 16 + 4, 4);
1219 r
= kvm_get_irq_route_gsi();
1223 adev
->entry
[entries_nr
].gsi
= r
;
1224 adev
->entry
[entries_nr
].type
= KVM_IRQ_ROUTING_MSI
;
1225 adev
->entry
[entries_nr
].flags
= 0;
1226 adev
->entry
[entries_nr
].u
.msi
.address_lo
= msg_addr
;
1227 adev
->entry
[entries_nr
].u
.msi
.address_hi
= msg_upper_addr
;
1228 adev
->entry
[entries_nr
].u
.msi
.data
= msg_data
;
1229 DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data
, msg_addr
);
1230 kvm_add_routing_entry(&adev
->entry
[entries_nr
]);
1232 msix_entry
.gsi
= adev
->entry
[entries_nr
].gsi
;
1233 msix_entry
.entry
= i
;
1234 r
= kvm_assign_set_msix_entry(kvm_context
, &msix_entry
);
1236 fprintf(stderr
, "fail to set MSI-X entry! %s\n", strerror(-r
));
1239 DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
1240 msix_entry
.gsi
, msix_entry
.entry
);
1244 if (r
== 0 && kvm_commit_irq_routes() < 0) {
1245 perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
/*
 * Guest wrote the MSI-X Message Control word (at ctrl_pos in emulated config
 * space).  If the guest was using MSI-X, or is enabling it now, tear down the
 * current KVM irq assignment; then either enable host+guest MSI-X via
 * kvm_assign_irq() or fall back to assign_irq() (legacy INTx path).
 * NOTE(review): some lines (closing braces / else arms) are elided in this
 * excerpt; documentation is based on the visible statements only.
 */
1252 static void assigned_dev_update_msix(PCIDevice
*pci_dev
, unsigned int ctrl_pos
)
1254 struct kvm_assigned_irq assigned_irq_data
;
1255 AssignedDevice
*assigned_dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
1256 uint16_t *ctrl_word
= (uint16_t *)(pci_dev
->config
+ ctrl_pos
);
/* Identify this device to KVM by host segment/bus/devfn. */
1259 memset(&assigned_irq_data
, 0, sizeof assigned_irq_data
);
1260 assigned_irq_data
.assigned_dev_id
=
1261 calc_assigned_dev_id(assigned_dev
->h_segnr
, assigned_dev
->h_busnr
,
1262 (uint8_t)assigned_dev
->h_devfn
);
1264 /* Some guests gratuitously disable MSIX even if they're not using it,
1265 * try to catch this by only deassigning irqs if the guest is using
1266 * MSIX or intends to start. */
1267 if ((assigned_dev
->irq_requested_type
& KVM_DEV_IRQ_GUEST_MSIX
) ||
1268 (*ctrl_word
& PCI_MSIX_ENABLE
)) {
1270 assigned_irq_data
.flags
= assigned_dev
->irq_requested_type
;
1271 free_dev_irq_entries(assigned_dev
);
1272 r
= kvm_deassign_irq(kvm_context
, &assigned_irq_data
);
1273 /* -ENXIO means no assigned irq */
1274 if (r
&& r
!= -ENXIO
)
1275 perror("assigned_dev_update_msix: deassign irq");
1277 assigned_dev
->irq_requested_type
= 0;
/* Guest set the MSI-X enable bit: program the routing table from the
 * emulated MSI-X table page, then assign host+guest MSI-X. */
1280 if (*ctrl_word
& PCI_MSIX_ENABLE
) {
1281 assigned_irq_data
.flags
= KVM_DEV_IRQ_HOST_MSIX
|
1282 KVM_DEV_IRQ_GUEST_MSIX
;
1284 if (assigned_dev_update_msix_mmio(pci_dev
) < 0) {
1285 perror("assigned_dev_update_msix_mmio");
1288 if (kvm_assign_irq(kvm_context
, &assigned_irq_data
) < 0) {
1289 perror("assigned_dev_enable_msix: assign irq");
1292 assigned_dev
->girq
= -1;
1293 assigned_dev
->irq_requested_type
= assigned_irq_data
.flags
;
/* MSI-X disabled: revert to the regular (INTx/MSI) irq assignment. */
1295 assign_irq(assigned_dev
);
1301 /* There can be multiple VNDR capabilities per device, we need to find the
1302 * one that starts closest to the given address without going over.
* Walks the hardware capability chain for PCI_CAP_ID_VNDR entries and keeps
* the highest offset that is still <= address.
* NOTE(review): loop header/declarations and the return are elided in this
* excerpt. */
1303 static uint8_t find_vndr_start(PCIDevice
*pci_dev
, uint32_t address
)
1308 (pos
= pci_find_cap_offset(pci_dev
, PCI_CAP_ID_VNDR
, pos
));
1309 pos
+= PCI_CAP_LIST_NEXT
) {
1310 if (pos
<= address
) {
/* Track the closest capability start at or below the access address. */
1311 cap
= MAX(pos
, cap
);
/*
 * Config-space read hook for capability regions.  For VPD and vendor (VNDR)
 * capabilities the read comes from real hardware, but the "next capability"
 * pointer byte is taken from the emulated config copy so the guest sees the
 * emulated capability chain; everything else uses the default emulation.
 * NOTE(review): the switch scaffolding and default label are elided in this
 * excerpt; documented from the visible cases only.
 */
1317 static uint32_t assigned_device_pci_cap_read_config(PCIDevice
*pci_dev
,
1318 uint32_t address
, int len
)
1320 uint8_t cap
, cap_id
= pci_dev
->config_map
[address
];
1325 case PCI_CAP_ID_VPD
:
1326 cap
= pci_find_capability(pci_dev
, cap_id
);
1327 val
= assigned_dev_pci_read(pci_dev
, address
, len
);
/* Overlay the emulated next-pointer byte onto the hardware value. */
1328 return merge_bits(val
, pci_get_long(pci_dev
->config
+ address
),
1329 address
, len
, cap
+ PCI_CAP_LIST_NEXT
, 0xff);
1331 case PCI_CAP_ID_VNDR
:
/* Multiple VNDR caps may exist; locate the one containing address. */
1332 cap
= find_vndr_start(pci_dev
, address
);
1333 val
= assigned_dev_pci_read(pci_dev
, address
, len
);
1334 return merge_bits(val
, pci_get_long(pci_dev
->config
+ address
),
1335 address
, len
, cap
+ PCI_CAP_LIST_NEXT
, 0xff);
/* All other capability bytes: serve from the emulated config space. */
1338 return pci_default_read_config(pci_dev
, address
, len
);
/*
 * Config-space write hook for capability regions.  The write is always
 * applied to the emulated config first; then MSI/MSI-X control-word writes
 * trigger irq reprogramming, and VPD/VNDR writes are passed straight through
 * to the real device.
 * NOTE(review): the address parameter line, switch scaffolding, braces and
 * break/#endif lines are elided in this excerpt.
 */
1341 static void assigned_device_pci_cap_write_config(PCIDevice
*pci_dev
,
1343 uint32_t val
, int len
)
1345 uint8_t cap_id
= pci_dev
->config_map
[address
];
/* Keep the emulated copy coherent before reacting to the write. */
1347 pci_default_write_config(pci_dev
, address
, val
, len
);
1349 #ifdef KVM_CAP_IRQ_ROUTING
1350 case PCI_CAP_ID_MSI
:
1351 #ifdef KVM_CAP_DEVICE_MSI
1353 uint8_t cap
= pci_find_capability(pci_dev
, cap_id
);
/* Did the write touch the MSI Message Control flags byte? */
1354 if (ranges_overlap(address
- cap
, len
, PCI_MSI_FLAGS
, 1)) {
1355 assigned_dev_update_msi(pci_dev
, cap
+ PCI_MSI_FLAGS
);
1361 case PCI_CAP_ID_MSIX
:
1362 #ifdef KVM_CAP_DEVICE_MSIX
1364 uint8_t cap
= pci_find_capability(pci_dev
, cap_id
);
/* MSI-X enable/mask bits live in the upper byte of the control word. */
1365 if (ranges_overlap(address
- cap
, len
, PCI_MSIX_FLAGS
+ 1, 1)) {
1366 assigned_dev_update_msix(pci_dev
, cap
+ PCI_MSIX_FLAGS
);
1373 case PCI_CAP_ID_VPD
:
1374 case PCI_CAP_ID_VNDR
:
/* Direct write-through to the physical device for these caps. */
1375 assigned_dev_pci_write(pci_dev
, address
, val
, len
);
/*
 * Build the emulated PCI capability list for an assigned device.  The
 * hardware-copied capability pointer and PCI_STATUS_CAP_LIST bit are cleared
 * first, then individual capabilities (MSI, MSI-X, PM, PCI Express, PCI-X,
 * VPD, vendor-specific) are selectively re-exposed with fields sanitized so
 * the guest only sees/controls what device assignment supports.
 * Returns 0 on success, <0 if a pci_add_capability() call fails.
 * NOTE(review): several error-path/brace lines are elided in this excerpt;
 * documentation is based on the visible statements only.
 */
1380 static int assigned_device_pci_cap_init(PCIDevice
*pci_dev
)
1382 AssignedDevice
*dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
1383 PCIRegion
*pci_region
= dev
->real_device
.regions
;
1386 /* Clear initial capabilities pointer and status copied from hw */
1387 pci_set_byte(pci_dev
->config
+ PCI_CAPABILITY_LIST
, 0);
1388 pci_set_word(pci_dev
->config
+ PCI_STATUS
,
1389 pci_get_word(pci_dev
->config
+ PCI_STATUS
) &
1390 ~PCI_STATUS_CAP_LIST
);
1392 #ifdef KVM_CAP_IRQ_ROUTING
1393 #ifdef KVM_CAP_DEVICE_MSI
1394 /* Expose MSI capability
1395 * MSI capability is the 1st capability in capability config */
1396 if ((pos
= pci_find_cap_offset(pci_dev
, PCI_CAP_ID_MSI
, 0))) {
1397 dev
->cap
.available
|= ASSIGNED_DEVICE_CAP_MSI
;
1398 /* Only 32-bit/no-mask currently supported */
1399 if ((ret
= pci_add_capability(pci_dev
, PCI_CAP_ID_MSI
, pos
, 10)) < 0) {
/* Advertise only the queue-size field; start with address/data zeroed. */
1403 pci_set_word(pci_dev
->config
+ pos
+ PCI_MSI_FLAGS
,
1404 pci_get_word(pci_dev
->config
+ pos
+ PCI_MSI_FLAGS
) &
1405 PCI_MSI_FLAGS_QMASK
);
1406 pci_set_long(pci_dev
->config
+ pos
+ PCI_MSI_ADDRESS_LO
, 0);
1407 pci_set_word(pci_dev
->config
+ pos
+ PCI_MSI_DATA_32
, 0);
1409 /* Set writable fields */
1410 pci_set_word(pci_dev
->wmask
+ pos
+ PCI_MSI_FLAGS
,
1411 PCI_MSI_FLAGS_QSIZE
| PCI_MSI_FLAGS_ENABLE
);
1412 pci_set_long(pci_dev
->wmask
+ pos
+ PCI_MSI_ADDRESS_LO
, 0xfffffffc);
1413 pci_set_word(pci_dev
->wmask
+ pos
+ PCI_MSI_DATA_32
, 0xffff);
1416 #ifdef KVM_CAP_DEVICE_MSIX
1417 /* Expose MSI-X capability */
1418 if ((pos
= pci_find_cap_offset(pci_dev
, PCI_CAP_ID_MSIX
, 0))) {
1420 uint32_t msix_table_entry
;
1422 dev
->cap
.available
|= ASSIGNED_DEVICE_CAP_MSIX
;
1423 if ((ret
= pci_add_capability(pci_dev
, PCI_CAP_ID_MSIX
, pos
, 12)) < 0) {
1427 pci_set_word(pci_dev
->config
+ pos
+ PCI_MSIX_FLAGS
,
1428 pci_get_word(pci_dev
->config
+ pos
+ PCI_MSIX_FLAGS
) &
1431 /* Only enable and function mask bits are writable */
1432 pci_set_word(pci_dev
->wmask
+ pos
+ PCI_MSIX_FLAGS
,
1433 PCI_MSIX_FLAGS_ENABLE
| PCI_MSIX_FLAGS_MASKALL
);
/* Record the host physical address of the MSI-X table: BAR base (from the
 * BIR field) plus the table offset. */
1435 msix_table_entry
= pci_get_long(pci_dev
->config
+ pos
+ PCI_MSIX_TABLE
);
1436 bar_nr
= msix_table_entry
& PCI_MSIX_BIR
;
1437 msix_table_entry
&= ~PCI_MSIX_BIR
;
1438 dev
->msix_table_addr
= pci_region
[bar_nr
].base_addr
+ msix_table_entry
;
1443 /* Minimal PM support, nothing writable, device appears to NAK changes */
1444 if ((pos
= pci_find_cap_offset(pci_dev
, PCI_CAP_ID_PM
, 0))) {
1446 if ((ret
= pci_add_capability(pci_dev
, PCI_CAP_ID_PM
, pos
,
1447 PCI_PM_SIZEOF
)) < 0) {
/* Keep only version and DSI bits in the PM capabilities word. */
1451 pmc
= pci_get_word(pci_dev
->config
+ pos
+ PCI_CAP_FLAGS
);
1452 pmc
&= (PCI_PM_CAP_VER_MASK
| PCI_PM_CAP_DSI
);
1453 pci_set_word(pci_dev
->config
+ pos
+ PCI_CAP_FLAGS
, pmc
);
1455 /* assign_device will bring the device up to D0, so we don't need
1456 * to worry about doing that ourselves here. */
1457 pci_set_word(pci_dev
->config
+ pos
+ PCI_PM_CTRL
,
1458 PCI_PM_CTRL_NO_SOFT_RESET
);
1460 pci_set_byte(pci_dev
->config
+ pos
+ PCI_PM_PPB_EXTENSIONS
, 0);
1461 pci_set_byte(pci_dev
->config
+ pos
+ PCI_PM_DATA_REGISTER
, 0);
/* PCI Express capability: only endpoint device types are supported. */
1464 if ((pos
= pci_find_cap_offset(pci_dev
, PCI_CAP_ID_EXP
, 0))) {
1466 uint16_t type
, devctl
, lnkcap
, lnksta
;
1468 int size
= 0x3c; /* version 2 size */
1470 version
= pci_get_byte(pci_dev
->config
+ pos
+ PCI_EXP_FLAGS
);
1471 version
&= PCI_EXP_FLAGS_VERS
;
1474 } else if (version
> 2) {
1475 fprintf(stderr
, "Unsupported PCI express capability version %d\n",
1480 if ((ret
= pci_add_capability(pci_dev
, PCI_CAP_ID_EXP
,
1485 type
= pci_get_word(pci_dev
->config
+ pos
+ PCI_EXP_FLAGS
);
1486 type
= (type
& PCI_EXP_FLAGS_TYPE
) >> 8;
1487 if (type
!= PCI_EXP_TYPE_ENDPOINT
&&
1488 type
!= PCI_EXP_TYPE_LEG_END
&& type
!= PCI_EXP_TYPE_RC_END
) {
1490 "Device assignment only supports endpoint assignment, "
1491 "device type %d\n", type
);
1495 /* capabilities, pass existing read-only copy
1496 * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */
1498 /* device capabilities: hide FLR */
1499 devcap
= pci_get_long(pci_dev
->config
+ pos
+ PCI_EXP_DEVCAP
);
1500 devcap
&= ~PCI_EXP_DEVCAP_FLR
;
1501 pci_set_long(pci_dev
->config
+ pos
+ PCI_EXP_DEVCAP
, devcap
);
1503 /* device control: clear all error reporting enable bits, leaving
1504 * leaving only a few host values. Note, these are
1505 * all writable, but not passed to hw.
1507 devctl
= pci_get_word(pci_dev
->config
+ pos
+ PCI_EXP_DEVCTL
);
1508 devctl
= (devctl
& (PCI_EXP_DEVCTL_READRQ
| PCI_EXP_DEVCTL_PAYLOAD
)) |
1509 PCI_EXP_DEVCTL_RELAX_EN
| PCI_EXP_DEVCTL_NOSNOOP_EN
;
1510 pci_set_word(pci_dev
->config
+ pos
+ PCI_EXP_DEVCTL
, devctl
);
/* Mask off BCR/FLR and AUX PME from guest-writable bits. */
1511 devctl
= PCI_EXP_DEVCTL_BCR_FLR
| PCI_EXP_DEVCTL_AUX_PME
;
1512 pci_set_word(pci_dev
->wmask
+ pos
+ PCI_EXP_DEVCTL
, ~devctl
);
1514 /* Clear device status */
1515 pci_set_word(pci_dev
->config
+ pos
+ PCI_EXP_DEVSTA
, 0);
1517 /* Link capabilities, expose links and latencues, clear reporting */
1518 lnkcap
= pci_get_word(pci_dev
->config
+ pos
+ PCI_EXP_LNKCAP
);
1519 lnkcap
&= (PCI_EXP_LNKCAP_SLS
| PCI_EXP_LNKCAP_MLW
|
1520 PCI_EXP_LNKCAP_ASPMS
| PCI_EXP_LNKCAP_L0SEL
|
1521 PCI_EXP_LNKCAP_L1EL
);
1522 pci_set_word(pci_dev
->config
+ pos
+ PCI_EXP_LNKCAP
, lnkcap
);
1523 pci_set_word(pci_dev
->wmask
+ pos
+ PCI_EXP_LNKCAP
,
1524 PCI_EXP_LNKCTL_ASPMC
| PCI_EXP_LNKCTL_RCB
|
1525 PCI_EXP_LNKCTL_CCC
| PCI_EXP_LNKCTL_ES
|
1526 PCI_EXP_LNKCTL_CLKREQ_EN
| PCI_EXP_LNKCTL_HAWD
);
1528 /* Link control, pass existing read-only copy. Should be writable? */
1530 /* Link status, only expose current speed and width */
1531 lnksta
= pci_get_word(pci_dev
->config
+ pos
+ PCI_EXP_LNKSTA
);
1532 lnksta
&= (PCI_EXP_LNKSTA_CLS
| PCI_EXP_LNKSTA_NLW
);
1533 pci_set_word(pci_dev
->config
+ pos
+ PCI_EXP_LNKSTA
, lnksta
);
1536 /* Slot capabilities, control, status - not needed for endpoints */
1537 pci_set_long(pci_dev
->config
+ pos
+ PCI_EXP_SLTCAP
, 0);
1538 pci_set_word(pci_dev
->config
+ pos
+ PCI_EXP_SLTCTL
, 0);
1539 pci_set_word(pci_dev
->config
+ pos
+ PCI_EXP_SLTSTA
, 0);
1541 /* Root control, capabilities, status - not needed for endpoints */
1542 pci_set_word(pci_dev
->config
+ pos
+ PCI_EXP_RTCTL
, 0);
1543 pci_set_word(pci_dev
->config
+ pos
+ PCI_EXP_RTCAP
, 0);
1544 pci_set_long(pci_dev
->config
+ pos
+ PCI_EXP_RTSTA
, 0);
1546 /* Device capabilities/control 2, pass existing read-only copy */
1547 /* Link control 2, pass existing read-only copy */
/* PCI-X capability: expose the minimal header and sanitize cmd/status. */
1551 if ((pos
= pci_find_cap_offset(pci_dev
, PCI_CAP_ID_PCIX
, 0))) {
1555 /* Only expose the minimum, 8 byte capability */
1556 if ((ret
= pci_add_capability(pci_dev
, PCI_CAP_ID_PCIX
, pos
, 8)) < 0) {
1560 /* Command register, clear upper bits, including extended modes */
1561 cmd
= pci_get_word(pci_dev
->config
+ pos
+ PCI_X_CMD
);
1562 cmd
&= (PCI_X_CMD_DPERR_E
| PCI_X_CMD_ERO
| PCI_X_CMD_MAX_READ
|
1563 PCI_X_CMD_MAX_SPLIT
);
1564 pci_set_word(pci_dev
->config
+ pos
+ PCI_X_CMD
, cmd
);
1566 /* Status register, update with emulated PCI bus location, clear
1567 * error bits, leave the rest. */
1568 status
= pci_get_long(pci_dev
->config
+ pos
+ PCI_X_STATUS
);
1569 status
&= ~(PCI_X_STATUS_BUS
| PCI_X_STATUS_DEVFN
);
1570 status
|= (pci_bus_num(pci_dev
->bus
) << 8) | pci_dev
->devfn
;
1571 status
&= ~(PCI_X_STATUS_SPL_DISC
| PCI_X_STATUS_UNX_SPL
|
1572 PCI_X_STATUS_SPL_ERR
);
1573 pci_set_long(pci_dev
->config
+ pos
+ PCI_X_STATUS
, status
);
1576 if ((pos
= pci_find_cap_offset(pci_dev
, PCI_CAP_ID_VPD
, 0))) {
1577 /* Direct R/W passthrough */
1578 if ((ret
= pci_add_capability(pci_dev
, PCI_CAP_ID_VPD
, pos
, 8)) < 0) {
1583 /* Devices can have multiple vendor capabilities, get them all */
1584 for (pos
= 0; (pos
= pci_find_cap_offset(pci_dev
, PCI_CAP_ID_VNDR
, pos
));
1585 pos
+= PCI_CAP_LIST_NEXT
) {
1586 uint8_t len
= pci_get_byte(pci_dev
->config
+ pos
+ PCI_CAP_FLAGS
);
1587 /* Direct R/W passthrough */
1588 if ((ret
= pci_add_capability(pci_dev
, PCI_CAP_ID_VNDR
,
/*
 * 32-bit read from the emulated MSI-X table MMIO window: copy the aligned
 * word out of the backing page (adev->msix_table_page).  Only the low 12
 * bits of addr are used, so accesses stay within the single 4K page.
 * NOTE(review): the declaration of `val` and the return statement are elided
 * in this excerpt.
 */
1597 static uint32_t msix_mmio_readl(void *opaque
, target_phys_addr_t addr
)
1599 AssignedDevice
*adev
= opaque
;
1600 unsigned int offset
= addr
& 0xfff;
1601 void *page
= adev
->msix_table_page
;
/* memcpy avoids alignment assumptions on the backing page. */
1604 memcpy(&val
, (void *)((char *)page
+ offset
), 4);
1609 static uint32_t msix_mmio_readb(void *opaque
, target_phys_addr_t addr
)
1611 return ((msix_mmio_readl(opaque
, addr
& ~3)) >>
1612 (8 * (addr
& 3))) & 0xff;
1615 static uint32_t msix_mmio_readw(void *opaque
, target_phys_addr_t addr
)
1617 return ((msix_mmio_readl(opaque
, addr
& ~3)) >>
1618 (8 * (addr
& 3))) & 0xffff;
/*
 * 32-bit write to the emulated MSI-X table MMIO window: store the word into
 * the backing page; the table contents are later pushed to KVM when the
 * guest enables MSI-X (assigned_dev_update_msix_mmio).
 * NOTE(review): the DEBUG argument line and braces are elided in this
 * excerpt.
 */
1621 static void msix_mmio_writel(void *opaque
,
1622 target_phys_addr_t addr
, uint32_t val
)
1624 AssignedDevice
*adev
= opaque
;
1625 unsigned int offset
= addr
& 0xfff;
1626 void *page
= adev
->msix_table_page
;
1628 DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
/* memcpy avoids alignment assumptions on the backing page. */
1630 memcpy((void *)((char *)page
+ offset
), &val
, 4);
1633 static void msix_mmio_writew(void *opaque
,
1634 target_phys_addr_t addr
, uint32_t val
)
1636 msix_mmio_writel(opaque
, addr
& ~3,
1637 (val
& 0xffff) << (8*(addr
& 3)));
1640 static void msix_mmio_writeb(void *opaque
,
1641 target_phys_addr_t addr
, uint32_t val
)
1643 msix_mmio_writel(opaque
, addr
& ~3,
1644 (val
& 0xff) << (8*(addr
& 3)));
/* Write handlers for the MSI-X table MMIO region, indexed by access size
 * (byte, word, long) as expected by cpu_register_io_memory(). */
1647 static CPUWriteMemoryFunc
*msix_mmio_write
[] = {
1648 msix_mmio_writeb
, msix_mmio_writew
, msix_mmio_writel
/* Read handlers for the MSI-X table MMIO region, indexed by access size
 * (byte, word, long) as expected by cpu_register_io_memory(). */
1651 static CPUReadMemoryFunc
*msix_mmio_read
[] = {
1652 msix_mmio_readb
, msix_mmio_readw
, msix_mmio_readl
/*
 * Allocate a zeroed 4K anonymous page to back the emulated MSI-X table and
 * register the msix_mmio_read/write handlers for it.
 * NOTE(review): the strerror argument line and the return statements are
 * elided in this excerpt.
 */
1655 static int assigned_dev_register_msix_mmio(AssignedDevice
*dev
)
1657 dev
->msix_table_page
= mmap(NULL
, 0x1000,
1658 PROT_READ
|PROT_WRITE
,
1659 MAP_ANONYMOUS
|MAP_PRIVATE
, 0, 0);
1660 if (dev
->msix_table_page
== MAP_FAILED
) {
1661 fprintf(stderr
, "fail allocate msix_table_page! %s\n",
1665 memset(dev
->msix_table_page
, 0, 0x1000);
/* Handlers dispatch by access size; dev is passed back as opaque. */
1666 dev
->mmio_index
= cpu_register_io_memory(
1667 msix_mmio_read
, msix_mmio_write
, dev
,
1668 DEVICE_NATIVE_ENDIAN
);
/*
 * Tear down the emulated MSI-X table: unregister the MMIO handlers and
 * unmap the backing page.  No-op if the page was never allocated.
 * NOTE(review): the early return and strerror argument line are elided in
 * this excerpt.
 */
1672 static void assigned_dev_unregister_msix_mmio(AssignedDevice
*dev
)
1674 if (!dev
->msix_table_page
)
1677 cpu_unregister_io_memory(dev
->mmio_index
);
1678 dev
->mmio_index
= 0;
1680 if (munmap(dev
->msix_table_page
, 0x1000) == -1) {
1681 fprintf(stderr
, "error unmapping msix_table_page! %s\n",
/* Clear the pointer so a repeat call is a safe no-op. */
1684 dev
->msix_table_page
= NULL
;
/* Empty vmstate descriptor: registered only so the device can be marked
 * unmigratable (see assigned_initfn); it carries no actual state fields. */
1687 static const VMStateDescription vmstate_assigned_device
= {
1688 .name
= "pci-assign",
1689 .fields
= (VMStateField
[]) {
1690 VMSTATE_END_OF_LIST()
/*
 * qdev reset handler: trigger a host-side device reset through the
 * pci-sysfs "reset" attribute, then clear the emulated PCI command register
 * to stop further DMA from the device.
 * NOTE(review): the fd/ret declarations and the open()-failure/close() lines
 * are elided in this excerpt.
 */
1694 static void reset_assigned_device(DeviceState
*dev
)
1696 PCIDevice
*pci_dev
= DO_UPCAST(PCIDevice
, qdev
, dev
);
1697 AssignedDevice
*adev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
1698 char reset_file
[64];
1699 const char reset
[] = "1";
/* Build the sysfs path from the host segment/bus/slot/function. */
1702 snprintf(reset_file
, sizeof(reset_file
),
1703 "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/reset",
1704 adev
->host
.seg
, adev
->host
.bus
, adev
->host
.dev
, adev
->host
.func
);
1707 * Issue a device reset via pci-sysfs. Note that we use write(2) here
1708 * and ignore the return value because some kernels have a bug that
1709 * returns 0 rather than bytes written on success, sending us into an
1710 * infinite retry loop using other write mechanisms.
1712 fd
= open(reset_file
, O_WRONLY
);
1714 ret
= write(fd
, reset
, strlen(reset
));
1719 * When a 0 is written to the command register, the device is logically
1720 * disconnected from the PCI bus. This avoids further DMA transfers.
1722 assigned_dev_pci_write_config(pci_dev
, PCI_COMMAND
, 0, 2);
/*
 * qdev init function for the "pci-assign" device.  Validates KVM support
 * and the host address property, opens the real device via sysfs, maps its
 * BARs, sets up interrupt routing, builds the emulated capability list,
 * assigns the device and its irq to the guest, intercepts the MSI-X table
 * page, loads the option ROM and registers the (unmigratable) vmstate.
 * NOTE(review): error-path returns, goto labels and braces are elided in
 * this excerpt; documented from the visible statements only.
 */
1725 static int assigned_initfn(struct PCIDevice
*pci_dev
)
1727 AssignedDevice
*dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
1728 uint8_t e_device
, e_intx
;
1731 if (!kvm_enabled()) {
1732 error_report("pci-assign: error: requires KVM support");
/* host=00:00.0 in segment 0 means the property was never set. */
1736 if (!dev
->host
.seg
&& !dev
->host
.bus
&& !dev
->host
.dev
&& !dev
->host
.func
) {
1737 error_report("pci-assign: error: no host device specified");
1741 if (get_real_device(dev
, dev
->host
.seg
, dev
->host
.bus
,
1742 dev
->host
.dev
, dev
->host
.func
)) {
1743 error_report("pci-assign: Error: Couldn't get real device (%s)!",
1748 /* handle real device's MMIO/PIO BARs */
1749 if (assigned_dev_register_regions(dev
->real_device
.regions
,
1750 dev
->real_device
.region_number
,
1754 /* handle interrupt routing */
1755 e_device
= (dev
->dev
.devfn
>> 3) & 0x1f;
1756 e_intx
= dev
->dev
.config
[0x3d] - 1;
1757 dev
->intpin
= e_intx
;
/* Cache the host bus location for later dev-id calculations. */
1760 dev
->h_segnr
= dev
->host
.seg
;
1761 dev
->h_busnr
= dev
->host
.bus
;
1762 dev
->h_devfn
= PCI_DEVFN(dev
->host
.dev
, dev
->host
.func
);
1764 if (assigned_device_pci_cap_init(pci_dev
) < 0)
1767 /* assign device to guest */
1768 r
= assign_device(dev
);
1772 /* assign irq for the device */
1773 r
= assign_irq(dev
);
1777 /* intercept MSI-X entry page in the MMIO */
1778 if (dev
->cap
.available
& ASSIGNED_DEVICE_CAP_MSIX
)
1779 if (assigned_dev_register_msix_mmio(dev
))
1782 assigned_dev_load_option_rom(dev
);
1783 QLIST_INSERT_HEAD(&devs
, dev
, next
);
1785 add_boot_device_path(dev
->bootindex
, &pci_dev
->qdev
, NULL
);
1787 /* Register a vmsd so that we can mark it unmigratable. */
1788 vmstate_register(&dev
->dev
.qdev
, 0, &vmstate_assigned_device
, dev
);
1789 register_device_unmigratable(&dev
->dev
.qdev
,
1790 vmstate_assigned_device
.name
, dev
);
/* Error unwind: undo the assignment and release resources. */
1795 deassign_device(dev
);
1797 free_assigned_device(dev
);
/*
 * qdev exit function: unregister vmstate, remove the device from the global
 * list, deassign it from the guest and free all host-side resources.
 * NOTE(review): the return statement is elided in this excerpt.
 */
1801 static int assigned_exitfn(struct PCIDevice
*pci_dev
)
1803 AssignedDevice
*dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
1805 vmstate_unregister(&dev
->dev
.qdev
, &vmstate_assigned_device
, dev
);
1806 QLIST_REMOVE(dev
, next
);
1807 deassign_device(dev
);
1808 free_assigned_device(dev
);
/*
 * qdev property parser for the "host" property: parse a host PCI address
 * string (segment/bus/slot.function) into the PCIHostDevice fields.
 * NOTE(review): the rc declaration and the return-value handling are elided
 * in this excerpt.
 */
1812 static int parse_hostaddr(DeviceState
*dev
, Property
*prop
, const char *str
)
1814 PCIHostDevice
*ptr
= qdev_get_prop_ptr(dev
, prop
);
1817 rc
= pci_parse_host_devaddr(str
, &ptr
->seg
, &ptr
->bus
, &ptr
->dev
, &ptr
->func
);
1823 static int print_hostaddr(DeviceState
*dev
, Property
*prop
, char *dest
, size_t len
)
1825 PCIHostDevice
*ptr
= qdev_get_prop_ptr(dev
, prop
);
1827 return snprintf(dest
, len
, "%02x:%02x.%x", ptr
->bus
, ptr
->dev
, ptr
->func
);
/* Property type descriptor for host PCI addresses ("host" property of
 * pci-assign), wiring up parse_hostaddr/print_hostaddr.
 * NOTE(review): one field line (between .name and .size) is elided in this
 * excerpt. */
1830 PropertyInfo qdev_prop_hostaddr
= {
1831 .name
= "pci-hostaddr",
1833 .size
= sizeof(PCIHostDevice
),
1834 .parse
= parse_hostaddr
,
1835 .print
= print_hostaddr
,
/*
 * Device model registration info for "pci-assign": lifecycle hooks
 * (init/exit/reset), config-space interception hooks, and user-settable
 * properties (host address, iommu/prefer_msi feature bits, boot index,
 * optional config-space fd name).
 */
1838 static PCIDeviceInfo assign_info
= {
1839 .qdev
.name
= "pci-assign",
1840 .qdev
.desc
= "pass through host pci devices to the guest",
1841 .qdev
.size
= sizeof(AssignedDevice
),
1842 .qdev
.reset
= reset_assigned_device
,
1843 .init
= assigned_initfn
,
1844 .exit
= assigned_exitfn
,
1845 .config_read
= assigned_dev_pci_read_config
,
1846 .config_write
= assigned_dev_pci_write_config
,
1847 .qdev
.props
= (Property
[]) {
1848 DEFINE_PROP("host", AssignedDevice
, host
, qdev_prop_hostaddr
, PCIHostDevice
),
1849 DEFINE_PROP_BIT("iommu", AssignedDevice
, features
,
1850 ASSIGNED_DEVICE_USE_IOMMU_BIT
, true),
1851 DEFINE_PROP_BIT("prefer_msi", AssignedDevice
, features
,
1852 ASSIGNED_DEVICE_PREFER_MSI_BIT
, true),
1853 DEFINE_PROP_INT32("bootindex", AssignedDevice
, bootindex
, -1),
1854 DEFINE_PROP_STRING("configfd", AssignedDevice
, configfd_name
),
1855 DEFINE_PROP_END_OF_LIST(),
1859 static void assign_register_devices(void)
1861 pci_qdev_register(&assign_info
);
/* Run assign_register_devices() at startup via QEMU's device-init hook. */
1864 device_init(assign_register_devices
)
1867 * Scan the assigned devices for the devices that have an option ROM, and then
1868 * load the corresponding ROM data to RAM. If an error occurs while loading an
1869 * option ROM, we just ignore that option ROM and continue with the next one.
1871 static void assigned_dev_load_option_rom(AssignedDevice
*dev
)
1873 char name
[32], rom_file
[64];
1879 /* If loading ROM from file, pci handles it */
1880 if (dev
->dev
.romfile
|| !dev
->dev
.rom_bar
)
1883 snprintf(rom_file
, sizeof(rom_file
),
1884 "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
1885 dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
1887 if (stat(rom_file
, &st
)) {
1891 if (access(rom_file
, F_OK
)) {
1892 fprintf(stderr
, "pci-assign: Insufficient privileges for %s\n",
1897 /* Write "1" to the ROM file to enable it */
1898 fp
= fopen(rom_file
, "r+");
1903 if (fwrite(&val
, 1, 1, fp
) != 1) {
1906 fseek(fp
, 0, SEEK_SET
);
1908 snprintf(name
, sizeof(name
), "%s.rom", dev
->dev
.qdev
.info
->name
);
1909 dev
->dev
.rom_offset
= qemu_ram_alloc(&dev
->dev
.qdev
, name
, st
.st_size
);
1910 ptr
= qemu_get_ram_ptr(dev
->dev
.rom_offset
);
1911 memset(ptr
, 0xff, st
.st_size
);
1913 if (!fread(ptr
, 1, st
.st_size
, fp
)) {
1914 fprintf(stderr
, "pci-assign: Cannot read from host %s\n"
1915 "\tDevice option ROM contents are probably invalid "
1916 "(check dmesg).\n\tSkip option ROM probe with rombar=0, "
1917 "or load from file with romfile=\n", rom_file
);
1918 qemu_ram_free(dev
->dev
.rom_offset
);
1919 dev
->dev
.rom_offset
= 0;
1923 pci_register_bar(&dev
->dev
, PCI_ROM_SLOT
,
1924 st
.st_size
, 0, pci_map_option_rom
);
1926 /* Write "0" to disable ROM */
1927 fseek(fp
, 0, SEEK_SET
);
1929 if (!fwrite(&val
, 1, 1, fp
)) {
1930 DEBUG("%s\n", "Failed to disable pci-sysfs rom file");