2 * Copyright (c) 2007, Neocleus Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 * Assign a PCI device from the host to a guest VM.
20 * Adapted for KVM by Qumranet.
22 * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
23 * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
24 * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
25 * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
26 * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
31 #include <sys/types.h>
36 #include "qemu-error.h"
38 #include "device-assignment.h"
/*
 * Resource flag bits, copied from linux/ioport.h so that the flags read from
 * the host's sysfs "resource" file can be decoded without kernel headers.
 */
#define IORESOURCE_IO       0x00000100  /* Resource type */
#define IORESOURCE_MEM      0x00000200
#define IORESOURCE_IRQ      0x00000400
#define IORESOURCE_DMA      0x00000800
#define IORESOURCE_PREFETCH 0x00001000  /* No side effects */
49 /* #define DEVICE_ASSIGNMENT_DEBUG 1 */
51 #ifdef DEVICE_ASSIGNMENT_DEBUG
52 #define DEBUG(fmt, ...) \
54 fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
57 #define DEBUG(fmt, ...) do { } while(0)
60 static void assigned_dev_load_option_rom(AssignedDevice
*dev
);
62 static uint32_t guest_to_host_ioport(AssignedDevRegion
*region
, uint32_t addr
)
64 return region
->u
.r_baseport
+ (addr
- region
->e_physbase
);
67 static void assigned_dev_ioport_writeb(void *opaque
, uint32_t addr
,
70 AssignedDevRegion
*r_access
= opaque
;
71 uint32_t r_pio
= guest_to_host_ioport(r_access
, addr
);
73 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
74 r_pio
, (int)r_access
->e_physbase
,
75 (unsigned long)r_access
->u
.r_baseport
, value
);
80 static void assigned_dev_ioport_writew(void *opaque
, uint32_t addr
,
83 AssignedDevRegion
*r_access
= opaque
;
84 uint32_t r_pio
= guest_to_host_ioport(r_access
, addr
);
86 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
87 r_pio
, (int)r_access
->e_physbase
,
88 (unsigned long)r_access
->u
.r_baseport
, value
);
93 static void assigned_dev_ioport_writel(void *opaque
, uint32_t addr
,
96 AssignedDevRegion
*r_access
= opaque
;
97 uint32_t r_pio
= guest_to_host_ioport(r_access
, addr
);
99 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
100 r_pio
, (int)r_access
->e_physbase
,
101 (unsigned long)r_access
->u
.r_baseport
, value
);
106 static uint32_t assigned_dev_ioport_readb(void *opaque
, uint32_t addr
)
108 AssignedDevRegion
*r_access
= opaque
;
109 uint32_t r_pio
= guest_to_host_ioport(r_access
, addr
);
114 DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
115 r_pio
, (int)r_access
->e_physbase
,
116 (unsigned long)r_access
->u
.r_baseport
, value
);
121 static uint32_t assigned_dev_ioport_readw(void *opaque
, uint32_t addr
)
123 AssignedDevRegion
*r_access
= opaque
;
124 uint32_t r_pio
= guest_to_host_ioport(r_access
, addr
);
129 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
130 r_pio
, (int)r_access
->e_physbase
,
131 (unsigned long)r_access
->u
.r_baseport
, value
);
136 static uint32_t assigned_dev_ioport_readl(void *opaque
, uint32_t addr
)
138 AssignedDevRegion
*r_access
= opaque
;
139 uint32_t r_pio
= guest_to_host_ioport(r_access
, addr
);
144 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
145 r_pio
, (int)r_access
->e_physbase
,
146 (unsigned long)r_access
->u
.r_baseport
, value
);
151 static uint32_t slow_bar_readb(void *opaque
, target_phys_addr_t addr
)
153 AssignedDevRegion
*d
= opaque
;
154 uint8_t *in
= d
->u
.r_virtbase
+ addr
;
158 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx
" val=0x%08x\n", addr
, r
);
163 static uint32_t slow_bar_readw(void *opaque
, target_phys_addr_t addr
)
165 AssignedDevRegion
*d
= opaque
;
166 uint16_t *in
= d
->u
.r_virtbase
+ addr
;
170 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx
" val=0x%08x\n", addr
, r
);
175 static uint32_t slow_bar_readl(void *opaque
, target_phys_addr_t addr
)
177 AssignedDevRegion
*d
= opaque
;
178 uint32_t *in
= d
->u
.r_virtbase
+ addr
;
182 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx
" val=0x%08x\n", addr
, r
);
187 static void slow_bar_writeb(void *opaque
, target_phys_addr_t addr
, uint32_t val
)
189 AssignedDevRegion
*d
= opaque
;
190 uint8_t *out
= d
->u
.r_virtbase
+ addr
;
192 DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx
" val=0x%02x\n", addr
, val
);
196 static void slow_bar_writew(void *opaque
, target_phys_addr_t addr
, uint32_t val
)
198 AssignedDevRegion
*d
= opaque
;
199 uint16_t *out
= d
->u
.r_virtbase
+ addr
;
201 DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx
" val=0x%04x\n", addr
, val
);
205 static void slow_bar_writel(void *opaque
, target_phys_addr_t addr
, uint32_t val
)
207 AssignedDevRegion
*d
= opaque
;
208 uint32_t *out
= d
->u
.r_virtbase
+ addr
;
210 DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx
" val=0x%08x\n", addr
, val
);
214 static CPUWriteMemoryFunc
* const slow_bar_write
[] = {
220 static CPUReadMemoryFunc
* const slow_bar_read
[] = {
226 static void assigned_dev_iomem_map_slow(PCIDevice
*pci_dev
, int region_num
,
227 pcibus_t e_phys
, pcibus_t e_size
,
230 AssignedDevice
*r_dev
= container_of(pci_dev
, AssignedDevice
, dev
);
231 AssignedDevRegion
*region
= &r_dev
->v_addrs
[region_num
];
232 PCIRegion
*real_region
= &r_dev
->real_device
.regions
[region_num
];
235 DEBUG("%s", "slow map\n");
236 if (region_num
== PCI_ROM_SLOT
)
237 m
= cpu_register_io_memory(slow_bar_read
, NULL
, region
);
239 m
= cpu_register_io_memory(slow_bar_read
, slow_bar_write
, region
);
240 cpu_register_physical_memory(e_phys
, e_size
, m
);
242 /* MSI-X MMIO page */
244 real_region
->base_addr
<= r_dev
->msix_table_addr
&&
245 real_region
->base_addr
+ real_region
->size
>= r_dev
->msix_table_addr
) {
246 int offset
= r_dev
->msix_table_addr
- real_region
->base_addr
;
248 cpu_register_physical_memory(e_phys
+ offset
,
249 TARGET_PAGE_SIZE
, r_dev
->mmio_index
);
253 static void assigned_dev_iomem_map(PCIDevice
*pci_dev
, int region_num
,
254 pcibus_t e_phys
, pcibus_t e_size
, int type
)
256 AssignedDevice
*r_dev
= container_of(pci_dev
, AssignedDevice
, dev
);
257 AssignedDevRegion
*region
= &r_dev
->v_addrs
[region_num
];
258 PCIRegion
*real_region
= &r_dev
->real_device
.regions
[region_num
];
259 int ret
= 0, flags
= 0;
261 DEBUG("e_phys=%08" FMT_PCIBUS
" r_virt=%p type=%d len=%08" FMT_PCIBUS
" region_num=%d \n",
262 e_phys
, region
->u
.r_virtbase
, type
, e_size
, region_num
);
264 region
->e_physbase
= e_phys
;
265 region
->e_size
= e_size
;
269 if (region_num
== PCI_ROM_SLOT
)
272 cpu_register_physical_memory(e_phys
, e_size
, region
->memory_index
| flags
);
274 /* deal with MSI-X MMIO page */
275 if (real_region
->base_addr
<= r_dev
->msix_table_addr
&&
276 real_region
->base_addr
+ real_region
->size
>=
277 r_dev
->msix_table_addr
) {
278 int offset
= r_dev
->msix_table_addr
- real_region
->base_addr
;
280 cpu_register_physical_memory(e_phys
+ offset
,
281 TARGET_PAGE_SIZE
, r_dev
->mmio_index
);
286 fprintf(stderr
, "%s: Error: create new mapping failed\n", __func__
);
291 static void assigned_dev_ioport_map(PCIDevice
*pci_dev
, int region_num
,
292 pcibus_t addr
, pcibus_t size
, int type
)
294 AssignedDevice
*r_dev
= container_of(pci_dev
, AssignedDevice
, dev
);
295 AssignedDevRegion
*region
= &r_dev
->v_addrs
[region_num
];
296 int first_map
= (region
->e_size
== 0);
299 region
->e_physbase
= addr
;
300 region
->e_size
= size
;
302 DEBUG("e_phys=0x%" FMT_PCIBUS
" r_baseport=%x type=0x%x len=%" FMT_PCIBUS
" region_num=%d \n",
303 addr
, region
->u
.r_baseport
, type
, size
, region_num
);
306 struct ioperm_data
*data
;
308 data
= qemu_mallocz(sizeof(struct ioperm_data
));
310 fprintf(stderr
, "%s: Out of memory\n", __func__
);
314 data
->start_port
= region
->u
.r_baseport
;
315 data
->num
= region
->r_size
;
318 kvm_add_ioperm_data(data
);
320 for (env
= first_cpu
; env
; env
= env
->next_cpu
)
321 kvm_ioperm(env
, data
);
324 register_ioport_read(addr
, size
, 1, assigned_dev_ioport_readb
,
325 (r_dev
->v_addrs
+ region_num
));
326 register_ioport_read(addr
, size
, 2, assigned_dev_ioport_readw
,
327 (r_dev
->v_addrs
+ region_num
));
328 register_ioport_read(addr
, size
, 4, assigned_dev_ioport_readl
,
329 (r_dev
->v_addrs
+ region_num
));
330 register_ioport_write(addr
, size
, 1, assigned_dev_ioport_writeb
,
331 (r_dev
->v_addrs
+ region_num
));
332 register_ioport_write(addr
, size
, 2, assigned_dev_ioport_writew
,
333 (r_dev
->v_addrs
+ region_num
));
334 register_ioport_write(addr
, size
, 4, assigned_dev_ioport_writel
,
335 (r_dev
->v_addrs
+ region_num
));
338 static uint8_t pci_find_cap_offset(struct pci_dev
*pci_dev
, uint8_t cap
)
342 int pos
= PCI_CAPABILITY_LIST
;
345 status
= pci_read_byte(pci_dev
, PCI_STATUS
);
346 if ((status
& PCI_STATUS_CAP_LIST
) == 0)
350 pos
= pci_read_byte(pci_dev
, pos
);
355 id
= pci_read_byte(pci_dev
, pos
+ PCI_CAP_LIST_ID
);
362 pos
+= PCI_CAP_LIST_NEXT
;
367 static void assigned_dev_pci_write_config(PCIDevice
*d
, uint32_t address
,
368 uint32_t val
, int len
)
372 AssignedDevice
*pci_dev
= container_of(d
, AssignedDevice
, dev
);
374 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
375 ((d
->devfn
>> 3) & 0x1F), (d
->devfn
& 0x7),
376 (uint16_t) address
, val
, len
);
378 if (address
== 0x4) {
379 pci_default_write_config(d
, address
, val
, len
);
380 /* Continue to program the card */
383 if ((address
>= 0x10 && address
<= 0x24) || address
== 0x30 ||
384 address
== 0x34 || address
== 0x3c || address
== 0x3d ||
385 pci_access_cap_config(d
, address
, len
)) {
386 /* used for update-mappings (BAR emulation) */
387 pci_default_write_config(d
, address
, val
, len
);
391 DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
392 ((d
->devfn
>> 3) & 0x1F), (d
->devfn
& 0x7),
393 (uint16_t) address
, val
, len
);
395 fd
= pci_dev
->real_device
.config_fd
;
398 ret
= pwrite(fd
, &val
, len
, address
);
400 if ((ret
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
403 fprintf(stderr
, "%s: pwrite failed, ret = %zd errno = %d\n",
404 __func__
, ret
, errno
);
410 static uint32_t assigned_dev_pci_read_config(PCIDevice
*d
, uint32_t address
,
416 AssignedDevice
*pci_dev
= container_of(d
, AssignedDevice
, dev
);
418 if (address
< 0x4 || (pci_dev
->need_emulate_cmd
&& address
== 0x4) ||
419 (address
>= 0x10 && address
<= 0x24) || address
== 0x30 ||
420 address
== 0x34 || address
== 0x3c || address
== 0x3d ||
421 pci_access_cap_config(d
, address
, len
)) {
422 val
= pci_default_read_config(d
, address
, len
);
423 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
424 (d
->devfn
>> 3) & 0x1F, (d
->devfn
& 0x7), address
, val
, len
);
428 /* vga specific, remove later */
432 fd
= pci_dev
->real_device
.config_fd
;
435 ret
= pread(fd
, &val
, len
, address
);
437 if ((ret
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
440 fprintf(stderr
, "%s: pread failed, ret = %zd errno = %d\n",
441 __func__
, ret
, errno
);
447 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
448 (d
->devfn
>> 3) & 0x1F, (d
->devfn
& 0x7), address
, val
, len
);
450 if (!pci_dev
->cap
.available
) {
451 /* kill the special capabilities */
452 if (address
== 4 && len
== 4)
454 else if (address
== 6)
461 static int assigned_dev_register_regions(PCIRegion
*io_regions
,
462 unsigned long regions_num
,
463 AssignedDevice
*pci_dev
)
466 PCIRegion
*cur_region
= io_regions
;
468 for (i
= 0; i
< regions_num
; i
++, cur_region
++) {
469 if (!cur_region
->valid
)
471 pci_dev
->v_addrs
[i
].num
= i
;
473 /* handle memory io regions */
474 if (cur_region
->type
& IORESOURCE_MEM
) {
476 int t
= cur_region
->type
& IORESOURCE_PREFETCH
477 ? PCI_BASE_ADDRESS_MEM_PREFETCH
478 : PCI_BASE_ADDRESS_SPACE_MEMORY
;
480 if (cur_region
->size
& 0xFFF) {
481 if (i
!= PCI_ROM_SLOT
) {
482 fprintf(stderr
, "PCI region %d at address 0x%llx "
483 "has size 0x%x, which is not a multiple of 4K. "
484 "You might experience some performance hit "
486 i
, (unsigned long long)cur_region
->base_addr
,
492 /* map physical memory */
493 pci_dev
->v_addrs
[i
].e_physbase
= cur_region
->base_addr
;
494 if (i
== PCI_ROM_SLOT
) {
495 pci_dev
->v_addrs
[i
].u
.r_virtbase
=
498 PROT_WRITE
| PROT_READ
, MAP_ANONYMOUS
| MAP_PRIVATE
,
502 pci_dev
->v_addrs
[i
].u
.r_virtbase
=
505 PROT_WRITE
| PROT_READ
, MAP_SHARED
,
506 cur_region
->resource_fd
, (off_t
) 0);
509 if (pci_dev
->v_addrs
[i
].u
.r_virtbase
== MAP_FAILED
) {
510 pci_dev
->v_addrs
[i
].u
.r_virtbase
= NULL
;
511 fprintf(stderr
, "%s: Error: Couldn't mmap 0x%x!"
513 (uint32_t) (cur_region
->base_addr
));
517 if (i
== PCI_ROM_SLOT
) {
518 memset(pci_dev
->v_addrs
[i
].u
.r_virtbase
, 0,
519 (cur_region
->size
+ 0xFFF) & 0xFFFFF000);
520 mprotect(pci_dev
->v_addrs
[PCI_ROM_SLOT
].u
.r_virtbase
,
521 (cur_region
->size
+ 0xFFF) & 0xFFFFF000, PROT_READ
);
524 pci_dev
->v_addrs
[i
].r_size
= cur_region
->size
;
525 pci_dev
->v_addrs
[i
].e_size
= 0;
528 pci_dev
->v_addrs
[i
].u
.r_virtbase
+=
529 (cur_region
->base_addr
& 0xFFF);
533 void *virtbase
= pci_dev
->v_addrs
[i
].u
.r_virtbase
;
535 pci_dev
->v_addrs
[i
].memory_index
= qemu_ram_map(cur_region
->size
,
538 pci_dev
->v_addrs
[i
].memory_index
= 0;
540 pci_register_bar((PCIDevice
*) pci_dev
, i
,
542 slow_map
? assigned_dev_iomem_map_slow
543 : assigned_dev_iomem_map
);
546 /* handle port io regions */
547 pci_dev
->v_addrs
[i
].e_physbase
= cur_region
->base_addr
;
548 pci_dev
->v_addrs
[i
].u
.r_baseport
= cur_region
->base_addr
;
549 pci_dev
->v_addrs
[i
].r_size
= cur_region
->size
;
550 pci_dev
->v_addrs
[i
].e_size
= 0;
552 pci_register_bar((PCIDevice
*) pci_dev
, i
,
553 cur_region
->size
, PCI_BASE_ADDRESS_SPACE_IO
,
554 assigned_dev_ioport_map
);
556 /* not relevant for port io */
557 pci_dev
->v_addrs
[i
].memory_index
= 0;
564 static int get_real_id(const char *devpath
, const char *idname
, uint16_t *val
)
570 snprintf(name
, sizeof(name
), "%s%s", devpath
, idname
);
571 f
= fopen(name
, "r");
573 fprintf(stderr
, "%s: %s: %m\n", __func__
, name
);
576 if (fscanf(f
, "%li\n", &id
) == 1) {
/*
 * Fetch the "vendor" attribute located under devpath and store it in *val.
 *
 * Thin wrapper: the lookup itself and the return code both come from
 * get_real_id().
 */
static int get_real_vendor_id(const char *devpath, uint16_t *val)
{
    return get_real_id(devpath, "vendor", val);
}
/*
 * Fetch the "device" attribute located under devpath and store it in *val.
 *
 * Thin wrapper: the lookup itself and the return code both come from
 * get_real_id().
 */
static int get_real_device_id(const char *devpath, uint16_t *val)
{
    return get_real_id(devpath, "device", val);
}
596 static int get_real_device(AssignedDevice
*pci_dev
, uint16_t r_seg
,
597 uint8_t r_bus
, uint8_t r_dev
, uint8_t r_func
)
599 char dir
[128], name
[128];
602 unsigned long long start
, end
, size
, flags
;
606 PCIDevRegions
*dev
= &pci_dev
->real_device
;
608 dev
->region_number
= 0;
610 snprintf(dir
, sizeof(dir
), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
611 r_seg
, r_bus
, r_dev
, r_func
);
613 snprintf(name
, sizeof(name
), "%sconfig", dir
);
615 fd
= open(name
, O_RDWR
);
617 fprintf(stderr
, "%s: %s: %m\n", __func__
, name
);
622 r
= read(fd
, pci_dev
->dev
.config
, pci_config_size(&pci_dev
->dev
));
624 if (errno
== EINTR
|| errno
== EAGAIN
)
626 fprintf(stderr
, "%s: read failed, errno = %d\n", __func__
, errno
);
629 snprintf(name
, sizeof(name
), "%sresource", dir
);
631 f
= fopen(name
, "r");
633 fprintf(stderr
, "%s: %s: %m\n", __func__
, name
);
637 for (r
= 0; r
< PCI_NUM_REGIONS
; r
++) {
638 if (fscanf(f
, "%lli %lli %lli\n", &start
, &end
, &flags
) != 3)
641 rp
= dev
->regions
+ r
;
643 size
= end
- start
+ 1;
644 flags
&= IORESOURCE_IO
| IORESOURCE_MEM
| IORESOURCE_PREFETCH
;
645 if (size
== 0 || (flags
& ~IORESOURCE_PREFETCH
) == 0)
647 if (flags
& IORESOURCE_MEM
) {
648 flags
&= ~IORESOURCE_IO
;
649 if (r
!= PCI_ROM_SLOT
) {
650 snprintf(name
, sizeof(name
), "%sresource%d", dir
, r
);
651 fd
= open(name
, O_RDWR
);
654 rp
->resource_fd
= fd
;
657 flags
&= ~IORESOURCE_PREFETCH
;
661 rp
->base_addr
= start
;
663 DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
664 r
, rp
->size
, start
, rp
->type
, rp
->resource_fd
);
669 /* read and fill vendor ID */
670 v
= get_real_vendor_id(dir
, &id
);
674 pci_dev
->dev
.config
[0] = id
& 0xff;
675 pci_dev
->dev
.config
[1] = (id
& 0xff00) >> 8;
677 /* read and fill device ID */
678 v
= get_real_device_id(dir
, &id
);
682 pci_dev
->dev
.config
[2] = id
& 0xff;
683 pci_dev
->dev
.config
[3] = (id
& 0xff00) >> 8;
685 /* dealing with virtual function device */
686 snprintf(name
, sizeof(name
), "%sphysfn/", dir
);
687 if (!stat(name
, &statbuf
))
688 pci_dev
->need_emulate_cmd
= 1;
690 pci_dev
->need_emulate_cmd
= 0;
692 dev
->region_number
= r
;
696 static QLIST_HEAD(, AssignedDevice
) devs
= QLIST_HEAD_INITIALIZER(devs
);
698 #ifdef KVM_CAP_IRQ_ROUTING
699 static void free_dev_irq_entries(AssignedDevice
*dev
)
703 for (i
= 0; i
< dev
->irq_entries_nr
; i
++)
704 kvm_del_routing_entry(kvm_context
, &dev
->entry
[i
]);
707 dev
->irq_entries_nr
= 0;
711 static void free_assigned_device(AssignedDevice
*dev
)
716 for (i
= 0; i
< dev
->real_device
.region_number
; i
++) {
717 PCIRegion
*pci_region
= &dev
->real_device
.regions
[i
];
718 AssignedDevRegion
*region
= &dev
->v_addrs
[i
];
720 if (!pci_region
->valid
)
723 if (pci_region
->type
& IORESOURCE_IO
) {
724 kvm_remove_ioperm_data(region
->u
.r_baseport
, region
->r_size
);
726 } else if (pci_region
->type
& IORESOURCE_MEM
) {
727 if (region
->u
.r_virtbase
) {
728 int ret
= munmap(region
->u
.r_virtbase
,
729 (pci_region
->size
+ 0xFFF) & 0xFFFFF000);
732 "Failed to unmap assigned device region: %s\n",
738 if (dev
->real_device
.config_fd
) {
739 close(dev
->real_device
.config_fd
);
740 dev
->real_device
.config_fd
= 0;
743 #ifdef KVM_CAP_IRQ_ROUTING
744 free_dev_irq_entries(dev
);
/*
 * Pack a PCI segment, bus number and devfn into a single 32-bit identifier.
 *
 * Layout of the result:
 *   bits 31..16  seg
 *   bits 15..8   bus
 *   bits  7..0   devfn
 *
 * Each operand is widened to uint32_t before shifting so that no shift is
 * performed on a promoted (signed) int.
 */
static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
{
    return ((uint32_t)seg << 16) | ((uint32_t)bus << 8) | (uint32_t)devfn;
}
754 static void assign_failed_examine(AssignedDevice
*dev
)
756 char name
[PATH_MAX
], dir
[PATH_MAX
], driver
[PATH_MAX
] = {}, *ns
;
757 uint16_t vendor_id
, device_id
;
760 sprintf(dir
, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
761 dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
763 sprintf(name
, "%sdriver", dir
);
765 r
= readlink(name
, driver
, sizeof(driver
));
766 if ((r
<= 0) || r
>= sizeof(driver
) || !(ns
= strrchr(driver
, '/'))) {
772 if (get_real_vendor_id(dir
, &vendor_id
) ||
773 get_real_device_id(dir
, &device_id
)) {
777 fprintf(stderr
, "*** The driver '%s' is occupying your device "
778 "%04x:%02x:%02x.%x.\n",
779 ns
, dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
780 fprintf(stderr
, "***\n");
781 fprintf(stderr
, "*** You can try the following commands to free it:\n");
782 fprintf(stderr
, "***\n");
783 fprintf(stderr
, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
784 "new_id\n", vendor_id
, device_id
);
785 fprintf(stderr
, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
787 dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
, ns
);
788 fprintf(stderr
, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
790 dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
791 fprintf(stderr
, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
792 "/remove_id\n", vendor_id
, device_id
);
793 fprintf(stderr
, "***\n");
798 fprintf(stderr
, "Couldn't find out why.\n");
801 static int assign_device(AssignedDevice
*dev
)
803 struct kvm_assigned_pci_dev assigned_dev_data
;
806 #ifdef KVM_CAP_PCI_SEGMENT
807 /* Only pass non-zero PCI segment to capable module */
808 if (!kvm_check_extension(kvm_state
, KVM_CAP_PCI_SEGMENT
) &&
810 fprintf(stderr
, "Can't assign device inside non-zero PCI segment "
811 "as this KVM module doesn't support it.\n");
816 memset(&assigned_dev_data
, 0, sizeof(assigned_dev_data
));
817 assigned_dev_data
.assigned_dev_id
=
818 calc_assigned_dev_id(dev
->h_segnr
, dev
->h_busnr
, dev
->h_devfn
);
819 #ifdef KVM_CAP_PCI_SEGMENT
820 assigned_dev_data
.segnr
= dev
->h_segnr
;
822 assigned_dev_data
.busnr
= dev
->h_busnr
;
823 assigned_dev_data
.devfn
= dev
->h_devfn
;
826 /* We always enable the IOMMU unless disabled on the command line */
827 if (dev
->use_iommu
) {
828 if (!kvm_check_extension(kvm_state
, KVM_CAP_IOMMU
)) {
829 fprintf(stderr
, "No IOMMU found. Unable to assign device \"%s\"\n",
833 assigned_dev_data
.flags
|= KVM_DEV_ASSIGN_ENABLE_IOMMU
;
839 r
= kvm_assign_pci_device(kvm_context
, &assigned_dev_data
);
841 fprintf(stderr
, "Failed to assign device \"%s\" : %s\n",
842 dev
->dev
.qdev
.id
, strerror(-r
));
846 assign_failed_examine(dev
);
855 static int assign_irq(AssignedDevice
*dev
)
857 struct kvm_assigned_irq assigned_irq_data
;
860 /* Interrupt PIN 0 means don't use INTx */
861 if (pci_read_byte(dev
->pdev
, PCI_INTERRUPT_PIN
) == 0)
864 irq
= pci_map_irq(&dev
->dev
, dev
->intpin
);
865 irq
= piix_get_irq(irq
);
868 irq
= ipf_map_irq(&dev
->dev
, irq
);
871 if (dev
->girq
== irq
)
874 memset(&assigned_irq_data
, 0, sizeof(assigned_irq_data
));
875 assigned_irq_data
.assigned_dev_id
=
876 calc_assigned_dev_id(dev
->h_segnr
, dev
->h_busnr
, dev
->h_devfn
);
877 assigned_irq_data
.guest_irq
= irq
;
878 assigned_irq_data
.host_irq
= dev
->real_device
.irq
;
879 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
880 if (dev
->irq_requested_type
) {
881 assigned_irq_data
.flags
= dev
->irq_requested_type
;
882 r
= kvm_deassign_irq(kvm_context
, &assigned_irq_data
);
883 /* -ENXIO means no assigned irq */
884 if (r
&& r
!= -ENXIO
)
885 perror("assign_irq: deassign");
888 assigned_irq_data
.flags
= KVM_DEV_IRQ_GUEST_INTX
;
889 if (dev
->cap
.available
& ASSIGNED_DEVICE_CAP_MSI
)
890 assigned_irq_data
.flags
|= KVM_DEV_IRQ_HOST_MSI
;
892 assigned_irq_data
.flags
|= KVM_DEV_IRQ_HOST_INTX
;
895 r
= kvm_assign_irq(kvm_context
, &assigned_irq_data
);
897 fprintf(stderr
, "Failed to assign irq for \"%s\": %s\n",
898 dev
->dev
.qdev
.id
, strerror(-r
));
899 fprintf(stderr
, "Perhaps you are assigning a device "
900 "that shares an IRQ with another device?\n");
905 dev
->irq_requested_type
= assigned_irq_data
.flags
;
909 static void deassign_device(AssignedDevice
*dev
)
911 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
912 struct kvm_assigned_pci_dev assigned_dev_data
;
915 memset(&assigned_dev_data
, 0, sizeof(assigned_dev_data
));
916 assigned_dev_data
.assigned_dev_id
=
917 calc_assigned_dev_id(dev
->h_segnr
, dev
->h_busnr
, dev
->h_devfn
);
919 r
= kvm_deassign_pci_device(kvm_context
, &assigned_dev_data
);
921 fprintf(stderr
, "Failed to deassign device \"%s\" : %s\n",
922 dev
->dev
.qdev
.id
, strerror(-r
));
927 AssignedDevInfo
*get_assigned_device(int pcibus
, int slot
)
929 AssignedDevice
*assigned_dev
= NULL
;
930 AssignedDevInfo
*adev
= NULL
;
932 QLIST_FOREACH(adev
, &adev_head
, next
) {
933 assigned_dev
= adev
->assigned_dev
;
934 if (pci_bus_num(assigned_dev
->dev
.bus
) == pcibus
&&
935 PCI_SLOT(assigned_dev
->dev
.devfn
) == slot
)
943 /* The pci config space got updated. Check if irq numbers have changed
946 void assigned_dev_update_irqs(void)
948 AssignedDevice
*dev
, *next
;
951 dev
= QLIST_FIRST(&devs
);
953 next
= QLIST_NEXT(dev
, next
);
956 qdev_unplug(&dev
->dev
.qdev
);
961 #ifdef KVM_CAP_IRQ_ROUTING
963 #ifdef KVM_CAP_DEVICE_MSI
964 static void assigned_dev_update_msi(PCIDevice
*pci_dev
, unsigned int ctrl_pos
)
966 struct kvm_assigned_irq assigned_irq_data
;
967 AssignedDevice
*assigned_dev
= container_of(pci_dev
, AssignedDevice
, dev
);
968 uint8_t ctrl_byte
= pci_dev
->config
[ctrl_pos
];
971 memset(&assigned_irq_data
, 0, sizeof assigned_irq_data
);
972 assigned_irq_data
.assigned_dev_id
=
973 calc_assigned_dev_id(assigned_dev
->h_segnr
, assigned_dev
->h_busnr
,
974 (uint8_t)assigned_dev
->h_devfn
);
976 if (assigned_dev
->irq_requested_type
) {
977 assigned_irq_data
.flags
= assigned_dev
->irq_requested_type
;
978 free_dev_irq_entries(assigned_dev
);
979 r
= kvm_deassign_irq(kvm_context
, &assigned_irq_data
);
980 /* -ENXIO means no assigned irq */
981 if (r
&& r
!= -ENXIO
)
982 perror("assigned_dev_update_msi: deassign irq");
985 if (ctrl_byte
& PCI_MSI_FLAGS_ENABLE
) {
986 assigned_dev
->entry
= calloc(1, sizeof(struct kvm_irq_routing_entry
));
987 if (!assigned_dev
->entry
) {
988 perror("assigned_dev_update_msi: ");
991 assigned_dev
->entry
->u
.msi
.address_lo
=
992 *(uint32_t *)(pci_dev
->config
+ pci_dev
->cap
.start
+
994 assigned_dev
->entry
->u
.msi
.address_hi
= 0;
995 assigned_dev
->entry
->u
.msi
.data
= *(uint16_t *)(pci_dev
->config
+
996 pci_dev
->cap
.start
+ PCI_MSI_DATA_32
);
997 assigned_dev
->entry
->type
= KVM_IRQ_ROUTING_MSI
;
998 r
= kvm_get_irq_route_gsi(kvm_context
);
1000 perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
1003 assigned_dev
->entry
->gsi
= r
;
1005 kvm_add_routing_entry(kvm_context
, assigned_dev
->entry
);
1006 if (kvm_commit_irq_routes(kvm_context
) < 0) {
1007 perror("assigned_dev_update_msi: kvm_commit_irq_routes");
1008 assigned_dev
->cap
.state
&= ~ASSIGNED_DEVICE_MSI_ENABLED
;
1011 assigned_dev
->irq_entries_nr
= 1;
1013 assigned_irq_data
.guest_irq
= assigned_dev
->entry
->gsi
;
1014 assigned_irq_data
.flags
= KVM_DEV_IRQ_HOST_MSI
| KVM_DEV_IRQ_GUEST_MSI
;
1015 if (kvm_assign_irq(kvm_context
, &assigned_irq_data
) < 0)
1016 perror("assigned_dev_enable_msi: assign irq");
1018 assigned_dev
->irq_requested_type
= assigned_irq_data
.flags
;
1023 #ifdef KVM_CAP_DEVICE_MSIX
1024 static int assigned_dev_update_msix_mmio(PCIDevice
*pci_dev
)
1026 AssignedDevice
*adev
= container_of(pci_dev
, AssignedDevice
, dev
);
1027 uint16_t entries_nr
= 0, entries_max_nr
;
1028 int pos
= 0, i
, r
= 0;
1029 uint32_t msg_addr
, msg_upper_addr
, msg_data
, msg_ctrl
;
1030 struct kvm_assigned_msix_nr msix_nr
;
1031 struct kvm_assigned_msix_entry msix_entry
;
1032 void *va
= adev
->msix_table_page
;
1034 if (adev
->cap
.available
& ASSIGNED_DEVICE_CAP_MSI
)
1035 pos
= pci_dev
->cap
.start
+ PCI_CAPABILITY_CONFIG_MSI_LENGTH
;
1037 pos
= pci_dev
->cap
.start
;
1039 entries_max_nr
= pci_dev
->config
[pos
+ 2];
1040 entries_max_nr
&= PCI_MSIX_TABSIZE
;
1041 entries_max_nr
+= 1;
1043 /* Get the usable entry number for allocating */
1044 for (i
= 0; i
< entries_max_nr
; i
++) {
1045 memcpy(&msg_ctrl
, va
+ i
* 16 + 12, 4);
1046 memcpy(&msg_data
, va
+ i
* 16 + 8, 4);
1047 /* Ignore unused entry even it's unmasked */
1053 if (entries_nr
== 0) {
1054 fprintf(stderr
, "MSI-X entry number is zero!\n");
1057 msix_nr
.assigned_dev_id
= calc_assigned_dev_id(adev
->h_segnr
, adev
->h_busnr
,
1058 (uint8_t)adev
->h_devfn
);
1059 msix_nr
.entry_nr
= entries_nr
;
1060 r
= kvm_assign_set_msix_nr(kvm_context
, &msix_nr
);
1062 fprintf(stderr
, "fail to set MSI-X entry number for MSIX! %s\n",
1067 free_dev_irq_entries(adev
);
1068 adev
->irq_entries_nr
= entries_nr
;
1069 adev
->entry
= calloc(entries_nr
, sizeof(struct kvm_irq_routing_entry
));
1071 perror("assigned_dev_update_msix_mmio: ");
1075 msix_entry
.assigned_dev_id
= msix_nr
.assigned_dev_id
;
1077 for (i
= 0; i
< entries_max_nr
; i
++) {
1078 if (entries_nr
>= msix_nr
.entry_nr
)
1080 memcpy(&msg_ctrl
, va
+ i
* 16 + 12, 4);
1081 memcpy(&msg_data
, va
+ i
* 16 + 8, 4);
1085 memcpy(&msg_addr
, va
+ i
* 16, 4);
1086 memcpy(&msg_upper_addr
, va
+ i
* 16 + 4, 4);
1088 r
= kvm_get_irq_route_gsi(kvm_context
);
1092 adev
->entry
[entries_nr
].gsi
= r
;
1093 adev
->entry
[entries_nr
].type
= KVM_IRQ_ROUTING_MSI
;
1094 adev
->entry
[entries_nr
].flags
= 0;
1095 adev
->entry
[entries_nr
].u
.msi
.address_lo
= msg_addr
;
1096 adev
->entry
[entries_nr
].u
.msi
.address_hi
= msg_upper_addr
;
1097 adev
->entry
[entries_nr
].u
.msi
.data
= msg_data
;
1098 DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data
, msg_addr
);
1099 kvm_add_routing_entry(kvm_context
, &adev
->entry
[entries_nr
]);
1101 msix_entry
.gsi
= adev
->entry
[entries_nr
].gsi
;
1102 msix_entry
.entry
= i
;
1103 r
= kvm_assign_set_msix_entry(kvm_context
, &msix_entry
);
1105 fprintf(stderr
, "fail to set MSI-X entry! %s\n", strerror(-r
));
1108 DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
1109 msix_entry
.gsi
, msix_entry
.entry
);
1113 if (r
== 0 && kvm_commit_irq_routes(kvm_context
) < 0) {
1114 perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
1121 static void assigned_dev_update_msix(PCIDevice
*pci_dev
, unsigned int ctrl_pos
)
1123 struct kvm_assigned_irq assigned_irq_data
;
1124 AssignedDevice
*assigned_dev
= container_of(pci_dev
, AssignedDevice
, dev
);
1125 uint16_t *ctrl_word
= (uint16_t *)(pci_dev
->config
+ ctrl_pos
);
1128 memset(&assigned_irq_data
, 0, sizeof assigned_irq_data
);
1129 assigned_irq_data
.assigned_dev_id
=
1130 calc_assigned_dev_id(assigned_dev
->h_segnr
, assigned_dev
->h_busnr
,
1131 (uint8_t)assigned_dev
->h_devfn
);
1133 if (assigned_dev
->irq_requested_type
) {
1134 assigned_irq_data
.flags
= assigned_dev
->irq_requested_type
;
1135 free_dev_irq_entries(assigned_dev
);
1136 r
= kvm_deassign_irq(kvm_context
, &assigned_irq_data
);
1137 /* -ENXIO means no assigned irq */
1138 if (r
&& r
!= -ENXIO
)
1139 perror("assigned_dev_update_msix: deassign irq");
1141 assigned_irq_data
.flags
= KVM_DEV_IRQ_HOST_MSIX
| KVM_DEV_IRQ_GUEST_MSIX
;
1143 if (*ctrl_word
& PCI_MSIX_ENABLE
) {
1144 if (assigned_dev_update_msix_mmio(pci_dev
) < 0) {
1145 perror("assigned_dev_update_msix_mmio");
1148 if (kvm_assign_irq(kvm_context
, &assigned_irq_data
) < 0) {
1149 perror("assigned_dev_enable_msix: assign irq");
1152 assigned_dev
->irq_requested_type
= assigned_irq_data
.flags
;
1158 static void assigned_device_pci_cap_write_config(PCIDevice
*pci_dev
, uint32_t address
,
1159 uint32_t val
, int len
)
1161 AssignedDevice
*assigned_dev
= container_of(pci_dev
, AssignedDevice
, dev
);
1162 unsigned int pos
= pci_dev
->cap
.start
, ctrl_pos
;
1164 pci_default_cap_write_config(pci_dev
, address
, val
, len
);
1165 #ifdef KVM_CAP_IRQ_ROUTING
1166 #ifdef KVM_CAP_DEVICE_MSI
1167 if (assigned_dev
->cap
.available
& ASSIGNED_DEVICE_CAP_MSI
) {
1168 ctrl_pos
= pos
+ PCI_MSI_FLAGS
;
1169 if (address
<= ctrl_pos
&& address
+ len
> ctrl_pos
)
1170 assigned_dev_update_msi(pci_dev
, ctrl_pos
);
1171 pos
+= PCI_CAPABILITY_CONFIG_MSI_LENGTH
;
1174 #ifdef KVM_CAP_DEVICE_MSIX
1175 if (assigned_dev
->cap
.available
& ASSIGNED_DEVICE_CAP_MSIX
) {
1177 if (address
<= ctrl_pos
&& address
+ len
> ctrl_pos
) {
1178 ctrl_pos
--; /* control is word long */
1179 assigned_dev_update_msix(pci_dev
, ctrl_pos
);
1181 pos
+= PCI_CAPABILITY_CONFIG_MSIX_LENGTH
;
/*
 * assigned_device_pci_cap_init - lay out the emulated capability area of an
 * assigned device inside pci_dev->config.
 *
 * pci_dev->cap.length is reset to 0.  For each capability that
 * pci_find_cap_offset() finds on the host device (dev->pdev), a slot starting
 * at config[cap.start + cap.length] is zeroed and filled in, the matching
 * ASSIGNED_DEVICE_CAP_* bit is set in dev->cap.available, and cap.length is
 * advanced by the slot size.  MSI is handled before MSI-X; each part is only
 * compiled when the corresponding KVM_CAP_* macro is defined.
 *
 * NOTE(review): several statements of this function (the value stored as the
 * MSI capability ID, the updates of next_cap_pt outside the visible branch,
 * and the return statement) are not visible in this excerpt.
 */
1188 static int assigned_device_pci_cap_init(PCIDevice
*pci_dev
)
1190 AssignedDevice
*dev
= container_of(pci_dev
, AssignedDevice
, dev
);
1191 PCIRegion
*pci_region
= dev
->real_device
.regions
;
/* Offset (relative to cap.start) of the "next capability" byte of the
 * previously emitted capability; 0 means nothing has been emitted yet. */
1192 int next_cap_pt
= 0;
1194 pci_dev
->cap
.length
= 0;
1195 #ifdef KVM_CAP_IRQ_ROUTING
1196 #ifdef KVM_CAP_DEVICE_MSI
1197 /* Expose MSI capability
1198 * MSI capability is the 1st capability in capability config */
1199 if (pci_find_cap_offset(dev
->pdev
, PCI_CAP_ID_MSI
)) {
1200 dev
->cap
.available
|= ASSIGNED_DEVICE_CAP_MSI
;
/* Start from an all-zero MSI slot; the guest-visible contents are emulated
 * rather than copied from the host. */
1201 memset(&pci_dev
->config
[pci_dev
->cap
.start
+ pci_dev
->cap
.length
],
1202 0, PCI_CAPABILITY_CONFIG_MSI_LENGTH
);
/* First byte of the slot.  NOTE(review): the assigned value is not visible
 * here; presumably the MSI capability ID -- confirm. */
1203 pci_dev
->config
[pci_dev
->cap
.start
+ pci_dev
->cap
.length
] =
1205 pci_dev
->cap
.length
+= PCI_CAPABILITY_CONFIG_MSI_LENGTH
;
1209 #ifdef KVM_CAP_DEVICE_MSIX
1210 /* Expose MSI-X capability */
1211 if (pci_find_cap_offset(dev
->pdev
, PCI_CAP_ID_MSIX
)) {
1212 int pos
, entry_nr
, bar_nr
;
1213 uint32_t msix_table_entry
;
1214 dev
->cap
.available
|= ASSIGNED_DEVICE_CAP_MSIX
;
1215 memset(&pci_dev
->config
[pci_dev
->cap
.start
+ pci_dev
->cap
.length
],
1216 0, PCI_CAPABILITY_CONFIG_MSIX_LENGTH
);
/* pos is the offset of the MSI-X capability in the HOST device's config
 * space; the reads below go to the real device through dev->pdev. */
1217 pos
= pci_find_cap_offset(dev
->pdev
, PCI_CAP_ID_MSIX
);
/* Word at offset 2 of the host capability, masked with PCI_MSIX_TABSIZE. */
1218 entry_nr
= pci_read_word(dev
->pdev
, pos
+ 2) & PCI_MSIX_TABSIZE
;
/* First byte of the slot; 0x11 is presumably the MSI-X capability ID
 * (PCI_CAP_ID_MSIX) -- confirm. */
1219 pci_dev
->config
[pci_dev
->cap
.start
+ pci_dev
->cap
.length
] = 0x11;
/* NOTE(review): this stores entry_nr into a single config[] element; if
 * config[] is a byte array only the low 8 bits of the masked table size
 * survive -- confirm that larger tables cannot occur or widen the store. */
1220 pci_dev
->config
[pci_dev
->cap
.start
+
1221 pci_dev
->cap
.length
+ 2] = entry_nr
;
/* Copy the host's table and PBA dwords into the emulated slot unchanged. */
1222 msix_table_entry
= pci_read_long(dev
->pdev
, pos
+ PCI_MSIX_TABLE
);
1223 *(uint32_t *)(pci_dev
->config
+ pci_dev
->cap
.start
+
1224 pci_dev
->cap
.length
+ PCI_MSIX_TABLE
) = msix_table_entry
;
1225 *(uint32_t *)(pci_dev
->config
+ pci_dev
->cap
.start
+
1226 pci_dev
->cap
.length
+ PCI_MSIX_PBA
) =
1227 pci_read_long(dev
->pdev
, pos
+ PCI_MSIX_PBA
);
/* Split the table dword: the PCI_MSIX_BIR bits select an entry of
 * real_device.regions, the remaining bits are the offset inside it.  Their
 * sum is recorded as the address of the MSI-X table. */
1228 bar_nr
= msix_table_entry
& PCI_MSIX_BIR
;
1229 msix_table_entry
&= ~PCI_MSIX_BIR
;
1230 dev
->msix_table_addr
= pci_region
[bar_nr
].base_addr
+ msix_table_entry
;
/* If a capability was emitted before this one, make its "next" byte point
 * at the start of the MSI-X slot. */
1231 if (next_cap_pt
!= 0) {
1232 pci_dev
->config
[pci_dev
->cap
.start
+ next_cap_pt
] =
1233 pci_dev
->cap
.start
+ pci_dev
->cap
.length
;
1234 next_cap_pt
+= PCI_CAPABILITY_CONFIG_MSI_LENGTH
;
1237 pci_dev
->cap
.length
+= PCI_CAPABILITY_CONFIG_MSIX_LENGTH
;
/*
 * msix_mmio_readl - 32-bit read handler for the intercepted MSI-X table page.
 *
 * opaque is the AssignedDevice that owns the page.  Only the low 12 bits of
 * addr are used, as a byte offset into adev->msix_table_page, and 4 bytes are
 * copied from there into val.
 *
 * NOTE(review): the declaration of val and the return statement are not
 * visible in this excerpt.
 * NOTE(review): offset is not checked against 0x1000 - 4; the page is mapped
 * with a length of 0x1000 in assigned_dev_register_msix_mmio(), so an offset
 * above 0xffc would read past its end -- confirm callers only pass
 * dword-aligned addresses.
 */
1245 static uint32_t msix_mmio_readl(void *opaque
, target_phys_addr_t addr
)
1247 AssignedDevice
*adev
= opaque
;
1248 unsigned int offset
= addr
& 0xfff;
1249 void *page
= adev
->msix_table_page
;
/* memcpy instead of a typed load, so no alignment is required of offset. */
1252 memcpy(&val
, (void *)((char *)page
+ offset
), 4);
1257 static uint32_t msix_mmio_readb(void *opaque
, target_phys_addr_t addr
)
1259 return ((msix_mmio_readl(opaque
, addr
& ~3)) >>
1260 (8 * (addr
& 3))) & 0xff;
1263 static uint32_t msix_mmio_readw(void *opaque
, target_phys_addr_t addr
)
1265 return ((msix_mmio_readl(opaque
, addr
& ~3)) >>
1266 (8 * (addr
& 3))) & 0xffff;
/*
 * msix_mmio_writel - 32-bit write handler for the intercepted MSI-X table
 * page.
 *
 * opaque is the AssignedDevice that owns the page.  Only the low 12 bits of
 * addr are used, as a byte offset into adev->msix_table_page, and the 4 bytes
 * of val are copied there.  Nothing is forwarded to the device from here.
 *
 * NOTE(review): the argument list of the DEBUG() call is not visible in this
 * excerpt; the format uses %lx for the offset -- confirm it matches the type
 * of the argument actually passed.
 * NOTE(review): offset is not checked against 0x1000 - 4; the page is mapped
 * with a length of 0x1000 in assigned_dev_register_msix_mmio(), so an offset
 * above 0xffc would write past its end -- confirm callers only pass
 * dword-aligned addresses.
 */
1269 static void msix_mmio_writel(void *opaque
,
1270 target_phys_addr_t addr
, uint32_t val
)
1272 AssignedDevice
*adev
= opaque
;
1273 unsigned int offset
= addr
& 0xfff;
1274 void *page
= adev
->msix_table_page
;
1276 DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
/* memcpy instead of a typed store, so no alignment is required of offset. */
1278 memcpy((void *)((char *)page
+ offset
), &val
, 4);
1281 static void msix_mmio_writew(void *opaque
,
1282 target_phys_addr_t addr
, uint32_t val
)
1284 msix_mmio_writel(opaque
, addr
& ~3,
1285 (val
& 0xffff) << (8*(addr
& 3)));
1288 static void msix_mmio_writeb(void *opaque
,
1289 target_phys_addr_t addr
, uint32_t val
)
1291 msix_mmio_writel(opaque
, addr
& ~3,
1292 (val
& 0xff) << (8*(addr
& 3)));
1295 static CPUWriteMemoryFunc
*msix_mmio_write
[] = {
1296 msix_mmio_writeb
, msix_mmio_writew
, msix_mmio_writel
1299 static CPUReadMemoryFunc
*msix_mmio_read
[] = {
1300 msix_mmio_readb
, msix_mmio_readw
, msix_mmio_readl
/*
 * assigned_dev_register_msix_mmio - allocate the shadow MSI-X table page and
 * register its MMIO handlers.
 *
 * Maps one anonymous, private, read/write region of 0x1000 bytes into
 * dev->msix_table_page, clears it, and registers the msix_mmio_read /
 * msix_mmio_write handler tables with dev as their opaque pointer; the
 * resulting index is stored in dev->mmio_index.
 *
 * The caller (assigned_initfn) treats a non-zero return as failure.
 * NOTE(review): the tail of the mmap failure branch and the return
 * statements are not visible in this excerpt.
 */
1303 static int assigned_dev_register_msix_mmio(AssignedDevice
*dev
)
/* NOTE(review): fd is 0 although MAP_ANONYMOUS is used; mmap(2) recommends
 * -1 for portability because some implementations require it. */
1305 dev
->msix_table_page
= mmap(NULL
, 0x1000,
1306 PROT_READ
|PROT_WRITE
,
1307 MAP_ANONYMOUS
|MAP_PRIVATE
, 0, 0);
1308 if (dev
->msix_table_page
== MAP_FAILED
) {
1309 fprintf(stderr
, "fail allocate msix_table_page! %s\n",
/* Explicitly zero the whole page before it is exposed to the guest. */
1313 memset(dev
->msix_table_page
, 0, 0x1000);
1314 dev
->mmio_index
= cpu_register_io_memory(
1315 msix_mmio_read
, msix_mmio_write
, dev
);
/*
 * assigned_initfn - qdev init callback of the "pci-assign" device (installed
 * as assign_info.init).
 *
 * Visible steps, in order:
 *   1. reject a device whose "host" property is all zero;
 *   2. open the host device with get_real_device();
 *   3. register its regions with assigned_dev_register_regions();
 *   4. record the interrupt pin and the host segment/bus/devfn in dev;
 *   5. look the host device up with pci_get_dev() and store it in dev->pdev;
 *   6. enable emulated capabilities via pci_enable_capability_support();
 *   7. call assign_device() and then assign_irq();
 *   8. if MSI-X was detected, set up the shadow MSI-X table page;
 *   9. load the option ROM and add dev to the global devs list.
 * The two calls at the end, deassign_device() and free_assigned_device(),
 * undo the assignment and release the device.
 *
 * NOTE(review): the declaration of r, the initialisation of pacc, every
 * return statement, the checks on r and the labels of the cleanup path are
 * not visible in this excerpt, so the exact failure flow cannot be described
 * from here.
 */
1319 static int assigned_initfn(struct PCIDevice
*pci_dev
)
1321 AssignedDevice
*dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
1322 struct pci_access
*pacc
;
1323 uint8_t e_device
, e_intx
;
/* An all-zero address is taken to mean "property not set"; as a consequence
 * host device 0000:00:00.0 cannot be assigned. */
1326 if (!dev
->host
.seg
&& !dev
->host
.bus
&& !dev
->host
.dev
&& !dev
->host
.func
) {
1327 error_report("pci-assign: error: no host device specified");
/* A non-zero result from get_real_device() is reported as an error. */
1331 if (get_real_device(dev
, dev
->host
.seg
, dev
->host
.bus
,
1332 dev
->host
.dev
, dev
->host
.func
)) {
1333 error_report("pci-assign: Error: Couldn't get real device (%s)!",
1338 /* handle real device's MMIO/PIO BARs */
1339 if (assigned_dev_register_regions(dev
->real_device
.regions
,
1340 dev
->real_device
.region_number
,
1344 /* handle interrupt routing */
/* Bits 3..7 of the emulated devfn.  NOTE(review): no use of e_device is
 * visible in this excerpt. */
1345 e_device
= (dev
->dev
.devfn
>> 3) & 0x1f;
/* Config byte 0x3d minus one (presumably the PCI interrupt pin register, so
 * INTA becomes 0 -- confirm).  e_intx is uint8_t, so a register value of 0
 * wraps to 255. */
1346 e_intx
= dev
->dev
.config
[0x3d] - 1;
1347 dev
->intpin
= e_intx
;
/* Remember where the device lives on the host. */
1350 dev
->h_segnr
= dev
->host
.seg
;
1351 dev
->h_busnr
= dev
->host
.bus
;
1352 dev
->h_devfn
= PCI_DEVFN(dev
->host
.dev
, dev
->host
.func
);
/* NOTE(review): pacc must have been initialised on a line not visible here;
 * the result of pci_get_dev() is not checked on any visible line. */
1356 dev
->pdev
= pci_get_dev(pacc
, dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
/* Capability emulation: assigned_device_pci_cap_init builds the capability
 * area, assigned_device_pci_cap_write_config handles guest writes to it.  A
 * negative result is treated as failure. */
1358 if (pci_enable_capability_support(pci_dev
, 0, NULL
,
1359 assigned_device_pci_cap_write_config
,
1360 assigned_device_pci_cap_init
) < 0)
1363 /* assign device to guest */
1364 r
= assign_device(dev
);
1368 /* assign irq for the device */
1369 r
= assign_irq(dev
);
1373 /* intercept MSI-X entry page in the MMIO */
1374 if (dev
->cap
.available
& ASSIGNED_DEVICE_CAP_MSIX
)
1375 if (assigned_dev_register_msix_mmio(dev
))
1378 assigned_dev_load_option_rom(dev
);
1379 QLIST_INSERT_HEAD(&devs
, dev
, next
);
/* Cleanup calls; presumably reached only on the failure paths above (the
 * labels and the success return are not visible) -- confirm. */
1383 deassign_device(dev
);
1385 free_assigned_device(dev
);
/*
 * assigned_exitfn - qdev exit callback of the "pci-assign" device (installed
 * as assign_info.exit).
 *
 * Removes the device from the list it was inserted into by assigned_initfn,
 * then calls deassign_device() followed by free_assigned_device().
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1389 static int assigned_exitfn(struct PCIDevice
*pci_dev
)
1391 AssignedDevice
*dev
= DO_UPCAST(AssignedDevice
, dev
, pci_dev
);
/* Unlink first so the device is no longer reachable through the list while
 * it is being torn down. */
1393 QLIST_REMOVE(dev
, next
);
1394 deassign_device(dev
);
1395 free_assigned_device(dev
);
/*
 * parse_hostaddr - .parse callback of the "pci-hostaddr" property type.
 *
 * Looks up the PCIHostDevice storage of property prop inside dev and lets
 * pci_parse_host_devaddr() fill its seg, bus, dev and func fields from the
 * string str.
 *
 * NOTE(review): the declaration of rc and the way its value is turned into
 * the return value are not visible in this excerpt.
 */
1399 static int parse_hostaddr(DeviceState
*dev
, Property
*prop
, const char *str
)
1401 PCIHostDevice
*ptr
= qdev_get_prop_ptr(dev
, prop
);
1404 rc
= pci_parse_host_devaddr(str
, &ptr
->seg
, &ptr
->bus
, &ptr
->dev
, &ptr
->func
);
1410 static int print_hostaddr(DeviceState
*dev
, Property
*prop
, char *dest
, size_t len
)
1412 PCIHostDevice
*ptr
= qdev_get_prop_ptr(dev
, prop
);
1414 return snprintf(dest
, len
, "%02x:%02x.%x", ptr
->bus
, ptr
->dev
, ptr
->func
);
/*
 * Property type "pci-hostaddr": a host PCI address stored as a PCIHostDevice.
 * Strings are converted by parse_hostaddr() and values are rendered by
 * print_hostaddr().  Used by the "host" property of assign_info.
 *
 * NOTE(review): at least one initializer line and the closing brace are not
 * visible in this excerpt.
 */
1417 PropertyInfo qdev_prop_hostaddr
= {
1418 .name
= "pci-hostaddr",
1420 .size
= sizeof(PCIHostDevice
),
1421 .parse
= parse_hostaddr
,
1422 .print
= print_hostaddr
,
1425 static PCIDeviceInfo assign_info
= {
1426 .qdev
.name
= "pci-assign",
1427 .qdev
.desc
= "pass through host pci devices to the guest",
1428 .qdev
.size
= sizeof(AssignedDevice
),
1429 .init
= assigned_initfn
,
1430 .exit
= assigned_exitfn
,
1431 .config_read
= assigned_dev_pci_read_config
,
1432 .config_write
= assigned_dev_pci_write_config
,
1433 .qdev
.props
= (Property
[]) {
1434 DEFINE_PROP("host", AssignedDevice
, host
, qdev_prop_hostaddr
, PCIHostDevice
),
1435 DEFINE_PROP_UINT32("iommu", AssignedDevice
, use_iommu
, 1),
1436 DEFINE_PROP_END_OF_LIST(),
1440 static void assign_register_devices(void)
1442 pci_qdev_register(&assign_info
);
1445 device_init(assign_register_devices
)
1449 * Syntax to assign device:
1451 * -pcidevice host=bus:dev.func[,dma=none][,name=Foo]
1454 * -pcidevice host=00:13.0,dma=none
1456 * dma can currently only be 'none' to disable iommu support.
/*
 * add_assigned_device - translate a legacy "-pcidevice" argument string into
 * a QemuOpts entry for the "pci-assign" driver.
 *
 * arg is a comma-separated key=value list.  The value of "host" is required
 * as the host address.  The option id is taken from "id", "name" or "host"
 * (see the three lookups into id[] below).  The created options get
 * driver=pci-assign and host=<host>; when KVM_CAP_IOMMU is defined and "dma"
 * starts with "none", iommu=0 is added as well.  The resulting options are
 * printed with qemu_opts_print().
 *
 * On a parse problem a message is written to stderr and qemu_opts_del() is
 * called on opts.
 *
 * NOTE(review): the declaration of r, the conditions between the lookups,
 * the labels and the return statements are not visible in this excerpt.
 */
1458 QemuOpts
*add_assigned_device(const char *arg
)
1460 QemuOpts
*opts
= NULL
;
1461 char host
[64], id
[64], dma
[8];
1464 r
= get_param_value(host
, sizeof(host
), "host", arg
);
/* Three candidate sources for the option id, tried in this order;
 * presumably each later lookup only runs when the previous one found
 * nothing -- the conditions are not visible here. */
1467 r
= get_param_value(id
, sizeof(id
), "id", arg
);
1469 r
= get_param_value(id
, sizeof(id
), "name", arg
);
1471 r
= get_param_value(id
, sizeof(id
), "host", arg
);
1473 opts
= qemu_opts_create(&qemu_device_opts
, id
, 0);
1476 qemu_opt_set(opts
, "driver", "pci-assign");
1477 qemu_opt_set(opts
, "host", host
);
1479 #ifdef KVM_CAP_IOMMU
1480 r
= get_param_value(dma
, sizeof(dma
), "dma", arg
);
/* Only the first four characters are compared, so any value beginning with
 * "none" disables the iommu; every other value leaves the default. */
1481 if (r
&& !strncmp(dma
, "none", 4))
1482 qemu_opt_set(opts
, "iommu", "0");
1484 qemu_opts_print(opts
, NULL
);
1488 fprintf(stderr
, "pcidevice argument parse error; "
1489 "please check the help text for usage\n");
1491 qemu_opts_del(opts
);
/*
 * add_assigned_devices - run add_assigned_device() on each of the n_devices
 * argument strings in devices[].
 *
 * A message naming the offending string is written to stderr when a device
 * could not be added.  No use of bus is visible in this excerpt.
 *
 * NOTE(review): the declarations of i and opts and the condition guarding
 * the error message are not visible in this excerpt.
 */
1495 void add_assigned_devices(PCIBus
*bus
, const char **devices
, int n_devices
)
1500 for (i
= 0; i
< n_devices
; i
++) {
1501 opts
= add_assigned_device(devices
[i
]);
1503 fprintf(stderr
, "Could not add assigned device %s\n", devices
[i
]);
1506 /* generic code will call qdev_device_add() for the device */
1511 * Scan the assigned devices for the devices that have an option ROM, and then
1512 * load the corresponding ROM data to RAM. If an error occurs while loading an
1513 * option ROM, we just ignore that option ROM and continue with the next one.
1515 static void assigned_dev_load_option_rom(AssignedDevice
*dev
)
1523 snprintf(rom_file
, sizeof(rom_file
),
1524 "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
1525 dev
->host
.seg
, dev
->host
.bus
, dev
->host
.dev
, dev
->host
.func
);
1527 if (access(rom_file
, F_OK
))
1530 /* Write something to the ROM file to enable it */
1531 fp
= fopen(rom_file
, "wb");
1534 len
= fwrite(&i
, 1, 1, fp
);
1539 /* The file has to be closed and reopened, otherwise it won't work */
1540 fp
= fopen(rom_file
, "rb");
1544 fseek(fp
, 0, SEEK_END
);
1546 fseek(fp
, 0, SEEK_SET
);
1554 ret
= fread(buf
, size
, 1, fp
);
1555 if (!feof(fp
) || ferror(fp
) || ret
!= 1) {
1562 /* Copy ROM contents into the space backing the ROM BAR */
1563 if (dev
->v_addrs
[PCI_ROM_SLOT
].r_size
>= size
&&
1564 dev
->v_addrs
[PCI_ROM_SLOT
].u
.r_virtbase
) {
1565 mprotect(dev
->v_addrs
[PCI_ROM_SLOT
].u
.r_virtbase
,
1566 size
, PROT_READ
| PROT_WRITE
);
1567 memcpy(dev
->v_addrs
[PCI_ROM_SLOT
].u
.r_virtbase
,
1569 mprotect(dev
->v_addrs
[PCI_ROM_SLOT
].u
.r_virtbase
,