device-assignment: use stdint types
hw/device-assignment.c (qemu-kvm/amd-iommu.git)
1 /*
2 * Copyright (c) 2007, Neocleus Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 * Assign a PCI device from the host to a guest VM.
20 * Adapted for KVM by Qumranet.
22 * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
23 * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
24 * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
25 * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
26 * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <sys/io.h>
31 #include <sys/types.h>
32 #include <sys/stat.h>
33 #include "qemu-kvm.h"
34 #include "hw.h"
35 #include "pc.h"
36 #include "qemu-error.h"
37 #include "console.h"
38 #include "device-assignment.h"
39 #include "loader.h"
40 #include <pci/pci.h>
42 /* From linux/ioport.h */
43 #define IORESOURCE_IO 0x00000100 /* Resource type */
44 #define IORESOURCE_MEM 0x00000200
45 #define IORESOURCE_IRQ 0x00000400
46 #define IORESOURCE_DMA 0x00000800
47 #define IORESOURCE_PREFETCH 0x00001000 /* No side effects */
49 /* #define DEVICE_ASSIGNMENT_DEBUG 1 */
51 #ifdef DEVICE_ASSIGNMENT_DEBUG
52 #define DEBUG(fmt, ...) \
53 do { \
54 fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
55 } while (0)
56 #else
57 #define DEBUG(fmt, ...) do { } while(0)
58 #endif
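/*
 * Guest port I/O is forwarded straight to the host device: each handler
 * below translates the guest port into the matching host port (host base
 * port plus the offset within the BAR) and issues the access with the
 * raw in*()/out*() instructions.
 */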
60 static void assigned_dev_load_option_rom(AssignedDevice *dev);
62 static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
64 return region->u.r_baseport + (addr - region->e_physbase);
67 static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
68 uint32_t value)
70 AssignedDevRegion *r_access = opaque;
71 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
73 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
74 r_pio, (int)r_access->e_physbase,
75 (unsigned long)r_access->u.r_baseport, value);
77 outb(value, r_pio);
80 static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
81 uint32_t value)
83 AssignedDevRegion *r_access = opaque;
84 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
86 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
87 r_pio, (int)r_access->e_physbase,
88 (unsigned long)r_access->u.r_baseport, value);
90 outw(value, r_pio);
93 static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
94 uint32_t value)
96 AssignedDevRegion *r_access = opaque;
97 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
99 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
100 r_pio, (int)r_access->e_physbase,
101 (unsigned long)r_access->u.r_baseport, value);
103 outl(value, r_pio);
106 static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
108 AssignedDevRegion *r_access = opaque;
109 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
110 uint32_t value;
112 value = inb(r_pio);
114 DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
115 r_pio, (int)r_access->e_physbase,
116 (unsigned long)r_access->u.r_baseport, value);
118 return value;
121 static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
123 AssignedDevRegion *r_access = opaque;
124 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
125 uint32_t value;
127 value = inw(r_pio);
129 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
130 r_pio, (int)r_access->e_physbase,
131 (unsigned long)r_access->u.r_baseport, value);
133 return value;
136 static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
138 AssignedDevRegion *r_access = opaque;
139 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
140 uint32_t value;
142 value = inl(r_pio);
144 DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
145 r_pio, (int)r_access->e_physbase,
146 (unsigned long)r_access->u.r_baseport, value);
148 return value;
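/*
 * "Slow" BAR accessors: used for memory BARs that cannot be mapped into
 * the guest directly (their size is not a multiple of the page size).
 * Every guest access traps into QEMU and is satisfied by touching the
 * mmap()ed host BAR at u.r_virtbase.
 */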
151 static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
153 AssignedDevRegion *d = opaque;
154 uint8_t *in = d->u.r_virtbase + addr;
155 uint32_t r;
157 r = *in;
158 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
160 return r;
163 static uint32_t slow_bar_readw(void *opaque, target_phys_addr_t addr)
165 AssignedDevRegion *d = opaque;
166 uint16_t *in = d->u.r_virtbase + addr;
167 uint32_t r;
169 r = *in;
170 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
172 return r;
175 static uint32_t slow_bar_readl(void *opaque, target_phys_addr_t addr)
177 AssignedDevRegion *d = opaque;
178 uint32_t *in = d->u.r_virtbase + addr;
179 uint32_t r;
181 r = *in;
182 DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
184 return r;
187 static void slow_bar_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
189 AssignedDevRegion *d = opaque;
190 uint8_t *out = d->u.r_virtbase + addr;
192 DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val);
193 *out = val;
196 static void slow_bar_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
198 AssignedDevRegion *d = opaque;
199 uint16_t *out = d->u.r_virtbase + addr;
201 DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val);
202 *out = val;
205 static void slow_bar_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
207 AssignedDevRegion *d = opaque;
208 uint32_t *out = d->u.r_virtbase + addr;
210 DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val);
211 *out = val;
214 static CPUWriteMemoryFunc * const slow_bar_write[] = {
215 &slow_bar_writeb,
216 &slow_bar_writew,
217 &slow_bar_writel
220 static CPUReadMemoryFunc * const slow_bar_read[] = {
221 &slow_bar_readb,
222 &slow_bar_readw,
223 &slow_bar_readl
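/*
 * BAR mapping callbacks.  Both the slow and the direct variant also
 * overlay the virtualized MSI-X table page (dev->mmio_index) on top of
 * the BAR that contains the table, so that guest updates to the table
 * are intercepted rather than hitting the real device.
 */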
226 static void assigned_dev_iomem_map_slow(PCIDevice *pci_dev, int region_num,
227 pcibus_t e_phys, pcibus_t e_size,
228 int type)
230 AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
231 AssignedDevRegion *region = &r_dev->v_addrs[region_num];
232 PCIRegion *real_region = &r_dev->real_device.regions[region_num];
233 int m;
235 DEBUG("%s", "slow map\n");
236 if (region_num == PCI_ROM_SLOT)
237 m = cpu_register_io_memory(slow_bar_read, NULL, region);
238 else
239 m = cpu_register_io_memory(slow_bar_read, slow_bar_write, region);
240 cpu_register_physical_memory(e_phys, e_size, m);
242 /* MSI-X MMIO page */
243 if ((e_size > 0) &&
244 real_region->base_addr <= r_dev->msix_table_addr &&
245 real_region->base_addr + real_region->size >= r_dev->msix_table_addr) {
246 int offset = r_dev->msix_table_addr - real_region->base_addr;
248 cpu_register_physical_memory(e_phys + offset,
249 TARGET_PAGE_SIZE, r_dev->mmio_index);
253 static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
254 pcibus_t e_phys, pcibus_t e_size, int type)
256 AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
257 AssignedDevRegion *region = &r_dev->v_addrs[region_num];
258 PCIRegion *real_region = &r_dev->real_device.regions[region_num];
259 int ret = 0, flags = 0;
261 DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
262 e_phys, region->u.r_virtbase, type, e_size, region_num);
264 region->e_physbase = e_phys;
265 region->e_size = e_size;
267 if (e_size > 0) {
269 if (region_num == PCI_ROM_SLOT)
270 flags |= IO_MEM_ROM;
272 cpu_register_physical_memory(e_phys, e_size, region->memory_index | flags);
274 /* deal with MSI-X MMIO page */
275 if (real_region->base_addr <= r_dev->msix_table_addr &&
276 real_region->base_addr + real_region->size >=
277 r_dev->msix_table_addr) {
278 int offset = r_dev->msix_table_addr - real_region->base_addr;
280 cpu_register_physical_memory(e_phys + offset,
281 TARGET_PAGE_SIZE, r_dev->mmio_index);
285 if (ret != 0) {
286 fprintf(stderr, "%s: Error: create new mapping failed\n", __func__);
287 exit(1);
291 static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
292 pcibus_t addr, pcibus_t size, int type)
294 AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
295 AssignedDevRegion *region = &r_dev->v_addrs[region_num];
296 int first_map = (region->e_size == 0);
297 CPUState *env;
299 region->e_physbase = addr;
300 region->e_size = size;
302 DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n",
303 addr, region->u.r_baseport, type, size, region_num);
305 if (first_map) {
306 struct ioperm_data *data;
308 data = qemu_mallocz(sizeof(struct ioperm_data));
309 if (data == NULL) {
310 fprintf(stderr, "%s: Out of memory\n", __func__);
311 exit(1);
314 data->start_port = region->u.r_baseport;
315 data->num = region->r_size;
316 data->turn_on = 1;
318 kvm_add_ioperm_data(data);
320 for (env = first_cpu; env; env = env->next_cpu)
321 kvm_ioperm(env, data);
324 register_ioport_read(addr, size, 1, assigned_dev_ioport_readb,
325 (r_dev->v_addrs + region_num));
326 register_ioport_read(addr, size, 2, assigned_dev_ioport_readw,
327 (r_dev->v_addrs + region_num));
328 register_ioport_read(addr, size, 4, assigned_dev_ioport_readl,
329 (r_dev->v_addrs + region_num));
330 register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb,
331 (r_dev->v_addrs + region_num));
332 register_ioport_write(addr, size, 2, assigned_dev_ioport_writew,
333 (r_dev->v_addrs + region_num));
334 register_ioport_write(addr, size, 4, assigned_dev_ioport_writel,
335 (r_dev->v_addrs + region_num));
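/*
 * Walk the host device's capability list via libpci and return the
 * config-space offset of capability 'cap', or 0 if it is not present.
 */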
338 static uint8_t pci_find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
340 int id;
341 int max_cap = 48;
342 int pos = PCI_CAPABILITY_LIST;
343 int status;
345 status = pci_read_byte(pci_dev, PCI_STATUS);
346 if ((status & PCI_STATUS_CAP_LIST) == 0)
347 return 0;
349 while (max_cap--) {
350 pos = pci_read_byte(pci_dev, pos);
351 if (pos < 0x40)
352 break;
354 pos &= ~3;
355 id = pci_read_byte(pci_dev, pos + PCI_CAP_LIST_ID);
357 if (id == 0xff)
358 break;
359 if (id == cap)
360 return pos;
362 pos += PCI_CAP_LIST_NEXT;
364 return 0;
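/*
 * Config space handling: accesses to emulated fields (BARs, expansion
 * ROM, capability pointer, interrupt line/pin and the emulated
 * capability area) go through the QEMU PCI layer; other accesses, and
 * command register writes, are forwarded to the real device through the
 * sysfs config file descriptor.
 */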
367 static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
368 uint32_t val, int len)
370 int fd;
371 ssize_t ret;
372 AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
374 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
375 ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
376 (uint16_t) address, val, len);
378 if (address == 0x4) {
379 pci_default_write_config(d, address, val, len);
380 /* Continue to program the card */
383 if ((address >= 0x10 && address <= 0x24) || address == 0x30 ||
384 address == 0x34 || address == 0x3c || address == 0x3d ||
385 pci_access_cap_config(d, address, len)) {
386 /* used for update-mappings (BAR emulation) */
387 pci_default_write_config(d, address, val, len);
388 return;
391 DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
392 ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
393 (uint16_t) address, val, len);
395 fd = pci_dev->real_device.config_fd;
397 again:
398 ret = pwrite(fd, &val, len, address);
399 if (ret != len) {
400 if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
401 goto again;
403 fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
404 __func__, ret, errno);
406 exit(1);
410 static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
411 int len)
413 uint32_t val = 0;
414 int fd;
415 ssize_t ret;
416 AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
418 if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) ||
419 (address >= 0x10 && address <= 0x24) || address == 0x30 ||
420 address == 0x34 || address == 0x3c || address == 0x3d ||
421 pci_access_cap_config(d, address, len)) {
422 val = pci_default_read_config(d, address, len);
423 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
424 (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
425 return val;
428 /* vga specific, remove later */
429 if (address == 0xFC)
430 goto do_log;
432 fd = pci_dev->real_device.config_fd;
434 again:
435 ret = pread(fd, &val, len, address);
436 if (ret != len) {
437 if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
438 goto again;
440 fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
441 __func__, ret, errno);
443 exit(1);
446 do_log:
447 DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
448 (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
450 if (!pci_dev->cap.available) {
451 /* kill the special capabilities */
452 if (address == 4 && len == 4)
453 val &= ~0x100000;
454 else if (address == 6)
455 val &= ~0x10;
458 return val;
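/*
 * mmap() every valid host BAR and register it with the guest: memory
 * BARs whose size is a multiple of the page size are mapped directly,
 * the remaining ones fall back to the slow trapped accessors above, and
 * I/O port BARs are forwarded through the ioport handlers.
 */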
461 static int assigned_dev_register_regions(PCIRegion *io_regions,
462 unsigned long regions_num,
463 AssignedDevice *pci_dev)
465 uint32_t i;
466 PCIRegion *cur_region = io_regions;
468 for (i = 0; i < regions_num; i++, cur_region++) {
469 if (!cur_region->valid)
470 continue;
471 pci_dev->v_addrs[i].num = i;
473 /* handle memory io regions */
474 if (cur_region->type & IORESOURCE_MEM) {
475 int slow_map = 0;
476 int t = cur_region->type & IORESOURCE_PREFETCH
477 ? PCI_BASE_ADDRESS_MEM_PREFETCH
478 : PCI_BASE_ADDRESS_SPACE_MEMORY;
480 if (cur_region->size & 0xFFF) {
481 if (i != PCI_ROM_SLOT) {
482 fprintf(stderr, "PCI region %d at address 0x%llx "
483 "has size 0x%x, which is not a multiple of 4K. "
484 "You might experience some performance hit "
485 "due to that.\n",
486 i, (unsigned long long)cur_region->base_addr,
487 cur_region->size);
489 slow_map = 1;
492 /* map physical memory */
493 pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
494 if (i == PCI_ROM_SLOT) {
495 pci_dev->v_addrs[i].u.r_virtbase =
496 mmap(NULL,
497 cur_region->size,
498 PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE,
499 0, (off_t) 0);
501 } else {
502 pci_dev->v_addrs[i].u.r_virtbase =
503 mmap(NULL,
504 cur_region->size,
505 PROT_WRITE | PROT_READ, MAP_SHARED,
506 cur_region->resource_fd, (off_t) 0);
509 if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) {
510 pci_dev->v_addrs[i].u.r_virtbase = NULL;
511 fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
512 "\n", __func__,
513 (uint32_t) (cur_region->base_addr));
514 return -1;
517 if (i == PCI_ROM_SLOT) {
518 memset(pci_dev->v_addrs[i].u.r_virtbase, 0,
519 (cur_region->size + 0xFFF) & 0xFFFFF000);
520 mprotect(pci_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
521 (cur_region->size + 0xFFF) & 0xFFFFF000, PROT_READ);
524 pci_dev->v_addrs[i].r_size = cur_region->size;
525 pci_dev->v_addrs[i].e_size = 0;
527 /* add offset */
528 pci_dev->v_addrs[i].u.r_virtbase +=
529 (cur_region->base_addr & 0xFFF);
532 if (!slow_map) {
533 void *virtbase = pci_dev->v_addrs[i].u.r_virtbase;
535 pci_dev->v_addrs[i].memory_index = qemu_ram_map(cur_region->size,
536 virtbase);
537 } else
538 pci_dev->v_addrs[i].memory_index = 0;
540 pci_register_bar((PCIDevice *) pci_dev, i,
541 cur_region->size, t,
542 slow_map ? assigned_dev_iomem_map_slow
543 : assigned_dev_iomem_map);
544 continue;
546 /* handle port io regions */
547 pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
548 pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
549 pci_dev->v_addrs[i].r_size = cur_region->size;
550 pci_dev->v_addrs[i].e_size = 0;
552 pci_register_bar((PCIDevice *) pci_dev, i,
553 cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
554 assigned_dev_ioport_map);
556 /* not relevant for port io */
557 pci_dev->v_addrs[i].memory_index = 0;
560 /* success */
561 return 0;
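/*
 * Read a single id value (e.g. "vendor" or "device") from the host
 * device's sysfs directory.
 */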
static int get_real_id(const char *devpath, const char *idname, uint16_t *val)
{
    FILE *f;
    char name[128];
    long id;

    snprintf(name, sizeof(name), "%s%s", devpath, idname);
    f = fopen(name, "r");
    if (f == NULL) {
        fprintf(stderr, "%s: %s: %m\n", __func__, name);
        return -1;
    }
    if (fscanf(f, "%li\n", &id) == 1) {
        *val = id;
    } else {
        /* don't leak the FILE on parse failure */
        fclose(f);
        return -1;
    }
    fclose(f);

    return 0;
}
586 static int get_real_vendor_id(const char *devpath, uint16_t *val)
588 return get_real_id(devpath, "vendor", val);
591 static int get_real_device_id(const char *devpath, uint16_t *val)
593 return get_real_id(devpath, "device", val);
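/*
 * Open the host device's sysfs config file, read the raw config space
 * and parse the "resource" file to discover the BARs (base address,
 * size and type) that have to be exposed to the guest.
 */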
596 static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg,
597 uint8_t r_bus, uint8_t r_dev, uint8_t r_func)
599 char dir[128], name[128];
600 int fd, r = 0, v;
601 FILE *f;
602 unsigned long long start, end, size, flags;
603 uint16_t id;
604 struct stat statbuf;
605 PCIRegion *rp;
606 PCIDevRegions *dev = &pci_dev->real_device;
608 dev->region_number = 0;
610 snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
611 r_seg, r_bus, r_dev, r_func);
613 snprintf(name, sizeof(name), "%sconfig", dir);
615 fd = open(name, O_RDWR);
616 if (fd == -1) {
617 fprintf(stderr, "%s: %s: %m\n", __func__, name);
618 return 1;
620 dev->config_fd = fd;
621 again:
622 r = read(fd, pci_dev->dev.config, pci_config_size(&pci_dev->dev));
623 if (r < 0) {
624 if (errno == EINTR || errno == EAGAIN)
625 goto again;
626 fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
629 snprintf(name, sizeof(name), "%sresource", dir);
631 f = fopen(name, "r");
632 if (f == NULL) {
633 fprintf(stderr, "%s: %s: %m\n", __func__, name);
634 return 1;
637 for (r = 0; r < PCI_NUM_REGIONS; r++) {
638 if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3)
639 break;
641 rp = dev->regions + r;
642 rp->valid = 0;
643 size = end - start + 1;
644 flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
645 if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
646 continue;
647 if (flags & IORESOURCE_MEM) {
648 flags &= ~IORESOURCE_IO;
649 if (r != PCI_ROM_SLOT) {
650 snprintf(name, sizeof(name), "%sresource%d", dir, r);
651 fd = open(name, O_RDWR);
652 if (fd == -1)
653 continue;
654 rp->resource_fd = fd;
656 } else
657 flags &= ~IORESOURCE_PREFETCH;
659 rp->type = flags;
660 rp->valid = 1;
661 rp->base_addr = start;
662 rp->size = size;
663 DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
664 r, rp->size, start, rp->type, rp->resource_fd);
667 fclose(f);
669 /* read and fill vendor ID */
670 v = get_real_vendor_id(dir, &id);
671 if (v) {
672 return 1;
674 pci_dev->dev.config[0] = id & 0xff;
675 pci_dev->dev.config[1] = (id & 0xff00) >> 8;
677 /* read and fill device ID */
678 v = get_real_device_id(dir, &id);
679 if (v) {
680 return 1;
682 pci_dev->dev.config[2] = id & 0xff;
683 pci_dev->dev.config[3] = (id & 0xff00) >> 8;
685 /* dealing with virtual function device */
686 snprintf(name, sizeof(name), "%sphysfn/", dir);
687 if (!stat(name, &statbuf))
688 pci_dev->need_emulate_cmd = 1;
689 else
690 pci_dev->need_emulate_cmd = 0;
692 dev->region_number = r;
693 return 0;
696 static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs);
698 #ifdef KVM_CAP_IRQ_ROUTING
699 static void free_dev_irq_entries(AssignedDevice *dev)
701 int i;
703 for (i = 0; i < dev->irq_entries_nr; i++)
704 kvm_del_routing_entry(kvm_context, &dev->entry[i]);
705 free(dev->entry);
706 dev->entry = NULL;
707 dev->irq_entries_nr = 0;
709 #endif
711 static void free_assigned_device(AssignedDevice *dev)
713 if (dev) {
714 int i;
716 for (i = 0; i < dev->real_device.region_number; i++) {
717 PCIRegion *pci_region = &dev->real_device.regions[i];
718 AssignedDevRegion *region = &dev->v_addrs[i];
720 if (!pci_region->valid)
721 continue;
723 if (pci_region->type & IORESOURCE_IO) {
724 kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
725 continue;
726 } else if (pci_region->type & IORESOURCE_MEM) {
727 if (region->u.r_virtbase) {
728 int ret = munmap(region->u.r_virtbase,
729 (pci_region->size + 0xFFF) & 0xFFFFF000);
730 if (ret != 0)
731 fprintf(stderr,
732 "Failed to unmap assigned device region: %s\n",
733 strerror(errno));
738 if (dev->real_device.config_fd) {
739 close(dev->real_device.config_fd);
740 dev->real_device.config_fd = 0;
743 #ifdef KVM_CAP_IRQ_ROUTING
744 free_dev_irq_entries(dev);
745 #endif
749 static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
751 return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
754 static void assign_failed_examine(AssignedDevice *dev)
756 char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
757 uint16_t vendor_id, device_id;
758 int r;
760 sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
761 dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
763 sprintf(name, "%sdriver", dir);
765 r = readlink(name, driver, sizeof(driver));
766 if ((r <= 0) || r >= sizeof(driver) || !(ns = strrchr(driver, '/'))) {
767 goto fail;
770 ns++;
772 if (get_real_vendor_id(dir, &vendor_id) ||
773 get_real_device_id(dir, &device_id)) {
774 goto fail;
777 fprintf(stderr, "*** The driver '%s' is occupying your device "
778 "%04x:%02x:%02x.%x.\n",
779 ns, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
780 fprintf(stderr, "***\n");
781 fprintf(stderr, "*** You can try the following commands to free it:\n");
782 fprintf(stderr, "***\n");
783 fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
784 "new_id\n", vendor_id, device_id);
785 fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
786 "%s/unbind\n",
787 dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func, ns);
788 fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
789 "pci-stub/bind\n",
790 dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
791 fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
792 "/remove_id\n", vendor_id, device_id);
793 fprintf(stderr, "***\n");
795 return;
797 fail:
798 fprintf(stderr, "Couldn't find out why.\n");
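/*
 * Attach the host device to this VM through kvm_assign_pci_device(),
 * enabling IOMMU protection unless it was disabled on the command line.
 */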
801 static int assign_device(AssignedDevice *dev)
803 struct kvm_assigned_pci_dev assigned_dev_data;
804 int r;
806 #ifdef KVM_CAP_PCI_SEGMENT
807 /* Only pass non-zero PCI segment to capable module */
808 if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) &&
809 dev->h_segnr) {
810 fprintf(stderr, "Can't assign device inside non-zero PCI segment "
811 "as this KVM module doesn't support it.\n");
812 return -ENODEV;
814 #endif
816 memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
817 assigned_dev_data.assigned_dev_id =
818 calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
819 #ifdef KVM_CAP_PCI_SEGMENT
820 assigned_dev_data.segnr = dev->h_segnr;
821 #endif
822 assigned_dev_data.busnr = dev->h_busnr;
823 assigned_dev_data.devfn = dev->h_devfn;
825 #ifdef KVM_CAP_IOMMU
826 /* We always enable the IOMMU unless disabled on the command line */
827 if (dev->use_iommu) {
828 if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) {
829 fprintf(stderr, "No IOMMU found. Unable to assign device \"%s\"\n",
830 dev->dev.qdev.id);
831 return -ENODEV;
833 assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
835 #else
836 dev->use_iommu = 0;
837 #endif
839 r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
840 if (r < 0) {
841 fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
842 dev->dev.qdev.id, strerror(-r));
844 switch (r) {
845 case -EBUSY:
846 assign_failed_examine(dev);
847 break;
848 default:
849 break;
852 return r;
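/*
 * Route the device's interrupt to the guest GSI its INTx pin is mapped
 * to, dropping any previously requested route first.  On the host side
 * MSI is used when the device supports it, plain INTx otherwise.
 */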
855 static int assign_irq(AssignedDevice *dev)
857 struct kvm_assigned_irq assigned_irq_data;
858 int irq, r = 0;
860 /* Interrupt PIN 0 means don't use INTx */
861 if (pci_read_byte(dev->pdev, PCI_INTERRUPT_PIN) == 0)
862 return 0;
864 irq = pci_map_irq(&dev->dev, dev->intpin);
865 irq = piix_get_irq(irq);
867 #ifdef TARGET_IA64
868 irq = ipf_map_irq(&dev->dev, irq);
869 #endif
871 if (dev->girq == irq)
872 return r;
874 memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
875 assigned_irq_data.assigned_dev_id =
876 calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
877 assigned_irq_data.guest_irq = irq;
878 assigned_irq_data.host_irq = dev->real_device.irq;
879 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
880 if (dev->irq_requested_type) {
881 assigned_irq_data.flags = dev->irq_requested_type;
882 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
883 /* -ENXIO means no assigned irq */
884 if (r && r != -ENXIO)
885 perror("assign_irq: deassign");
888 assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX;
889 if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
890 assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_MSI;
891 else
892 assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_INTX;
893 #endif
895 r = kvm_assign_irq(kvm_context, &assigned_irq_data);
896 if (r < 0) {
897 fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
898 dev->dev.qdev.id, strerror(-r));
899 fprintf(stderr, "Perhaps you are assigning a device "
900 "that shares an IRQ with another device?\n");
901 return r;
904 dev->girq = irq;
905 dev->irq_requested_type = assigned_irq_data.flags;
906 return r;
909 static void deassign_device(AssignedDevice *dev)
911 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
912 struct kvm_assigned_pci_dev assigned_dev_data;
913 int r;
915 memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
916 assigned_dev_data.assigned_dev_id =
917 calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
919 r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data);
920 if (r < 0)
921 fprintf(stderr, "Failed to deassign device \"%s\" : %s\n",
922 dev->dev.qdev.id, strerror(-r));
923 #endif
926 #if 0
927 AssignedDevInfo *get_assigned_device(int pcibus, int slot)
929 AssignedDevice *assigned_dev = NULL;
930 AssignedDevInfo *adev = NULL;
932 QLIST_FOREACH(adev, &adev_head, next) {
933 assigned_dev = adev->assigned_dev;
934 if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
935 PCI_SLOT(assigned_dev->dev.devfn) == slot)
936 return adev;
939 return NULL;
941 #endif
943 /* The pci config space got updated. Check if irq numbers have changed
944 * for our devices
946 void assigned_dev_update_irqs(void)
948 AssignedDevice *dev, *next;
949 int r;
951 dev = QLIST_FIRST(&devs);
952 while (dev) {
953 next = QLIST_NEXT(dev, next);
954 r = assign_irq(dev);
955 if (r < 0)
956 qdev_unplug(&dev->dev.qdev);
957 dev = next;
961 #ifdef KVM_CAP_IRQ_ROUTING
963 #ifdef KVM_CAP_DEVICE_MSI
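/*
 * Called when the guest toggles the MSI enable bit: tear down the
 * current route and, if MSI is being enabled, build a single MSI
 * routing entry from the guest-programmed address/data pair and assign
 * it to the device.
 */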
964 static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
966 struct kvm_assigned_irq assigned_irq_data;
967 AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
968 uint8_t ctrl_byte = pci_dev->config[ctrl_pos];
969 int r;
971 memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
972 assigned_irq_data.assigned_dev_id =
973 calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
974 (uint8_t)assigned_dev->h_devfn);
976 if (assigned_dev->irq_requested_type) {
977 assigned_irq_data.flags = assigned_dev->irq_requested_type;
978 free_dev_irq_entries(assigned_dev);
979 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
980 /* -ENXIO means no assigned irq */
981 if (r && r != -ENXIO)
982 perror("assigned_dev_update_msi: deassign irq");
985 if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
986 assigned_dev->entry = calloc(1, sizeof(struct kvm_irq_routing_entry));
987 if (!assigned_dev->entry) {
988 perror("assigned_dev_update_msi: ");
989 return;
991 assigned_dev->entry->u.msi.address_lo =
992 *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
993 PCI_MSI_ADDRESS_LO);
994 assigned_dev->entry->u.msi.address_hi = 0;
995 assigned_dev->entry->u.msi.data = *(uint16_t *)(pci_dev->config +
996 pci_dev->cap.start + PCI_MSI_DATA_32);
997 assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
998 r = kvm_get_irq_route_gsi(kvm_context);
999 if (r < 0) {
1000 perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
1001 return;
1003 assigned_dev->entry->gsi = r;
1005 kvm_add_routing_entry(kvm_context, assigned_dev->entry);
1006 if (kvm_commit_irq_routes(kvm_context) < 0) {
1007 perror("assigned_dev_update_msi: kvm_commit_irq_routes");
1008 assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED;
1009 return;
1011 assigned_dev->irq_entries_nr = 1;
1013 assigned_irq_data.guest_irq = assigned_dev->entry->gsi;
1014 assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
1015 if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0)
1016 perror("assigned_dev_enable_msi: assign irq");
1018 assigned_dev->irq_requested_type = assigned_irq_data.flags;
1021 #endif
1023 #ifdef KVM_CAP_DEVICE_MSIX
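/*
 * Rebuild the KVM MSI-X routing from the guest-visible copy of the
 * MSI-X table: every entry with a non-zero message data word gets a
 * fresh GSI and an MSI routing entry, which is then bound to the host
 * MSI-X vector with kvm_assign_set_msix_entry().
 */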
1024 static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
1026 AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
1027 uint16_t entries_nr = 0, entries_max_nr;
1028 int pos = 0, i, r = 0;
1029 uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
1030 struct kvm_assigned_msix_nr msix_nr;
1031 struct kvm_assigned_msix_entry msix_entry;
1032 void *va = adev->msix_table_page;
1034 if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
1035 pos = pci_dev->cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH;
1036 else
1037 pos = pci_dev->cap.start;
1039 entries_max_nr = pci_dev->config[pos + 2];
1040 entries_max_nr &= PCI_MSIX_TABSIZE;
1041 entries_max_nr += 1;
1043 /* Count the entries that are in use, so we know how many to allocate */
1044 for (i = 0; i < entries_max_nr; i++) {
1045 memcpy(&msg_ctrl, va + i * 16 + 12, 4);
1046 memcpy(&msg_data, va + i * 16 + 8, 4);
1047 /* Ignore unused entries even if they are unmasked */
1048 if (msg_data == 0)
1049 continue;
1050 entries_nr ++;
1053 if (entries_nr == 0) {
1054 fprintf(stderr, "MSI-X entry number is zero!\n");
1055 return -EINVAL;
1057 msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
1058 (uint8_t)adev->h_devfn);
1059 msix_nr.entry_nr = entries_nr;
1060 r = kvm_assign_set_msix_nr(kvm_context, &msix_nr);
1061 if (r != 0) {
1062 fprintf(stderr, "fail to set MSI-X entry number for MSIX! %s\n",
1063 strerror(-r));
1064 return r;
1067 free_dev_irq_entries(adev);
1068 adev->irq_entries_nr = entries_nr;
1069 adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
1070 if (!adev->entry) {
1071 perror("assigned_dev_update_msix_mmio: ");
1072 return -errno;
1075 msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
1076 entries_nr = 0;
1077 for (i = 0; i < entries_max_nr; i++) {
1078 if (entries_nr >= msix_nr.entry_nr)
1079 break;
1080 memcpy(&msg_ctrl, va + i * 16 + 12, 4);
1081 memcpy(&msg_data, va + i * 16 + 8, 4);
1082 if (msg_data == 0)
1083 continue;
1085 memcpy(&msg_addr, va + i * 16, 4);
1086 memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
1088 r = kvm_get_irq_route_gsi(kvm_context);
1089 if (r < 0)
1090 return r;
1092 adev->entry[entries_nr].gsi = r;
1093 adev->entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
1094 adev->entry[entries_nr].flags = 0;
1095 adev->entry[entries_nr].u.msi.address_lo = msg_addr;
1096 adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
1097 adev->entry[entries_nr].u.msi.data = msg_data;
1098 DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
1099 kvm_add_routing_entry(kvm_context, &adev->entry[entries_nr]);
1101 msix_entry.gsi = adev->entry[entries_nr].gsi;
1102 msix_entry.entry = i;
1103 r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
1104 if (r) {
1105 fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
1106 break;
1108 DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
1109 msix_entry.gsi, msix_entry.entry);
1110 entries_nr ++;
1113 if (r == 0 && kvm_commit_irq_routes(kvm_context) < 0) {
1114 perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
1115 return -EINVAL;
1118 return r;
1121 static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
1123 struct kvm_assigned_irq assigned_irq_data;
1124 AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
1125 uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
1126 int r;
1128 memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
1129 assigned_irq_data.assigned_dev_id =
1130 calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
1131 (uint8_t)assigned_dev->h_devfn);
1133 if (assigned_dev->irq_requested_type) {
1134 assigned_irq_data.flags = assigned_dev->irq_requested_type;
1135 free_dev_irq_entries(assigned_dev);
1136 r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
1137 /* -ENXIO means no assigned irq */
1138 if (r && r != -ENXIO)
1139 perror("assigned_dev_update_msix: deassign irq");
1141 assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX | KVM_DEV_IRQ_GUEST_MSIX;
1143 if (*ctrl_word & PCI_MSIX_ENABLE) {
1144 if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
1145 perror("assigned_dev_update_msix_mmio");
1146 return;
1148 if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
1149 perror("assigned_dev_enable_msix: assign irq");
1150 return;
1152 assigned_dev->irq_requested_type = assigned_irq_data.flags;
1155 #endif
1156 #endif
1158 static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
1159 uint32_t val, int len)
1161 AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
1162 unsigned int pos = pci_dev->cap.start, ctrl_pos;
1164 pci_default_cap_write_config(pci_dev, address, val, len);
1165 #ifdef KVM_CAP_IRQ_ROUTING
1166 #ifdef KVM_CAP_DEVICE_MSI
1167 if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
1168 ctrl_pos = pos + PCI_MSI_FLAGS;
1169 if (address <= ctrl_pos && address + len > ctrl_pos)
1170 assigned_dev_update_msi(pci_dev, ctrl_pos);
1171 pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
1173 #endif
1174 #ifdef KVM_CAP_DEVICE_MSIX
1175 if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
1176 ctrl_pos = pos + 3;
1177 if (address <= ctrl_pos && address + len > ctrl_pos) {
1178 ctrl_pos--; /* control is word long */
1179 assigned_dev_update_msix(pci_dev, ctrl_pos);
1181 pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
1183 #endif
1184 #endif
1185 return;
1188 static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
1190 AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
1191 PCIRegion *pci_region = dev->real_device.regions;
1192 int next_cap_pt = 0;
1194 pci_dev->cap.length = 0;
1195 #ifdef KVM_CAP_IRQ_ROUTING
1196 #ifdef KVM_CAP_DEVICE_MSI
1197 /* Expose MSI capability
1198 * MSI capability is the 1st capability in capability config */
1199 if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSI)) {
1200 dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
1201 memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length],
1202 0, PCI_CAPABILITY_CONFIG_MSI_LENGTH);
1203 pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] =
1204 PCI_CAP_ID_MSI;
1205 pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
1206 next_cap_pt = 1;
1208 #endif
1209 #ifdef KVM_CAP_DEVICE_MSIX
1210 /* Expose MSI-X capability */
1211 if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX)) {
1212 int pos, entry_nr, bar_nr;
1213 uint32_t msix_table_entry;
1214 dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
1215 memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length],
1216 0, PCI_CAPABILITY_CONFIG_MSIX_LENGTH);
1217 pos = pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX);
1218 entry_nr = pci_read_word(dev->pdev, pos + 2) & PCI_MSIX_TABSIZE;
1219 pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] = 0x11;
1220 pci_dev->config[pci_dev->cap.start +
1221 pci_dev->cap.length + 2] = entry_nr;
1222 msix_table_entry = pci_read_long(dev->pdev, pos + PCI_MSIX_TABLE);
1223 *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
1224 pci_dev->cap.length + PCI_MSIX_TABLE) = msix_table_entry;
1225 *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
1226 pci_dev->cap.length + PCI_MSIX_PBA) =
1227 pci_read_long(dev->pdev, pos + PCI_MSIX_PBA);
1228 bar_nr = msix_table_entry & PCI_MSIX_BIR;
1229 msix_table_entry &= ~PCI_MSIX_BIR;
1230 dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
1231 if (next_cap_pt != 0) {
1232 pci_dev->config[pci_dev->cap.start + next_cap_pt] =
1233 pci_dev->cap.start + pci_dev->cap.length;
1234 next_cap_pt += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
1235 } else
1236 next_cap_pt = 1;
1237 pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
1239 #endif
1240 #endif
1242 return 0;
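/*
 * Accessors for the virtualized MSI-X table page.  The guest reads and
 * writes a private copy (msix_table_page); the real routing is only
 * rebuilt when MSI-X is (re)enabled through the capability.
 */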
1245 static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
1247 AssignedDevice *adev = opaque;
1248 unsigned int offset = addr & 0xfff;
1249 void *page = adev->msix_table_page;
1250 uint32_t val = 0;
1252 memcpy(&val, (void *)((char *)page + offset), 4);
1254 return val;
1257 static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
1259 return ((msix_mmio_readl(opaque, addr & ~3)) >>
1260 (8 * (addr & 3))) & 0xff;
1263 static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
1265 return ((msix_mmio_readl(opaque, addr & ~3)) >>
1266 (8 * (addr & 3))) & 0xffff;
1269 static void msix_mmio_writel(void *opaque,
1270 target_phys_addr_t addr, uint32_t val)
1272 AssignedDevice *adev = opaque;
1273 unsigned int offset = addr & 0xfff;
1274 void *page = adev->msix_table_page;
1276 DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
1277 addr, val);
1278 memcpy((void *)((char *)page + offset), &val, 4);
1281 static void msix_mmio_writew(void *opaque,
1282 target_phys_addr_t addr, uint32_t val)
1284 msix_mmio_writel(opaque, addr & ~3,
1285 (val & 0xffff) << (8*(addr & 3)));
1288 static void msix_mmio_writeb(void *opaque,
1289 target_phys_addr_t addr, uint32_t val)
1291 msix_mmio_writel(opaque, addr & ~3,
1292 (val & 0xff) << (8*(addr & 3)));
1295 static CPUWriteMemoryFunc *msix_mmio_write[] = {
1296 msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel
1299 static CPUReadMemoryFunc *msix_mmio_read[] = {
1300 msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
1303 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
1305 dev->msix_table_page = mmap(NULL, 0x1000,
1306 PROT_READ|PROT_WRITE,
1307 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
1308 if (dev->msix_table_page == MAP_FAILED) {
1309 fprintf(stderr, "fail allocate msix_table_page! %s\n",
1310 strerror(errno));
1311 return -EFAULT;
1313 memset(dev->msix_table_page, 0, 0x1000);
1314 dev->mmio_index = cpu_register_io_memory(
1315 msix_mmio_read, msix_mmio_write, dev);
1316 return 0;
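/*
 * qdev init callback: look up the host device in sysfs, map its BARs,
 * expose the emulated MSI/MSI-X capabilities, assign the device to the
 * VM and set up the initial interrupt route.
 */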
1319 static int assigned_initfn(struct PCIDevice *pci_dev)
1321 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1322 struct pci_access *pacc;
1323 uint8_t e_device, e_intx;
1324 int r;
1326 if (!dev->host.seg && !dev->host.bus && !dev->host.dev && !dev->host.func) {
1327 error_report("pci-assign: error: no host device specified");
1328 goto out;
1331 if (get_real_device(dev, dev->host.seg, dev->host.bus,
1332 dev->host.dev, dev->host.func)) {
1333 error_report("pci-assign: Error: Couldn't get real device (%s)!",
1334 dev->dev.qdev.id);
1335 goto out;
1338 /* handle real device's MMIO/PIO BARs */
1339 if (assigned_dev_register_regions(dev->real_device.regions,
1340 dev->real_device.region_number,
1341 dev))
1342 goto out;
1344 /* handle interrupt routing */
1345 e_device = (dev->dev.devfn >> 3) & 0x1f;
1346 e_intx = dev->dev.config[0x3d] - 1;
1347 dev->intpin = e_intx;
1348 dev->run = 0;
1349 dev->girq = -1;
1350 dev->h_segnr = dev->host.seg;
1351 dev->h_busnr = dev->host.bus;
1352 dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func);
1354 pacc = pci_alloc();
1355 pci_init(pacc);
1356 dev->pdev = pci_get_dev(pacc, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
1358 if (pci_enable_capability_support(pci_dev, 0, NULL,
1359 assigned_device_pci_cap_write_config,
1360 assigned_device_pci_cap_init) < 0)
1361 goto assigned_out;
1363 /* assign device to guest */
1364 r = assign_device(dev);
1365 if (r < 0)
1366 goto assigned_out;
1368 /* assign irq for the device */
1369 r = assign_irq(dev);
1370 if (r < 0)
1371 goto assigned_out;
1373 /* intercept MSI-X entry page in the MMIO */
1374 if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
1375 if (assigned_dev_register_msix_mmio(dev))
1376 goto assigned_out;
1378 assigned_dev_load_option_rom(dev);
1379 QLIST_INSERT_HEAD(&devs, dev, next);
1380 return 0;
1382 assigned_out:
1383 deassign_device(dev);
1384 out:
1385 free_assigned_device(dev);
1386 return -1;
1389 static int assigned_exitfn(struct PCIDevice *pci_dev)
1391 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
1393 QLIST_REMOVE(dev, next);
1394 deassign_device(dev);
1395 free_assigned_device(dev);
1396 return 0;
1399 static int parse_hostaddr(DeviceState *dev, Property *prop, const char *str)
1401 PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
1402 int rc;
1404 rc = pci_parse_host_devaddr(str, &ptr->seg, &ptr->bus, &ptr->dev, &ptr->func);
1405 if (rc != 0)
1406 return -1;
1407 return 0;
1410 static int print_hostaddr(DeviceState *dev, Property *prop, char *dest, size_t len)
1412 PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
1414 return snprintf(dest, len, "%02x:%02x.%x", ptr->bus, ptr->dev, ptr->func);
1417 PropertyInfo qdev_prop_hostaddr = {
1418 .name = "pci-hostaddr",
1419 .type = -1,
1420 .size = sizeof(PCIHostDevice),
1421 .parse = parse_hostaddr,
1422 .print = print_hostaddr,
1425 static PCIDeviceInfo assign_info = {
1426 .qdev.name = "pci-assign",
1427 .qdev.desc = "pass through host pci devices to the guest",
1428 .qdev.size = sizeof(AssignedDevice),
1429 .init = assigned_initfn,
1430 .exit = assigned_exitfn,
1431 .config_read = assigned_dev_pci_read_config,
1432 .config_write = assigned_dev_pci_write_config,
1433 .qdev.props = (Property[]) {
1434 DEFINE_PROP("host", AssignedDevice, host, qdev_prop_hostaddr, PCIHostDevice),
1435 DEFINE_PROP_UINT32("iommu", AssignedDevice, use_iommu, 1),
1436 DEFINE_PROP_END_OF_LIST(),
1440 static void assign_register_devices(void)
1442 pci_qdev_register(&assign_info);
1445 device_init(assign_register_devices)
1449 * Syntax to assign device:
1451 * -pcidevice host=bus:dev.func[,dma=none][,name=Foo]
1453 * Example:
1454 * -pcidevice host=00:13.0,dma=pvdma
1456 * dma can currently only be 'none' to disable iommu support.
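/*
 * Turn a legacy "-pcidevice" argument into QemuOpts for the pci-assign
 * qdev driver, i.e. the equivalent of "-device pci-assign,host=...".
 */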
1458 QemuOpts *add_assigned_device(const char *arg)
1460 QemuOpts *opts = NULL;
1461 char host[64], id[64], dma[8];
1462 int r;
1464 r = get_param_value(host, sizeof(host), "host", arg);
1465 if (!r)
1466 goto bad;
1467 r = get_param_value(id, sizeof(id), "id", arg);
1468 if (!r)
1469 r = get_param_value(id, sizeof(id), "name", arg);
1470 if (!r)
1471 r = get_param_value(id, sizeof(id), "host", arg);
1473 opts = qemu_opts_create(&qemu_device_opts, id, 0);
1474 if (!opts)
1475 goto bad;
1476 qemu_opt_set(opts, "driver", "pci-assign");
1477 qemu_opt_set(opts, "host", host);
1479 #ifdef KVM_CAP_IOMMU
1480 r = get_param_value(dma, sizeof(dma), "dma", arg);
1481 if (r && !strncmp(dma, "none", 4))
1482 qemu_opt_set(opts, "iommu", "0");
1483 #endif
1484 qemu_opts_print(opts, NULL);
1485 return opts;
1487 bad:
1488 fprintf(stderr, "pcidevice argument parse error; "
1489 "please check the help text for usage\n");
1490 if (opts)
1491 qemu_opts_del(opts);
1492 return NULL;
1495 void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices)
1497 QemuOpts *opts;
1498 int i;
1500 for (i = 0; i < n_devices; i++) {
1501 opts = add_assigned_device(devices[i]);
1502 if (opts == NULL) {
1503 fprintf(stderr, "Could not add assigned device %s\n", devices[i]);
1504 exit(1);
1506 /* generic code will call qdev_device_add() for the device */
1511 * Scan the assigned devices for the devices that have an option ROM, and then
1512 * load the corresponding ROM data to RAM. If an error occurs while loading an
1513 * option ROM, we just ignore that option ROM and continue with the next one.
1515 static void assigned_dev_load_option_rom(AssignedDevice *dev)
1517 int size, len, ret;
1518 void *buf;
1519 FILE *fp;
1520 uint8_t i = 1;
1521 char rom_file[64];
1523 snprintf(rom_file, sizeof(rom_file),
1524 "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
1525 dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
1527 if (access(rom_file, F_OK))
1528 return;
1530 /* Write something to the ROM file to enable it */
1531 fp = fopen(rom_file, "wb");
1532 if (fp == NULL)
1533 return;
1534 len = fwrite(&i, 1, 1, fp);
1535 fclose(fp);
1536 if (len != 1)
1537 return;
1539 /* The file has to be closed and reopened, otherwise it won't work */
1540 fp = fopen(rom_file, "rb");
1541 if (fp == NULL)
1542 return;
1544 fseek(fp, 0, SEEK_END);
1545 size = ftell(fp);
1546 fseek(fp, 0, SEEK_SET);
1548 buf = malloc(size);
1549 if (buf == NULL) {
1550 fclose(fp);
1551 return;
1554 ret = fread(buf, size, 1, fp);
1555 if (!feof(fp) || ferror(fp) || ret != 1) {
1556 free(buf);
1557 fclose(fp);
1558 return;
1560 fclose(fp);
1562 /* Copy ROM contents into the space backing the ROM BAR */
1563 if (dev->v_addrs[PCI_ROM_SLOT].r_size >= size &&
1564 dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase) {
1565 mprotect(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
1566 size, PROT_READ | PROT_WRITE);
1567 memcpy(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
1568 buf, size);
1569 mprotect(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
1570 size, PROT_READ);
1573 free(buf);