2 * Copyright (c) 2011 NetApp, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/linker_set.h>
45 #include <machine/vmm.h>
57 #define CONF1_ADDR_PORT 0x0cf8
58 #define CONF1_DATA_PORT 0x0cfc
60 #define CONF1_ENABLE 0x80000000ul
62 #define MAXBUSES (PCI_BUSMAX + 1)
63 #define MAXSLOTS (PCI_SLOTMAX + 1)
64 #define MAXFUNCS (PCI_FUNCMAX + 1)
69 struct pci_devinst
*fi_devi
;
79 struct intxinfo si_intpins
[4];
80 struct funcinfo si_funcs
[MAXFUNCS
];
84 uint16_t iobase
, iolimit
; /* I/O window */
85 uint32_t membase32
, memlimit32
; /* mmio window below 4GB */
86 uint64_t membase64
, memlimit64
; /* mmio window above 4GB */
87 struct slotinfo slotinfo
[MAXSLOTS
];
90 static struct businfo
*pci_businfo
[MAXBUSES
];
92 SET_DECLARE(pci_devemu_set
, struct pci_devemu
);
94 static uint64_t pci_emul_iobase
;
95 static uint64_t pci_emul_membase32
;
96 static uint64_t pci_emul_membase64
;
98 #define PCI_EMUL_IOBASE 0x2000
99 #define PCI_EMUL_IOLIMIT 0x10000
101 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */
102 #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
103 SYSRES_MEM(PCI_EMUL_ECFG_BASE
, PCI_EMUL_ECFG_SIZE
);
105 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE
107 #define PCI_EMUL_MEMBASE64 0xD000000000UL
108 #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL
110 static struct pci_devemu
*pci_emul_finddev(char *name
);
111 static void pci_lintr_route(struct pci_devinst
*pi
);
112 static void pci_lintr_update(struct pci_devinst
*pi
);
113 static void pci_cfgrw(struct vmctx
*ctx
, int vcpu
, int in
, int bus
, int slot
,
114 int func
, int coff
, int bytes
, uint32_t *val
);
/*
 * Store 'val' into config space at 'coff', using the accessor that
 * matches the access width ('bytes' is 1, 2 or 4).
 */
static void
CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
{

	if (bytes == 1)
		pci_set_cfgdata8(pi, coff, val);
	else if (bytes == 2)
		pci_set_cfgdata16(pi, coff, val);
	else
		pci_set_cfgdata32(pi, coff, val);
}
/*
 * Fetch a 1/2/4 byte value from config space at 'coff' using the
 * accessor that matches the requested width.
 */
static __inline uint32_t
CFGREAD(struct pci_devinst *pi, int coff, int bytes)
{

	if (bytes == 1)
		return (pci_get_cfgdata8(pi, coff));
	else if (bytes == 2)
		return (pci_get_cfgdata16(pi, coff));
	else
		return (pci_get_cfgdata32(pi, coff));
}
145 * Slot options are in the form:
147 * <bus>:<slot>:<func>,<emul>[,<config>]
148 * <slot>[:<func>],<emul>[,<config>]
152 * emul is a string describing the type of PCI device e.g. virtio-net
153 * config is an optional string, depending on the device, that can be
154 * used for configuration.
/* Complain about a malformed -s command line option. */
static void
pci_parse_slot_usage(char *aopt)
{

	fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt);
}
167 pci_parse_slot(char *opt
)
171 char *emul
, *config
, *str
, *cp
;
172 int error
, bnum
, snum
, fnum
;
177 emul
= config
= NULL
;
178 if ((cp
= strchr(str
, ',')) != NULL
) {
181 if ((cp
= strchr(emul
, ',')) != NULL
) {
186 pci_parse_slot_usage(opt
);
190 /* <bus>:<slot>:<func> */
191 if (sscanf(str
, "%d:%d:%d", &bnum
, &snum
, &fnum
) != 3) {
194 if (sscanf(str
, "%d:%d", &snum
, &fnum
) != 2) {
197 if (sscanf(str
, "%d", &snum
) != 1) {
203 if (bnum
< 0 || bnum
>= MAXBUSES
|| snum
< 0 || snum
>= MAXSLOTS
||
204 fnum
< 0 || fnum
>= MAXFUNCS
) {
205 pci_parse_slot_usage(opt
);
209 if (pci_businfo
[bnum
] == NULL
)
210 pci_businfo
[bnum
] = calloc(1, sizeof(struct businfo
));
212 bi
= pci_businfo
[bnum
];
213 si
= &bi
->slotinfo
[snum
];
215 if (si
->si_funcs
[fnum
].fi_name
!= NULL
) {
216 fprintf(stderr
, "pci slot %d:%d already occupied!\n",
221 if (pci_emul_finddev(emul
) == NULL
) {
222 fprintf(stderr
, "pci slot %d:%d: unknown device \"%s\"\n",
228 si
->si_funcs
[fnum
].fi_name
= emul
;
229 si
->si_funcs
[fnum
].fi_param
= config
;
239 pci_valid_pba_offset(struct pci_devinst
*pi
, uint64_t offset
)
242 if (offset
< pi
->pi_msix
.pba_offset
)
245 if (offset
>= pi
->pi_msix
.pba_offset
+ pi
->pi_msix
.pba_size
) {
253 pci_emul_msix_twrite(struct pci_devinst
*pi
, uint64_t offset
, int size
,
256 int msix_entry_offset
;
260 /* support only 4 or 8 byte writes */
261 if (size
!= 4 && size
!= 8)
265 * Return if table index is beyond what device supports
267 tab_index
= offset
/ MSIX_TABLE_ENTRY_SIZE
;
268 if (tab_index
>= pi
->pi_msix
.table_count
)
271 msix_entry_offset
= offset
% MSIX_TABLE_ENTRY_SIZE
;
273 /* support only aligned writes */
274 if ((msix_entry_offset
% size
) != 0)
277 dest
= (char *)(pi
->pi_msix
.table
+ tab_index
);
278 dest
+= msix_entry_offset
;
281 *((uint32_t *)dest
) = value
;
283 *((uint64_t *)dest
) = value
;
289 pci_emul_msix_tread(struct pci_devinst
*pi
, uint64_t offset
, int size
)
292 int msix_entry_offset
;
294 uint64_t retval
= ~0;
297 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
298 * table but we also allow 1 byte access to accommodate reads from
301 if (size
!= 1 && size
!= 4 && size
!= 8)
304 msix_entry_offset
= offset
% MSIX_TABLE_ENTRY_SIZE
;
306 /* support only aligned reads */
307 if ((msix_entry_offset
% size
) != 0) {
311 tab_index
= offset
/ MSIX_TABLE_ENTRY_SIZE
;
313 if (tab_index
< pi
->pi_msix
.table_count
) {
314 /* valid MSI-X Table access */
315 dest
= (char *)(pi
->pi_msix
.table
+ tab_index
);
316 dest
+= msix_entry_offset
;
319 retval
= *((uint8_t *)dest
);
321 retval
= *((uint32_t *)dest
);
323 retval
= *((uint64_t *)dest
);
324 } else if (pci_valid_pba_offset(pi
, offset
)) {
325 /* return 0 for PBA access */
333 pci_msix_table_bar(struct pci_devinst
*pi
)
336 if (pi
->pi_msix
.table
!= NULL
)
337 return (pi
->pi_msix
.table_bar
);
343 pci_msix_pba_bar(struct pci_devinst
*pi
)
346 if (pi
->pi_msix
.table
!= NULL
)
347 return (pi
->pi_msix
.pba_bar
);
353 pci_emul_io_handler(struct vmctx
*ctx
, int vcpu
, int in
, int port
, int bytes
,
354 uint32_t *eax
, void *arg
)
356 struct pci_devinst
*pdi
= arg
;
357 struct pci_devemu
*pe
= pdi
->pi_d
;
361 for (i
= 0; i
<= PCI_BARMAX
; i
++) {
362 if (pdi
->pi_bar
[i
].type
== PCIBAR_IO
&&
363 port
>= pdi
->pi_bar
[i
].addr
&&
364 port
+ bytes
<= pdi
->pi_bar
[i
].addr
+ pdi
->pi_bar
[i
].size
) {
365 offset
= port
- pdi
->pi_bar
[i
].addr
;
367 *eax
= (*pe
->pe_barread
)(ctx
, vcpu
, pdi
, i
,
370 (*pe
->pe_barwrite
)(ctx
, vcpu
, pdi
, i
, offset
,
379 pci_emul_mem_handler(struct vmctx
*ctx
, int vcpu
, int dir
, uint64_t addr
,
380 int size
, uint64_t *val
, void *arg1
, long arg2
)
382 struct pci_devinst
*pdi
= arg1
;
383 struct pci_devemu
*pe
= pdi
->pi_d
;
385 int bidx
= (int) arg2
;
387 assert(bidx
<= PCI_BARMAX
);
388 assert(pdi
->pi_bar
[bidx
].type
== PCIBAR_MEM32
||
389 pdi
->pi_bar
[bidx
].type
== PCIBAR_MEM64
);
390 assert(addr
>= pdi
->pi_bar
[bidx
].addr
&&
391 addr
+ size
<= pdi
->pi_bar
[bidx
].addr
+ pdi
->pi_bar
[bidx
].size
);
393 offset
= addr
- pdi
->pi_bar
[bidx
].addr
;
395 if (dir
== MEM_F_WRITE
) {
397 (*pe
->pe_barwrite
)(ctx
, vcpu
, pdi
, bidx
, offset
,
398 4, *val
& 0xffffffff);
399 (*pe
->pe_barwrite
)(ctx
, vcpu
, pdi
, bidx
, offset
+ 4,
402 (*pe
->pe_barwrite
)(ctx
, vcpu
, pdi
, bidx
, offset
,
407 *val
= (*pe
->pe_barread
)(ctx
, vcpu
, pdi
, bidx
,
409 *val
|= (*pe
->pe_barread
)(ctx
, vcpu
, pdi
, bidx
,
410 offset
+ 4, 4) << 32;
412 *val
= (*pe
->pe_barread
)(ctx
, vcpu
, pdi
, bidx
,
/*
 * Carve a naturally-aligned region of 'size' bytes out of the
 * allocator window at *baseptr, bounded by 'limit'. On success the
 * region start is written to *addr, *baseptr is advanced past it and
 * 0 is returned; -1 if the window is exhausted.
 *
 * NOTE(review): the 'addr' out-parameter is not visible in this view
 * of the file — confirm against the caller in pci_emul_alloc_pbar().
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
			uint64_t *addr)
{
	uint64_t base;

	assert((size & (size - 1)) == 0);	/* must be a power of 2 */

	base = roundup2(*baseptr, size);

	if (base + size <= limit) {
		*addr = base;
		*baseptr = base + size;
		return (0);
	} else
		return (-1);
}
440 pci_emul_alloc_bar(struct pci_devinst
*pdi
, int idx
, enum pcibar_type type
,
444 return (pci_emul_alloc_pbar(pdi
, idx
, 0, type
, size
));
448 * Register (or unregister) the MMIO or I/O region associated with the BAR
449 * register 'idx' of an emulated pci device.
452 modify_bar_registration(struct pci_devinst
*pi
, int idx
, int registration
)
455 struct inout_port iop
;
458 switch (pi
->pi_bar
[idx
].type
) {
460 bzero(&iop
, sizeof(struct inout_port
));
461 iop
.name
= pi
->pi_name
;
462 iop
.port
= pi
->pi_bar
[idx
].addr
;
463 iop
.size
= pi
->pi_bar
[idx
].size
;
465 iop
.flags
= IOPORT_F_INOUT
;
466 iop
.handler
= pci_emul_io_handler
;
468 error
= register_inout(&iop
);
470 error
= unregister_inout(&iop
);
474 bzero(&mr
, sizeof(struct mem_range
));
475 mr
.name
= pi
->pi_name
;
476 mr
.base
= pi
->pi_bar
[idx
].addr
;
477 mr
.size
= pi
->pi_bar
[idx
].size
;
480 mr
.handler
= pci_emul_mem_handler
;
483 error
= register_mem(&mr
);
485 error
= unregister_mem(&mr
);
/* Stop decoding the region described by BAR 'idx'. */
static void
unregister_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 0);
}
/* Start decoding the region described by BAR 'idx'. */
static void
register_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 1);
}
508 /* Are we decoding i/o port accesses for the emulated pci device? */
510 porten(struct pci_devinst
*pi
)
514 cmd
= pci_get_cfgdata16(pi
, PCIR_COMMAND
);
516 return (cmd
& PCIM_CMD_PORTEN
);
519 /* Are we decoding memory accesses for the emulated pci device? */
521 memen(struct pci_devinst
*pi
)
525 cmd
= pci_get_cfgdata16(pi
, PCIR_COMMAND
);
527 return (cmd
& PCIM_CMD_MEMEN
);
531 * Update the MMIO or I/O address that is decoded by the BAR register.
533 * If the pci device has enabled the address space decoding then intercept
534 * the address range decoded by the BAR register.
537 update_bar_address(struct pci_devinst
*pi
, uint64_t addr
, int idx
, int type
)
541 if (pi
->pi_bar
[idx
].type
== PCIBAR_IO
)
547 unregister_bar(pi
, idx
);
552 pi
->pi_bar
[idx
].addr
= addr
;
555 pi
->pi_bar
[idx
].addr
&= ~0xffffffffUL
;
556 pi
->pi_bar
[idx
].addr
|= addr
;
559 pi
->pi_bar
[idx
].addr
&= 0xffffffff;
560 pi
->pi_bar
[idx
].addr
|= addr
;
567 register_bar(pi
, idx
);
571 pci_emul_alloc_pbar(struct pci_devinst
*pdi
, int idx
, uint64_t hostbase
,
572 enum pcibar_type type
, uint64_t size
)
575 uint64_t *baseptr
, limit
, addr
, mask
, lobits
, bar
;
577 assert(idx
>= 0 && idx
<= PCI_BARMAX
);
579 if ((size
& (size
- 1)) != 0)
580 size
= 1UL << flsl(size
); /* round up to a power of 2 */
582 /* Enforce minimum BAR sizes required by the PCI standard */
583 if (type
== PCIBAR_IO
) {
594 addr
= mask
= lobits
= 0;
597 baseptr
= &pci_emul_iobase
;
598 limit
= PCI_EMUL_IOLIMIT
;
599 mask
= PCIM_BAR_IO_BASE
;
600 lobits
= PCIM_BAR_IO_SPACE
;
605 * Some drivers do not work well if the 64-bit BAR is allocated
606 * above 4GB. Allow for this by allocating small requests under
607 * 4GB unless then allocation size is larger than some arbitrary
608 * number (32MB currently).
610 if (size
> 32 * 1024 * 1024) {
612 * XXX special case for device requiring peer-peer DMA
614 if (size
== 0x100000000UL
)
617 baseptr
= &pci_emul_membase64
;
618 limit
= PCI_EMUL_MEMLIMIT64
;
619 mask
= PCIM_BAR_MEM_BASE
;
620 lobits
= PCIM_BAR_MEM_SPACE
| PCIM_BAR_MEM_64
|
621 PCIM_BAR_MEM_PREFETCH
;
624 baseptr
= &pci_emul_membase32
;
625 limit
= PCI_EMUL_MEMLIMIT32
;
626 mask
= PCIM_BAR_MEM_BASE
;
627 lobits
= PCIM_BAR_MEM_SPACE
| PCIM_BAR_MEM_64
;
631 baseptr
= &pci_emul_membase32
;
632 limit
= PCI_EMUL_MEMLIMIT32
;
633 mask
= PCIM_BAR_MEM_BASE
;
634 lobits
= PCIM_BAR_MEM_SPACE
| PCIM_BAR_MEM_32
;
637 printf("pci_emul_alloc_base: invalid bar type %d\n", type
);
641 if (baseptr
!= NULL
) {
642 error
= pci_emul_alloc_resource(baseptr
, limit
, size
, &addr
);
647 pdi
->pi_bar
[idx
].type
= type
;
648 pdi
->pi_bar
[idx
].addr
= addr
;
649 pdi
->pi_bar
[idx
].size
= size
;
651 /* Initialize the BAR register in config space */
652 bar
= (addr
& mask
) | lobits
;
653 pci_set_cfgdata32(pdi
, PCIR_BAR(idx
), bar
);
655 if (type
== PCIBAR_MEM64
) {
656 assert(idx
+ 1 <= PCI_BARMAX
);
657 pdi
->pi_bar
[idx
+ 1].type
= PCIBAR_MEMHI64
;
658 pci_set_cfgdata32(pdi
, PCIR_BAR(idx
+ 1), bar
>> 32);
661 register_bar(pdi
, idx
);
666 #define CAP_START_OFFSET 0x40
668 pci_emul_add_capability(struct pci_devinst
*pi
, u_char
*capdata
, int caplen
)
670 int i
, capoff
, reallen
;
675 reallen
= roundup2(caplen
, 4); /* dword aligned */
677 sts
= pci_get_cfgdata16(pi
, PCIR_STATUS
);
678 if ((sts
& PCIM_STATUS_CAPPRESENT
) == 0)
679 capoff
= CAP_START_OFFSET
;
681 capoff
= pi
->pi_capend
+ 1;
683 /* Check if we have enough space */
684 if (capoff
+ reallen
> PCI_REGMAX
+ 1)
687 /* Set the previous capability pointer */
688 if ((sts
& PCIM_STATUS_CAPPRESENT
) == 0) {
689 pci_set_cfgdata8(pi
, PCIR_CAP_PTR
, capoff
);
690 pci_set_cfgdata16(pi
, PCIR_STATUS
, sts
|PCIM_STATUS_CAPPRESENT
);
692 pci_set_cfgdata8(pi
, pi
->pi_prevcap
+ 1, capoff
);
694 /* Copy the capability */
695 for (i
= 0; i
< caplen
; i
++)
696 pci_set_cfgdata8(pi
, capoff
+ i
, capdata
[i
]);
698 /* Set the next capability pointer */
699 pci_set_cfgdata8(pi
, capoff
+ 1, 0);
701 pi
->pi_prevcap
= capoff
;
702 pi
->pi_capend
= capoff
+ reallen
- 1;
706 static struct pci_devemu
*
707 pci_emul_finddev(char *name
)
709 struct pci_devemu
**pdpp
, *pdp
;
711 SET_FOREACH(pdpp
, pci_devemu_set
) {
713 if (!strcmp(pdp
->pe_emu
, name
)) {
722 pci_emul_init(struct vmctx
*ctx
, struct pci_devemu
*pde
, int bus
, int slot
,
723 int func
, struct funcinfo
*fi
)
725 struct pci_devinst
*pdi
;
728 pdi
= calloc(1, sizeof(struct pci_devinst
));
734 pthread_mutex_init(&pdi
->pi_lintr
.lock
, NULL
);
735 pdi
->pi_lintr
.pin
= 0;
736 pdi
->pi_lintr
.state
= IDLE
;
737 pdi
->pi_lintr
.pirq_pin
= 0;
738 pdi
->pi_lintr
.ioapic_irq
= 0;
740 snprintf(pdi
->pi_name
, PI_NAMESZ
, "%s-pci-%d", pde
->pe_emu
, slot
);
742 /* Disable legacy interrupts */
743 pci_set_cfgdata8(pdi
, PCIR_INTLINE
, 255);
744 pci_set_cfgdata8(pdi
, PCIR_INTPIN
, 0);
746 pci_set_cfgdata8(pdi
, PCIR_COMMAND
,
747 PCIM_CMD_PORTEN
| PCIM_CMD_MEMEN
| PCIM_CMD_BUSMASTEREN
);
749 err
= (*pde
->pe_init
)(ctx
, pdi
, fi
->fi_param
);
759 pci_populate_msicap(struct msicap
*msicap
, int msgnum
, int nextptr
)
763 /* Number of msi messages must be a power of 2 between 1 and 32 */
764 assert((msgnum
& (msgnum
- 1)) == 0 && msgnum
>= 1 && msgnum
<= 32);
765 mmc
= ffs(msgnum
) - 1;
767 bzero(msicap
, sizeof(struct msicap
));
768 msicap
->capid
= PCIY_MSI
;
769 msicap
->nextptr
= nextptr
;
770 msicap
->msgctrl
= PCIM_MSICTRL_64BIT
| (mmc
<< 1);
774 pci_emul_add_msicap(struct pci_devinst
*pi
, int msgnum
)
776 struct msicap msicap
;
778 pci_populate_msicap(&msicap
, msgnum
, 0);
780 return (pci_emul_add_capability(pi
, (u_char
*)&msicap
, sizeof(msicap
)));
784 pci_populate_msixcap(struct msixcap
*msixcap
, int msgnum
, int barnum
,
785 uint32_t msix_tab_size
)
788 assert(msix_tab_size
% 4096 == 0);
790 bzero(msixcap
, sizeof(struct msixcap
));
791 msixcap
->capid
= PCIY_MSIX
;
794 * Message Control Register, all fields set to
795 * zero except for the Table Size.
796 * Note: Table size N is encoded as N-1
798 msixcap
->msgctrl
= msgnum
- 1;
802 * - MSI-X table start at offset 0
803 * - PBA table starts at a 4K aligned offset after the MSI-X table
805 msixcap
->table_info
= barnum
& PCIM_MSIX_BIR_MASK
;
806 msixcap
->pba_info
= msix_tab_size
| (barnum
& PCIM_MSIX_BIR_MASK
);
810 pci_msix_table_init(struct pci_devinst
*pi
, int table_entries
)
814 assert(table_entries
> 0);
815 assert(table_entries
<= MAX_MSIX_TABLE_ENTRIES
);
817 table_size
= table_entries
* MSIX_TABLE_ENTRY_SIZE
;
818 pi
->pi_msix
.table
= calloc(1, table_size
);
820 /* set mask bit of vector control register */
821 for (i
= 0; i
< table_entries
; i
++)
822 pi
->pi_msix
.table
[i
].vector_control
|= PCIM_MSIX_VCTRL_MASK
;
826 pci_emul_add_msixcap(struct pci_devinst
*pi
, int msgnum
, int barnum
)
829 struct msixcap msixcap
;
831 assert(msgnum
>= 1 && msgnum
<= MAX_MSIX_TABLE_ENTRIES
);
832 assert(barnum
>= 0 && barnum
<= PCIR_MAX_BAR_0
);
834 tab_size
= msgnum
* MSIX_TABLE_ENTRY_SIZE
;
836 /* Align table size to nearest 4K */
837 tab_size
= roundup2(tab_size
, 4096);
839 pi
->pi_msix
.table_bar
= barnum
;
840 pi
->pi_msix
.pba_bar
= barnum
;
841 pi
->pi_msix
.table_offset
= 0;
842 pi
->pi_msix
.table_count
= msgnum
;
843 pi
->pi_msix
.pba_offset
= tab_size
;
844 pi
->pi_msix
.pba_size
= PBA_SIZE(msgnum
);
846 pci_msix_table_init(pi
, msgnum
);
848 pci_populate_msixcap(&msixcap
, msgnum
, barnum
, tab_size
);
850 /* allocate memory for MSI-X Table and PBA */
851 pci_emul_alloc_bar(pi
, barnum
, PCIBAR_MEM32
,
852 tab_size
+ pi
->pi_msix
.pba_size
);
854 return (pci_emul_add_capability(pi
, (u_char
*)&msixcap
,
859 msixcap_cfgwrite(struct pci_devinst
*pi
, int capoff
, int offset
,
860 int bytes
, uint32_t val
)
862 uint16_t msgctrl
, rwmask
;
865 off
= offset
- capoff
;
866 /* Message Control Register */
867 if (off
== 2 && bytes
== 2) {
868 rwmask
= PCIM_MSIXCTRL_MSIX_ENABLE
| PCIM_MSIXCTRL_FUNCTION_MASK
;
869 msgctrl
= pci_get_cfgdata16(pi
, offset
);
871 msgctrl
|= val
& rwmask
;
874 pi
->pi_msix
.enabled
= val
& PCIM_MSIXCTRL_MSIX_ENABLE
;
875 pi
->pi_msix
.function_mask
= val
& PCIM_MSIXCTRL_FUNCTION_MASK
;
876 pci_lintr_update(pi
);
879 CFGWRITE(pi
, offset
, val
, bytes
);
883 msicap_cfgwrite(struct pci_devinst
*pi
, int capoff
, int offset
,
884 int bytes
, uint32_t val
)
886 uint16_t msgctrl
, rwmask
, msgdata
, mme
;
890 * If guest is writing to the message control register make sure
891 * we do not overwrite read-only fields.
893 if ((offset
- capoff
) == 2 && bytes
== 2) {
894 rwmask
= PCIM_MSICTRL_MME_MASK
| PCIM_MSICTRL_MSI_ENABLE
;
895 msgctrl
= pci_get_cfgdata16(pi
, offset
);
897 msgctrl
|= val
& rwmask
;
900 addrlo
= pci_get_cfgdata32(pi
, capoff
+ 4);
901 if (msgctrl
& PCIM_MSICTRL_64BIT
)
902 msgdata
= pci_get_cfgdata16(pi
, capoff
+ 12);
904 msgdata
= pci_get_cfgdata16(pi
, capoff
+ 8);
906 mme
= msgctrl
& PCIM_MSICTRL_MME_MASK
;
907 pi
->pi_msi
.enabled
= msgctrl
& PCIM_MSICTRL_MSI_ENABLE
? 1 : 0;
908 if (pi
->pi_msi
.enabled
) {
909 pi
->pi_msi
.addr
= addrlo
;
910 pi
->pi_msi
.msg_data
= msgdata
;
911 pi
->pi_msi
.maxmsgnum
= 1 << (mme
>> 4);
913 pi
->pi_msi
.maxmsgnum
= 0;
915 pci_lintr_update(pi
);
918 CFGWRITE(pi
, offset
, val
, bytes
);
/* Config-space write handler for the PCI Express capability. */
static void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		 int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}
930 #define PCIECAP_VERSION 0x2
932 pci_emul_add_pciecap(struct pci_devinst
*pi
, int type
)
935 struct pciecap pciecap
;
937 if (type
!= PCIEM_TYPE_ROOT_PORT
)
940 bzero(&pciecap
, sizeof(pciecap
));
942 pciecap
.capid
= PCIY_EXPRESS
;
943 pciecap
.pcie_capabilities
= PCIECAP_VERSION
| PCIEM_TYPE_ROOT_PORT
;
944 pciecap
.link_capabilities
= 0x411; /* gen1, x1 */
945 pciecap
.link_status
= 0x11; /* gen1, x1 */
947 err
= pci_emul_add_capability(pi
, (u_char
*)&pciecap
, sizeof(pciecap
));
952 * This function assumes that 'coff' is in the capabilities region of the
956 pci_emul_capwrite(struct pci_devinst
*pi
, int offset
, int bytes
, uint32_t val
)
959 uint8_t capoff
, nextoff
;
961 /* Do not allow un-aligned writes */
962 if ((offset
& (bytes
- 1)) != 0)
965 /* Find the capability that we want to update */
966 capoff
= CAP_START_OFFSET
;
968 nextoff
= pci_get_cfgdata8(pi
, capoff
+ 1);
971 if (offset
>= capoff
&& offset
< nextoff
)
976 assert(offset
>= capoff
);
979 * Capability ID and Next Capability Pointer are readonly.
980 * However, some o/s's do 4-byte writes that include these.
981 * For this case, trim the write back to 2 bytes and adjust
984 if (offset
== capoff
|| offset
== capoff
+ 1) {
985 if (offset
== capoff
&& bytes
== 4) {
993 capid
= pci_get_cfgdata8(pi
, capoff
);
996 msicap_cfgwrite(pi
, capoff
, offset
, bytes
, val
);
999 msixcap_cfgwrite(pi
, capoff
, offset
, bytes
, val
);
1002 pciecap_cfgwrite(pi
, capoff
, offset
, bytes
, val
);
1010 pci_emul_iscap(struct pci_devinst
*pi
, int offset
)
1014 sts
= pci_get_cfgdata16(pi
, PCIR_STATUS
);
1015 if ((sts
& PCIM_STATUS_CAPPRESENT
) != 0) {
1016 if (offset
>= CAP_START_OFFSET
&& offset
<= pi
->pi_capend
)
1023 pci_emul_fallback_handler(struct vmctx
*ctx
, int vcpu
, int dir
, uint64_t addr
,
1024 int size
, uint64_t *val
, void *arg1
, long arg2
)
1027 * Ignore writes; return 0xff's for reads. The mem read code
1028 * will take care of truncating to the correct size.
1030 if (dir
== MEM_F_READ
) {
1031 *val
= 0xffffffffffffffff;
1038 pci_emul_ecfg_handler(struct vmctx
*ctx
, int vcpu
, int dir
, uint64_t addr
,
1039 int bytes
, uint64_t *val
, void *arg1
, long arg2
)
1041 int bus
, slot
, func
, coff
, in
;
1043 coff
= addr
& 0xfff;
1044 func
= (addr
>> 12) & 0x7;
1045 slot
= (addr
>> 15) & 0x1f;
1046 bus
= (addr
>> 20) & 0xff;
1047 in
= (dir
== MEM_F_READ
);
1050 pci_cfgrw(ctx
, vcpu
, in
, bus
, slot
, func
, coff
, bytes
, (uint32_t *)val
);
1058 return (PCI_EMUL_ECFG_BASE
);
1061 #define BUSIO_ROUNDUP 32
1062 #define BUSMEM_ROUNDUP (1024 * 1024)
1065 init_pci(struct vmctx
*ctx
)
1067 struct mem_range mr
;
1068 struct pci_devemu
*pde
;
1070 struct slotinfo
*si
;
1071 struct funcinfo
*fi
;
1073 int bus
, slot
, func
;
1076 pci_emul_iobase
= PCI_EMUL_IOBASE
;
1077 pci_emul_membase32
= vm_get_lowmem_limit(ctx
);
1078 pci_emul_membase64
= PCI_EMUL_MEMBASE64
;
1080 for (bus
= 0; bus
< MAXBUSES
; bus
++) {
1081 if ((bi
= pci_businfo
[bus
]) == NULL
)
1084 * Keep track of the i/o and memory resources allocated to
1087 bi
->iobase
= pci_emul_iobase
;
1088 bi
->membase32
= pci_emul_membase32
;
1089 bi
->membase64
= pci_emul_membase64
;
1091 for (slot
= 0; slot
< MAXSLOTS
; slot
++) {
1092 si
= &bi
->slotinfo
[slot
];
1093 for (func
= 0; func
< MAXFUNCS
; func
++) {
1094 fi
= &si
->si_funcs
[func
];
1095 if (fi
->fi_name
== NULL
)
1097 pde
= pci_emul_finddev(fi
->fi_name
);
1098 assert(pde
!= NULL
);
1099 error
= pci_emul_init(ctx
, pde
, bus
, slot
,
1107 * Add some slop to the I/O and memory resources decoded by
1108 * this bus to give a guest some flexibility if it wants to
1109 * reprogram the BARs.
1111 pci_emul_iobase
+= BUSIO_ROUNDUP
;
1112 pci_emul_iobase
= roundup2(pci_emul_iobase
, BUSIO_ROUNDUP
);
1113 bi
->iolimit
= pci_emul_iobase
;
1115 pci_emul_membase32
+= BUSMEM_ROUNDUP
;
1116 pci_emul_membase32
= roundup2(pci_emul_membase32
,
1118 bi
->memlimit32
= pci_emul_membase32
;
1120 pci_emul_membase64
+= BUSMEM_ROUNDUP
;
1121 pci_emul_membase64
= roundup2(pci_emul_membase64
,
1123 bi
->memlimit64
= pci_emul_membase64
;
1127 * PCI backends are initialized before routing INTx interrupts
1128 * so that LPC devices are able to reserve ISA IRQs before
1129 * routing PIRQ pins.
1131 for (bus
= 0; bus
< MAXBUSES
; bus
++) {
1132 if ((bi
= pci_businfo
[bus
]) == NULL
)
1135 for (slot
= 0; slot
< MAXSLOTS
; slot
++) {
1136 si
= &bi
->slotinfo
[slot
];
1137 for (func
= 0; func
< MAXFUNCS
; func
++) {
1138 fi
= &si
->si_funcs
[func
];
1139 if (fi
->fi_devi
== NULL
)
1141 pci_lintr_route(fi
->fi_devi
);
1148 * The guest physical memory map looks like the following:
1149 * [0, lowmem) guest system memory
1150 * [lowmem, lowmem_limit) memory hole (may be absent)
1151 * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation)
1152 * [0xE0000000, 0xF0000000) PCI extended config window
1153 * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware
1154 * [4GB, 4GB + highmem)
1158 * Accesses to memory addresses that are not allocated to system
1159 * memory or PCI devices return 0xff's.
1161 lowmem
= vm_get_lowmem_size(ctx
);
1162 bzero(&mr
, sizeof(struct mem_range
));
1163 mr
.name
= "PCI hole";
1164 mr
.flags
= MEM_F_RW
| MEM_F_IMMUTABLE
;
1166 mr
.size
= (4ULL * 1024 * 1024 * 1024) - lowmem
;
1167 mr
.handler
= pci_emul_fallback_handler
;
1168 error
= register_mem_fallback(&mr
);
1171 /* PCI extended config space */
1172 bzero(&mr
, sizeof(struct mem_range
));
1173 mr
.name
= "PCI ECFG";
1174 mr
.flags
= MEM_F_RW
| MEM_F_IMMUTABLE
;
1175 mr
.base
= PCI_EMUL_ECFG_BASE
;
1176 mr
.size
= PCI_EMUL_ECFG_SIZE
;
1177 mr
.handler
= pci_emul_ecfg_handler
;
1178 error
= register_mem(&mr
);
1185 pci_apic_prt_entry(int bus
, int slot
, int pin
, int pirq_pin
, int ioapic_irq
,
1189 dsdt_line(" Package ()");
1191 dsdt_line(" 0x%X,", slot
<< 16 | 0xffff);
1192 dsdt_line(" 0x%02X,", pin
- 1);
1193 dsdt_line(" Zero,");
1194 dsdt_line(" 0x%X", ioapic_irq
);
1199 pci_pirq_prt_entry(int bus
, int slot
, int pin
, int pirq_pin
, int ioapic_irq
,
1204 name
= lpc_pirq_name(pirq_pin
);
1207 dsdt_line(" Package ()");
1209 dsdt_line(" 0x%X,", slot
<< 16 | 0xffff);
1210 dsdt_line(" 0x%02X,", pin
- 1);
1211 dsdt_line(" %s,", name
);
1218 * A bhyve virtual machine has a flat PCI hierarchy with a root port
1219 * corresponding to each PCI bus.
1222 pci_bus_write_dsdt(int bus
)
1225 struct slotinfo
*si
;
1226 struct pci_devinst
*pi
;
1227 int count
, func
, slot
;
1230 * If there are no devices on this 'bus' then just return.
1232 if ((bi
= pci_businfo
[bus
]) == NULL
) {
1234 * Bus 0 is special because it decodes the I/O ports used
1235 * for PCI config space access even if there are no devices
1242 dsdt_line(" Device (PC%02X)", bus
);
1244 dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))");
1245 dsdt_line(" Name (_ADR, Zero)");
1247 dsdt_line(" Method (_BBN, 0, NotSerialized)");
1249 dsdt_line(" Return (0x%08X)", bus
);
1251 dsdt_line(" Name (_CRS, ResourceTemplate ()");
1253 dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, "
1254 "MaxFixed, PosDecode,");
1255 dsdt_line(" 0x0000, // Granularity");
1256 dsdt_line(" 0x%04X, // Range Minimum", bus
);
1257 dsdt_line(" 0x%04X, // Range Maximum", bus
);
1258 dsdt_line(" 0x0000, // Translation Offset");
1259 dsdt_line(" 0x0001, // Length");
1264 dsdt_fixed_ioport(0xCF8, 8);
1267 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
1268 "PosDecode, EntireRange,");
1269 dsdt_line(" 0x0000, // Granularity");
1270 dsdt_line(" 0x0000, // Range Minimum");
1271 dsdt_line(" 0x0CF7, // Range Maximum");
1272 dsdt_line(" 0x0000, // Translation Offset");
1273 dsdt_line(" 0x0CF8, // Length");
1274 dsdt_line(" ,, , TypeStatic)");
1276 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
1277 "PosDecode, EntireRange,");
1278 dsdt_line(" 0x0000, // Granularity");
1279 dsdt_line(" 0x0D00, // Range Minimum");
1280 dsdt_line(" 0x%04X, // Range Maximum",
1281 PCI_EMUL_IOBASE
- 1);
1282 dsdt_line(" 0x0000, // Translation Offset");
1283 dsdt_line(" 0x%04X, // Length",
1284 PCI_EMUL_IOBASE
- 0x0D00);
1285 dsdt_line(" ,, , TypeStatic)");
1295 dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, "
1296 "PosDecode, EntireRange,");
1297 dsdt_line(" 0x0000, // Granularity");
1298 dsdt_line(" 0x%04X, // Range Minimum", bi
->iobase
);
1299 dsdt_line(" 0x%04X, // Range Maximum",
1301 dsdt_line(" 0x0000, // Translation Offset");
1302 dsdt_line(" 0x%04X, // Length",
1303 bi
->iolimit
- bi
->iobase
);
1304 dsdt_line(" ,, , TypeStatic)");
1306 /* mmio window (32-bit) */
1307 dsdt_line(" DWordMemory (ResourceProducer, PosDecode, "
1308 "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
1309 dsdt_line(" 0x00000000, // Granularity");
1310 dsdt_line(" 0x%08X, // Range Minimum\n", bi
->membase32
);
1311 dsdt_line(" 0x%08X, // Range Maximum\n",
1312 bi
->memlimit32
- 1);
1313 dsdt_line(" 0x00000000, // Translation Offset");
1314 dsdt_line(" 0x%08X, // Length\n",
1315 bi
->memlimit32
- bi
->membase32
);
1316 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");
1318 /* mmio window (64-bit) */
1319 dsdt_line(" QWordMemory (ResourceProducer, PosDecode, "
1320 "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
1321 dsdt_line(" 0x0000000000000000, // Granularity");
1322 dsdt_line(" 0x%016lX, // Range Minimum\n", bi
->membase64
);
1323 dsdt_line(" 0x%016lX, // Range Maximum\n",
1324 bi
->memlimit64
- 1);
1325 dsdt_line(" 0x0000000000000000, // Translation Offset");
1326 dsdt_line(" 0x%016lX, // Length\n",
1327 bi
->memlimit64
- bi
->membase64
);
1328 dsdt_line(" ,, , AddressRangeMemory, TypeStatic)");
1331 count
= pci_count_lintr(bus
);
1334 dsdt_line("Name (PPRT, Package ()");
1336 pci_walk_lintr(bus
, pci_pirq_prt_entry
, NULL
);
1338 dsdt_line("Name (APRT, Package ()");
1340 pci_walk_lintr(bus
, pci_apic_prt_entry
, NULL
);
1342 dsdt_line("Method (_PRT, 0, NotSerialized)");
1344 dsdt_line(" If (PICM)");
1346 dsdt_line(" Return (APRT)");
1350 dsdt_line(" Return (PPRT)");
1357 for (slot
= 0; slot
< MAXSLOTS
; slot
++) {
1358 si
= &bi
->slotinfo
[slot
];
1359 for (func
= 0; func
< MAXFUNCS
; func
++) {
1360 pi
= si
->si_funcs
[func
].fi_devi
;
1361 if (pi
!= NULL
&& pi
->pi_d
->pe_write_dsdt
!= NULL
)
1362 pi
->pi_d
->pe_write_dsdt(pi
);
1371 pci_write_dsdt(void)
1376 dsdt_line("Name (PICM, 0x00)");
1377 dsdt_line("Method (_PIC, 1, NotSerialized)");
1379 dsdt_line(" Store (Arg0, PICM)");
1382 dsdt_line("Scope (_SB)");
1384 for (bus
= 0; bus
< MAXBUSES
; bus
++)
1385 pci_bus_write_dsdt(bus
);
1391 pci_bus_configured(int bus
)
1393 assert(bus
>= 0 && bus
< MAXBUSES
);
1394 return (pci_businfo
[bus
] != NULL
);
1398 pci_msi_enabled(struct pci_devinst
*pi
)
1400 return (pi
->pi_msi
.enabled
);
1404 pci_msi_maxmsgnum(struct pci_devinst
*pi
)
1406 if (pi
->pi_msi
.enabled
)
1407 return (pi
->pi_msi
.maxmsgnum
);
1413 pci_msix_enabled(struct pci_devinst
*pi
)
1416 return (pi
->pi_msix
.enabled
&& !pi
->pi_msi
.enabled
);
1420 pci_generate_msix(struct pci_devinst
*pi
, int index
)
1422 struct msix_table_entry
*mte
;
1424 if (!pci_msix_enabled(pi
))
1427 if (pi
->pi_msix
.function_mask
)
1430 if (index
>= pi
->pi_msix
.table_count
)
1433 mte
= &pi
->pi_msix
.table
[index
];
1434 if ((mte
->vector_control
& PCIM_MSIX_VCTRL_MASK
) == 0) {
1435 /* XXX Set PBA bit if interrupt is disabled */
1436 vm_lapic_msi(pi
->pi_vmctx
, mte
->addr
, mte
->msg_data
);
1441 pci_generate_msi(struct pci_devinst
*pi
, int index
)
1444 if (pci_msi_enabled(pi
) && index
< pci_msi_maxmsgnum(pi
)) {
1445 vm_lapic_msi(pi
->pi_vmctx
, pi
->pi_msi
.addr
,
1446 pi
->pi_msi
.msg_data
+ index
);
1451 pci_lintr_permitted(struct pci_devinst
*pi
)
1455 cmd
= pci_get_cfgdata16(pi
, PCIR_COMMAND
);
1456 return (!(pi
->pi_msi
.enabled
|| pi
->pi_msix
.enabled
||
1457 (cmd
& PCIM_CMD_INTxDIS
)));
1461 pci_lintr_request(struct pci_devinst
*pi
)
1464 struct slotinfo
*si
;
1465 int bestpin
, bestcount
, pin
;
1467 bi
= pci_businfo
[pi
->pi_bus
];
1471 * Just allocate a pin from our slot. The pin will be
1472 * assigned IRQs later when interrupts are routed.
1474 si
= &bi
->slotinfo
[pi
->pi_slot
];
1476 bestcount
= si
->si_intpins
[0].ii_count
;
1477 for (pin
= 1; pin
< 4; pin
++) {
1478 if (si
->si_intpins
[pin
].ii_count
< bestcount
) {
1480 bestcount
= si
->si_intpins
[pin
].ii_count
;
1484 si
->si_intpins
[bestpin
].ii_count
++;
1485 pi
->pi_lintr
.pin
= bestpin
+ 1;
1486 pci_set_cfgdata8(pi
, PCIR_INTPIN
, bestpin
+ 1);
1490 pci_lintr_route(struct pci_devinst
*pi
)
1493 struct intxinfo
*ii
;
1495 if (pi
->pi_lintr
.pin
== 0)
1498 bi
= pci_businfo
[pi
->pi_bus
];
1500 ii
= &bi
->slotinfo
[pi
->pi_slot
].si_intpins
[pi
->pi_lintr
.pin
- 1];
1503 * Attempt to allocate an I/O APIC pin for this intpin if one
1504 * is not yet assigned.
1506 if (ii
->ii_ioapic_irq
== 0)
1507 ii
->ii_ioapic_irq
= ioapic_pci_alloc_irq(pi
);
1508 assert(ii
->ii_ioapic_irq
> 0);
1511 * Attempt to allocate a PIRQ pin for this intpin if one is
1514 if (ii
->ii_pirq_pin
== 0)
1515 ii
->ii_pirq_pin
= pirq_alloc_pin(pi
);
1516 assert(ii
->ii_pirq_pin
> 0);
1518 pi
->pi_lintr
.ioapic_irq
= ii
->ii_ioapic_irq
;
1519 pi
->pi_lintr
.pirq_pin
= ii
->ii_pirq_pin
;
1520 pci_set_cfgdata8(pi
, PCIR_INTLINE
, pirq_irq(ii
->ii_pirq_pin
));
1524 pci_lintr_assert(struct pci_devinst
*pi
)
1527 assert(pi
->pi_lintr
.pin
> 0);
1529 pthread_mutex_lock(&pi
->pi_lintr
.lock
);
1530 if (pi
->pi_lintr
.state
== IDLE
) {
1531 if (pci_lintr_permitted(pi
)) {
1532 pi
->pi_lintr
.state
= ASSERTED
;
1535 pi
->pi_lintr
.state
= PENDING
;
1537 pthread_mutex_unlock(&pi
->pi_lintr
.lock
);
1541 pci_lintr_deassert(struct pci_devinst
*pi
)
1544 assert(pi
->pi_lintr
.pin
> 0);
1546 pthread_mutex_lock(&pi
->pi_lintr
.lock
);
1547 if (pi
->pi_lintr
.state
== ASSERTED
) {
1548 pi
->pi_lintr
.state
= IDLE
;
1549 pci_irq_deassert(pi
);
1550 } else if (pi
->pi_lintr
.state
== PENDING
)
1551 pi
->pi_lintr
.state
= IDLE
;
1552 pthread_mutex_unlock(&pi
->pi_lintr
.lock
);
1556 pci_lintr_update(struct pci_devinst
*pi
)
1559 pthread_mutex_lock(&pi
->pi_lintr
.lock
);
1560 if (pi
->pi_lintr
.state
== ASSERTED
&& !pci_lintr_permitted(pi
)) {
1561 pci_irq_deassert(pi
);
1562 pi
->pi_lintr
.state
= PENDING
;
1563 } else if (pi
->pi_lintr
.state
== PENDING
&& pci_lintr_permitted(pi
)) {
1564 pi
->pi_lintr
.state
= ASSERTED
;
1567 pthread_mutex_unlock(&pi
->pi_lintr
.lock
);
1571 pci_count_lintr(int bus
)
1573 int count
, slot
, pin
;
1574 struct slotinfo
*slotinfo
;
1577 if (pci_businfo
[bus
] != NULL
) {
1578 for (slot
= 0; slot
< MAXSLOTS
; slot
++) {
1579 slotinfo
= &pci_businfo
[bus
]->slotinfo
[slot
];
1580 for (pin
= 0; pin
< 4; pin
++) {
1581 if (slotinfo
->si_intpins
[pin
].ii_count
!= 0)
1590 pci_walk_lintr(int bus
, pci_lintr_cb cb
, void *arg
)
1593 struct slotinfo
*si
;
1594 struct intxinfo
*ii
;
1597 if ((bi
= pci_businfo
[bus
]) == NULL
)
1600 for (slot
= 0; slot
< MAXSLOTS
; slot
++) {
1601 si
= &bi
->slotinfo
[slot
];
1602 for (pin
= 0; pin
< 4; pin
++) {
1603 ii
= &si
->si_intpins
[pin
];
1604 if (ii
->ii_count
!= 0)
1605 cb(bus
, slot
, pin
+ 1, ii
->ii_pirq_pin
,
1606 ii
->ii_ioapic_irq
, arg
);
1612 * Return 1 if the emulated device in 'slot' is a multi-function device.
1613 * Return 0 otherwise.
1616 pci_emul_is_mfdev(int bus
, int slot
)
1619 struct slotinfo
*si
;
1623 if ((bi
= pci_businfo
[bus
]) != NULL
) {
1624 si
= &bi
->slotinfo
[slot
];
1625 for (f
= 0; f
< MAXFUNCS
; f
++) {
1626 if (si
->si_funcs
[f
].fi_devi
!= NULL
) {
1631 return (numfuncs
> 1);
1635 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
1636 * whether or not is a multi-function being emulated in the pci 'slot'.
1639 pci_emul_hdrtype_fixup(int bus
, int slot
, int off
, int bytes
, uint32_t *rv
)
1643 if (off
<= PCIR_HDRTYPE
&& off
+ bytes
> PCIR_HDRTYPE
) {
1644 mfdev
= pci_emul_is_mfdev(bus
, slot
);
1654 *rv
&= ~(PCIM_MFDEV
<< 16);
1656 *rv
|= (PCIM_MFDEV
<< 16);
1664 pci_emul_cmdsts_write(struct pci_devinst
*pi
, int coff
, uint32_t new, int bytes
)
1667 uint32_t cmd
, cmd2
, changed
, old
, readonly
;
1669 cmd
= pci_get_cfgdata16(pi
, PCIR_COMMAND
); /* stash old value */
1672 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
1674 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
1675 * 'write 1 to clear'. However these bits are not set to '1' by
1676 * any device emulation so it is simpler to treat them as readonly.
1678 rshift
= (coff
& 0x3) * 8;
1679 readonly
= 0xFFFFF880 >> rshift
;
1681 old
= CFGREAD(pi
, coff
, bytes
);
1683 new |= (old
& readonly
);
1684 CFGWRITE(pi
, coff
, new, bytes
); /* update config */
1686 cmd2
= pci_get_cfgdata16(pi
, PCIR_COMMAND
); /* get updated value */
1687 changed
= cmd
^ cmd2
;
1690 * If the MMIO or I/O address space decoding has changed then
1691 * register/unregister all BARs that decode that address space.
1693 for (i
= 0; i
<= PCI_BARMAX
; i
++) {
1694 switch (pi
->pi_bar
[i
].type
) {
1696 case PCIBAR_MEMHI64
:
1699 /* I/O address space decoding changed? */
1700 if (changed
& PCIM_CMD_PORTEN
) {
1702 register_bar(pi
, i
);
1704 unregister_bar(pi
, i
);
1709 /* MMIO address space decoding changed? */
1710 if (changed
& PCIM_CMD_MEMEN
) {
1712 register_bar(pi
, i
);
1714 unregister_bar(pi
, i
);
1723 * If INTx has been unmasked and is pending, assert the
1726 pci_lintr_update(pi
);
1730 pci_cfgrw(struct vmctx
*ctx
, int vcpu
, int in
, int bus
, int slot
, int func
,
1731 int coff
, int bytes
, uint32_t *eax
)
1734 struct slotinfo
*si
;
1735 struct pci_devinst
*pi
;
1736 struct pci_devemu
*pe
;
1738 uint64_t addr
, bar
, mask
;
1740 if ((bi
= pci_businfo
[bus
]) != NULL
) {
1741 si
= &bi
->slotinfo
[slot
];
1742 pi
= si
->si_funcs
[func
].fi_devi
;
1747 * Just return if there is no device at this slot:func or if the
1748 * the guest is doing an un-aligned access.
1750 if (pi
== NULL
|| (bytes
!= 1 && bytes
!= 2 && bytes
!= 4) ||
1751 (coff
& (bytes
- 1)) != 0) {
1758 * Ignore all writes beyond the standard config space and return all
1761 if (coff
>= PCI_REGMAX
+ 1) {
1765 * Extended capabilities begin at offset 256 in config
1766 * space. Absence of extended capabilities is signaled
1767 * with all 0s in the extended capability header at
1770 if (coff
<= PCI_REGMAX
+ 4)
1782 /* Let the device emulation override the default handler */
1783 if (pe
->pe_cfgread
!= NULL
) {
1784 needcfg
= pe
->pe_cfgread(ctx
, vcpu
, pi
, coff
, bytes
,
1791 *eax
= CFGREAD(pi
, coff
, bytes
);
1793 pci_emul_hdrtype_fixup(bus
, slot
, coff
, bytes
, eax
);
1795 /* Let the device emulation override the default handler */
1796 if (pe
->pe_cfgwrite
!= NULL
&&
1797 (*pe
->pe_cfgwrite
)(ctx
, vcpu
, pi
, coff
, bytes
, *eax
) == 0)
1801 * Special handling for write to BAR registers
1803 if (coff
>= PCIR_BAR(0) && coff
< PCIR_BAR(PCI_BARMAX
+ 1)) {
1805 * Ignore writes to BAR registers that are not
1808 if (bytes
!= 4 || (coff
& 0x3) != 0)
1810 idx
= (coff
- PCIR_BAR(0)) / 4;
1811 mask
= ~(pi
->pi_bar
[idx
].size
- 1);
1812 switch (pi
->pi_bar
[idx
].type
) {
1814 pi
->pi_bar
[idx
].addr
= bar
= 0;
1819 bar
= addr
| PCIM_BAR_IO_SPACE
;
1821 * Register the new BAR value for interception
1823 if (addr
!= pi
->pi_bar
[idx
].addr
) {
1824 update_bar_address(pi
, addr
, idx
,
1829 addr
= bar
= *eax
& mask
;
1830 bar
|= PCIM_BAR_MEM_SPACE
| PCIM_BAR_MEM_32
;
1831 if (addr
!= pi
->pi_bar
[idx
].addr
) {
1832 update_bar_address(pi
, addr
, idx
,
1837 addr
= bar
= *eax
& mask
;
1838 bar
|= PCIM_BAR_MEM_SPACE
| PCIM_BAR_MEM_64
|
1839 PCIM_BAR_MEM_PREFETCH
;
1840 if (addr
!= (uint32_t)pi
->pi_bar
[idx
].addr
) {
1841 update_bar_address(pi
, addr
, idx
,
1845 case PCIBAR_MEMHI64
:
1846 mask
= ~(pi
->pi_bar
[idx
- 1].size
- 1);
1847 addr
= ((uint64_t)*eax
<< 32) & mask
;
1849 if (bar
!= pi
->pi_bar
[idx
- 1].addr
>> 32) {
1850 update_bar_address(pi
, addr
, idx
- 1,
1857 pci_set_cfgdata32(pi
, coff
, bar
);
1859 } else if (pci_emul_iscap(pi
, coff
)) {
1860 pci_emul_capwrite(pi
, coff
, bytes
, *eax
);
1861 } else if (coff
>= PCIR_COMMAND
&& coff
< PCIR_REVID
) {
1862 pci_emul_cmdsts_write(pi
, coff
, *eax
, bytes
);
1864 CFGWRITE(pi
, coff
, *eax
, bytes
);
1869 static int cfgenable
, cfgbus
, cfgslot
, cfgfunc
, cfgoff
;
1872 pci_emul_cfgaddr(struct vmctx
*ctx
, int vcpu
, int in
, int port
, int bytes
,
1873 uint32_t *eax
, void *arg
)
1879 *eax
= (bytes
== 2) ? 0xffff : 0xff;
1884 x
= (cfgbus
<< 16) | (cfgslot
<< 11) | (cfgfunc
<< 8) | cfgoff
;
1890 cfgenable
= (x
& CONF1_ENABLE
) == CONF1_ENABLE
;
1891 cfgoff
= x
& PCI_REGMAX
;
1892 cfgfunc
= (x
>> 8) & PCI_FUNCMAX
;
1893 cfgslot
= (x
>> 11) & PCI_SLOTMAX
;
1894 cfgbus
= (x
>> 16) & PCI_BUSMAX
;
1899 INOUT_PORT(pci_cfgaddr
, CONF1_ADDR_PORT
, IOPORT_F_INOUT
, pci_emul_cfgaddr
);
1902 pci_emul_cfgdata(struct vmctx
*ctx
, int vcpu
, int in
, int port
, int bytes
,
1903 uint32_t *eax
, void *arg
)
1907 assert(bytes
== 1 || bytes
== 2 || bytes
== 4);
1909 coff
= cfgoff
+ (port
- CONF1_DATA_PORT
);
1911 pci_cfgrw(ctx
, vcpu
, in
, cfgbus
, cfgslot
, cfgfunc
, coff
, bytes
,
1914 /* Ignore accesses to cfgdata if not enabled by cfgaddr */
1921 INOUT_PORT(pci_cfgdata
, CONF1_DATA_PORT
+0, IOPORT_F_INOUT
, pci_emul_cfgdata
);
1922 INOUT_PORT(pci_cfgdata
, CONF1_DATA_PORT
+1, IOPORT_F_INOUT
, pci_emul_cfgdata
);
1923 INOUT_PORT(pci_cfgdata
, CONF1_DATA_PORT
+2, IOPORT_F_INOUT
, pci_emul_cfgdata
);
1924 INOUT_PORT(pci_cfgdata
, CONF1_DATA_PORT
+3, IOPORT_F_INOUT
, pci_emul_cfgdata
);
1926 #define PCI_EMUL_TEST
1927 #ifdef PCI_EMUL_TEST
1929 * Define a dummy test device
1933 struct pci_emul_dsoftc
{
1934 uint8_t ioregs
[DIOSZ
];
1935 uint8_t memregs
[2][DMEMSZ
];
1938 #define PCI_EMUL_MSI_MSGS 4
1939 #define PCI_EMUL_MSIX_MSGS 16
1942 pci_emul_dinit(struct vmctx
*ctx
, struct pci_devinst
*pi
, char *opts
)
1945 struct pci_emul_dsoftc
*sc
;
1947 sc
= calloc(1, sizeof(struct pci_emul_dsoftc
));
1951 pci_set_cfgdata16(pi
, PCIR_DEVICE
, 0x0001);
1952 pci_set_cfgdata16(pi
, PCIR_VENDOR
, 0x10DD);
1953 pci_set_cfgdata8(pi
, PCIR_CLASS
, 0x02);
1955 error
= pci_emul_add_msicap(pi
, PCI_EMUL_MSI_MSGS
);
1958 error
= pci_emul_alloc_bar(pi
, 0, PCIBAR_IO
, DIOSZ
);
1961 error
= pci_emul_alloc_bar(pi
, 1, PCIBAR_MEM32
, DMEMSZ
);
1964 error
= pci_emul_alloc_bar(pi
, 2, PCIBAR_MEM32
, DMEMSZ
);
1971 pci_emul_diow(struct vmctx
*ctx
, int vcpu
, struct pci_devinst
*pi
, int baridx
,
1972 uint64_t offset
, int size
, uint64_t value
)
1975 struct pci_emul_dsoftc
*sc
= pi
->pi_arg
;
1978 if (offset
+ size
> DIOSZ
) {
1979 printf("diow: iow too large, offset %ld size %d\n",
1985 sc
->ioregs
[offset
] = value
& 0xff;
1986 } else if (size
== 2) {
1987 *(uint16_t *)&sc
->ioregs
[offset
] = value
& 0xffff;
1988 } else if (size
== 4) {
1989 *(uint32_t *)&sc
->ioregs
[offset
] = value
;
1991 printf("diow: iow unknown size %d\n", size
);
1995 * Special magic value to generate an interrupt
1997 if (offset
== 4 && size
== 4 && pci_msi_enabled(pi
))
1998 pci_generate_msi(pi
, value
% pci_msi_maxmsgnum(pi
));
2000 if (value
== 0xabcdef) {
2001 for (i
= 0; i
< pci_msi_maxmsgnum(pi
); i
++)
2002 pci_generate_msi(pi
, i
);
2006 if (baridx
== 1 || baridx
== 2) {
2007 if (offset
+ size
> DMEMSZ
) {
2008 printf("diow: memw too large, offset %ld size %d\n",
2013 i
= baridx
- 1; /* 'memregs' index */
2016 sc
->memregs
[i
][offset
] = value
;
2017 } else if (size
== 2) {
2018 *(uint16_t *)&sc
->memregs
[i
][offset
] = value
;
2019 } else if (size
== 4) {
2020 *(uint32_t *)&sc
->memregs
[i
][offset
] = value
;
2021 } else if (size
== 8) {
2022 *(uint64_t *)&sc
->memregs
[i
][offset
] = value
;
2024 printf("diow: memw unknown size %d\n", size
);
2028 * magic interrupt ??
2032 if (baridx
> 2 || baridx
< 0) {
2033 printf("diow: unknown bar idx %d\n", baridx
);
2038 pci_emul_dior(struct vmctx
*ctx
, int vcpu
, struct pci_devinst
*pi
, int baridx
,
2039 uint64_t offset
, int size
)
2041 struct pci_emul_dsoftc
*sc
= pi
->pi_arg
;
2046 if (offset
+ size
> DIOSZ
) {
2047 printf("dior: ior too large, offset %ld size %d\n",
2054 value
= sc
->ioregs
[offset
];
2055 } else if (size
== 2) {
2056 value
= *(uint16_t *) &sc
->ioregs
[offset
];
2057 } else if (size
== 4) {
2058 value
= *(uint32_t *) &sc
->ioregs
[offset
];
2060 printf("dior: ior unknown size %d\n", size
);
2064 if (baridx
== 1 || baridx
== 2) {
2065 if (offset
+ size
> DMEMSZ
) {
2066 printf("dior: memr too large, offset %ld size %d\n",
2071 i
= baridx
- 1; /* 'memregs' index */
2074 value
= sc
->memregs
[i
][offset
];
2075 } else if (size
== 2) {
2076 value
= *(uint16_t *) &sc
->memregs
[i
][offset
];
2077 } else if (size
== 4) {
2078 value
= *(uint32_t *) &sc
->memregs
[i
][offset
];
2079 } else if (size
== 8) {
2080 value
= *(uint64_t *) &sc
->memregs
[i
][offset
];
2082 printf("dior: ior unknown size %d\n", size
);
2087 if (baridx
> 2 || baridx
< 0) {
2088 printf("dior: unknown bar idx %d\n", baridx
);
2095 struct pci_devemu pci_dummy
= {
2097 .pe_init
= pci_emul_dinit
,
2098 .pe_barwrite
= pci_emul_diow
,
2099 .pe_barread
= pci_emul_dior
2101 PCI_EMUL_SET(pci_dummy
);
2103 #endif /* PCI_EMUL_TEST */