/*
 * QEMU sPAPR PCI host for VFIO
 *
 * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
21 #include <sys/ioctl.h>
22 #include <linux/vfio.h>
23 #include "hw/ppc/spapr.h"
24 #include "hw/pci-host/spapr.h"
25 #include "hw/pci/msix.h"
26 #include "hw/pci/pci_device.h"
27 #include "hw/vfio/vfio-common.h"
28 #include "qemu/error-report.h"
29 #include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
/*
 * Interfaces for IBM EEH (Enhanced Error Handling)
 */
34 #ifdef CONFIG_VFIO_PCI
35 static bool vfio_eeh_container_ok(VFIOContainer
*container
)
38 * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
39 * implementation is broken if there are multiple groups in a
40 * container. The hardware works in units of Partitionable
41 * Endpoints (== IOMMU groups) and the EEH operations naively
42 * iterate across all groups in the container, without any logic
43 * to make sure the groups have their state synchronized. For
44 * certain operations (ENABLE) that might be ok, until an error
45 * occurs, but for others (GET_STATE) it's clearly broken.
49 * XXX Once fixed kernels exist, test for them here
52 if (QLIST_EMPTY(&container
->group_list
)) {
56 if (QLIST_NEXT(QLIST_FIRST(&container
->group_list
), container_next
)) {
63 static int vfio_eeh_container_op(VFIOContainer
*container
, uint32_t op
)
65 struct vfio_eeh_pe_op pe_op
= {
66 .argsz
= sizeof(pe_op
),
71 if (!vfio_eeh_container_ok(container
)) {
72 error_report("vfio/eeh: EEH_PE_OP 0x%x: "
73 "kernel requires a container with exactly one group", op
);
77 ret
= ioctl(container
->fd
, VFIO_EEH_PE_OP
, &pe_op
);
79 error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op
);
86 static VFIOContainer
*vfio_eeh_as_container(AddressSpace
*as
)
88 VFIOAddressSpace
*space
= vfio_get_address_space(as
);
89 VFIOContainerBase
*bcontainer
= NULL
;
91 if (QLIST_EMPTY(&space
->containers
)) {
92 /* No containers to act on */
96 bcontainer
= QLIST_FIRST(&space
->containers
);
98 if (QLIST_NEXT(bcontainer
, next
)) {
100 * We don't yet have logic to synchronize EEH state across
101 * multiple containers
108 vfio_put_address_space(space
);
109 return container_of(bcontainer
, VFIOContainer
, bcontainer
);
112 static bool vfio_eeh_as_ok(AddressSpace
*as
)
114 VFIOContainer
*container
= vfio_eeh_as_container(as
);
116 return (container
!= NULL
) && vfio_eeh_container_ok(container
);
119 static int vfio_eeh_as_op(AddressSpace
*as
, uint32_t op
)
121 VFIOContainer
*container
= vfio_eeh_as_container(as
);
126 return vfio_eeh_container_op(container
, op
);
129 bool spapr_phb_eeh_available(SpaprPhbState
*sphb
)
131 return vfio_eeh_as_ok(&sphb
->iommu_as
);
134 static void spapr_phb_vfio_eeh_reenable(SpaprPhbState
*sphb
)
136 vfio_eeh_as_op(&sphb
->iommu_as
, VFIO_EEH_PE_ENABLE
);
139 void spapr_phb_vfio_reset(DeviceState
*qdev
)
142 * The PE might be in frozen state. To reenable the EEH
143 * functionality on it will clean the frozen state, which
144 * ensures that the contained PCI devices will work properly
147 spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev
));
150 static void spapr_eeh_pci_find_device(PCIBus
*bus
, PCIDevice
*pdev
,
153 bool *found
= opaque
;
155 if (object_dynamic_cast(OBJECT(pdev
), "vfio-pci")) {
160 int spapr_phb_vfio_eeh_set_option(SpaprPhbState
*sphb
,
161 unsigned int addr
, int option
)
167 case RTAS_EEH_DISABLE
:
168 op
= VFIO_EEH_PE_DISABLE
;
170 case RTAS_EEH_ENABLE
: {
175 * The EEH functionality is enabled per sphb level instead of
176 * per PCI device. We have already identified this specific sphb
177 * based on buid passed as argument to ibm,set-eeh-option rtas
178 * call. Now we just need to check the validity of the PCI
179 * pass-through devices (vfio-pci) under this sphb bus.
180 * We have already validated that all the devices under this sphb
181 * are from same iommu group (within same PE) before coming here.
183 * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
184 * Rework device EEH PE determination") kernel would call
185 * eeh-set-option for each device in the PE using the device's
186 * config_address as the argument rather than the PE address.
187 * Hence if we check validity of supplied config_addr whether
188 * it matches to this PHB will cause issues with older kernel
189 * versions v5.9 and older. If we return an error from
190 * eeh-set-option when the argument isn't a valid PE address
191 * then older kernels (v5.9 and older) will interpret that as
192 * EEH not being supported.
194 phb
= PCI_HOST_BRIDGE(sphb
);
195 pci_for_each_device(phb
->bus
, (addr
>> 16) & 0xFF,
196 spapr_eeh_pci_find_device
, &found
);
199 return RTAS_OUT_PARAM_ERROR
;
202 op
= VFIO_EEH_PE_ENABLE
;
205 case RTAS_EEH_THAW_IO
:
206 op
= VFIO_EEH_PE_UNFREEZE_IO
;
208 case RTAS_EEH_THAW_DMA
:
209 op
= VFIO_EEH_PE_UNFREEZE_DMA
;
212 return RTAS_OUT_PARAM_ERROR
;
215 ret
= vfio_eeh_as_op(&sphb
->iommu_as
, op
);
217 return RTAS_OUT_HW_ERROR
;
220 return RTAS_OUT_SUCCESS
;
223 int spapr_phb_vfio_eeh_get_state(SpaprPhbState
*sphb
, int *state
)
227 ret
= vfio_eeh_as_op(&sphb
->iommu_as
, VFIO_EEH_PE_GET_STATE
);
229 return RTAS_OUT_PARAM_ERROR
;
233 return RTAS_OUT_SUCCESS
;
236 static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus
*bus
,
240 /* Check if the device is VFIO PCI device */
241 if (!object_dynamic_cast(OBJECT(pdev
), "vfio-pci")) {
246 * The MSIx table will be cleaned out by reset. We need
247 * disable it so that it can be reenabled properly. Also,
248 * the cached MSIx table should be cleared as it's not
249 * reflecting the contents in hardware.
251 if (msix_enabled(pdev
)) {
254 flags
= pci_host_config_read_common(pdev
,
255 pdev
->msix_cap
+ PCI_MSIX_FLAGS
,
256 pci_config_size(pdev
), 2);
257 flags
&= ~PCI_MSIX_FLAGS_ENABLE
;
258 pci_host_config_write_common(pdev
,
259 pdev
->msix_cap
+ PCI_MSIX_FLAGS
,
260 pci_config_size(pdev
), flags
, 2);
266 static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus
*bus
, void *opaque
)
268 pci_for_each_device_under_bus(bus
, spapr_phb_vfio_eeh_clear_dev_msix
,
272 static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState
*sphb
)
274 PCIHostState
*phb
= PCI_HOST_BRIDGE(sphb
);
276 pci_for_each_bus(phb
->bus
, spapr_phb_vfio_eeh_clear_bus_msix
, NULL
);
279 int spapr_phb_vfio_eeh_reset(SpaprPhbState
*sphb
, int option
)
285 case RTAS_SLOT_RESET_DEACTIVATE
:
286 op
= VFIO_EEH_PE_RESET_DEACTIVATE
;
288 case RTAS_SLOT_RESET_HOT
:
289 spapr_phb_vfio_eeh_pre_reset(sphb
);
290 op
= VFIO_EEH_PE_RESET_HOT
;
292 case RTAS_SLOT_RESET_FUNDAMENTAL
:
293 spapr_phb_vfio_eeh_pre_reset(sphb
);
294 op
= VFIO_EEH_PE_RESET_FUNDAMENTAL
;
297 return RTAS_OUT_PARAM_ERROR
;
300 ret
= vfio_eeh_as_op(&sphb
->iommu_as
, op
);
302 return RTAS_OUT_HW_ERROR
;
305 return RTAS_OUT_SUCCESS
;
308 int spapr_phb_vfio_eeh_configure(SpaprPhbState
*sphb
)
312 ret
= vfio_eeh_as_op(&sphb
->iommu_as
, VFIO_EEH_PE_CONFIGURE
);
314 return RTAS_OUT_PARAM_ERROR
;
317 return RTAS_OUT_SUCCESS
;
322 bool spapr_phb_eeh_available(SpaprPhbState
*sphb
)
327 void spapr_phb_vfio_reset(DeviceState
*qdev
)
331 int spapr_phb_vfio_eeh_set_option(SpaprPhbState
*sphb
,
332 unsigned int addr
, int option
)
334 return RTAS_OUT_NOT_SUPPORTED
;
337 int spapr_phb_vfio_eeh_get_state(SpaprPhbState
*sphb
, int *state
)
339 return RTAS_OUT_NOT_SUPPORTED
;
342 int spapr_phb_vfio_eeh_reset(SpaprPhbState
*sphb
, int option
)
344 return RTAS_OUT_NOT_SUPPORTED
;
347 int spapr_phb_vfio_eeh_configure(SpaprPhbState
*sphb
)
349 return RTAS_OUT_NOT_SUPPORTED
;
352 #endif /* CONFIG_VFIO_PCI */