1 /*
2 * QEMU sPAPR PCI host for VFIO
3 *
4 * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License,
9 * or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <sys/ioctl.h>
22 #include <linux/vfio.h>
23 #include "hw/ppc/spapr.h"
24 #include "hw/pci-host/spapr.h"
25 #include "hw/pci/msix.h"
26 #include "hw/pci/pci_device.h"
27 #include "hw/vfio/vfio-common.h"
28 #include "qemu/error-report.h"
29
30 /*
31 * Interfaces for IBM EEH (Enhanced Error Handling)
32 */
/*
 * Decide whether EEH operations may be issued on @container.
 *
 * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO implementation
 * is broken if there are multiple groups in a container.  The hardware
 * works in units of Partitionable Endpoints (== IOMMU groups) and the
 * EEH operations naively iterate across all groups in the container,
 * without any logic to make sure the groups have their state
 * synchronized.  For certain operations (ENABLE) that might be ok,
 * until an error occurs, but for others (GET_STATE) it's clearly
 * broken.
 *
 * XXX Once fixed kernels exist, test for them here
 *
 * Returns: true only when the container's group list holds exactly one
 * entry; false when it is empty or has a second entry.
 */
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
    bool single_group;

    /* Non-empty list whose first element has no successor */
    single_group = !QLIST_EMPTY(&container->group_list) &&
                   QLIST_NEXT(QLIST_FIRST(&container->group_list),
                              container_next) == NULL;

    return single_group;
}
60
/*
 * Issue one VFIO_EEH_PE_OP ioctl on @container.
 *
 * @container: VFIO container whose file descriptor receives the ioctl
 * @op: operation code stored in the request's .op field
 *
 * Returns: the (non-negative) ioctl result on success, -EPERM when the
 * container fails the vfio_eeh_container_ok() check, or the negated
 * errno of the failed ioctl.
 */
static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
{
    struct vfio_eeh_pe_op pe_op = {
        .argsz = sizeof(pe_op),
        .op = op,
    };
    int ret;

    if (!vfio_eeh_container_ok(container)) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
                     "kernel requires a container with exactly one group", op);
        return -EPERM;
    }

    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
    if (ret < 0) {
        /*
         * Snapshot errno right away: the reporting call below may run
         * library code that overwrites it, which would corrupt both the
         * printed reason and the value handed back to the caller.
         */
        int saved_errno = errno;

        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %s", op,
                     strerror(saved_errno));
        return -saved_errno;
    }

    return ret;
}
83
vfio_eeh_as_container(AddressSpace * as)84 static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
85 {
86 VFIOAddressSpace *space = vfio_get_address_space(as);
87 VFIOContainer *container = NULL;
88
89 if (QLIST_EMPTY(&space->containers)) {
90 /* No containers to act on */
91 goto out;
92 }
93
94 container = QLIST_FIRST(&space->containers);
95
96 if (QLIST_NEXT(container, next)) {
97 /*
98 * We don't yet have logic to synchronize EEH state across
99 * multiple containers
100 */
101 container = NULL;
102 goto out;
103 }
104
105 out:
106 vfio_put_address_space(space);
107 return container;
108 }
109
/*
 * Check whether EEH can be used on address space @as.
 *
 * Returns: true when vfio_eeh_as_container() yields a container and
 * that container passes vfio_eeh_container_ok(); false otherwise.
 */
static bool vfio_eeh_as_ok(AddressSpace *as)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    if (!container) {
        return false;
    }

    return vfio_eeh_container_ok(container);
}
116
/*
 * Run EEH operation @op on the container behind address space @as.
 *
 * Returns: -ENODEV when vfio_eeh_as_container() finds no usable
 * container, otherwise whatever vfio_eeh_container_op() returns.
 */
static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
    VFIOContainer *target = vfio_eeh_as_container(as);

    return target ? vfio_eeh_container_op(target, op) : -ENODEV;
}
126
/*
 * Report whether EEH is usable for @sphb.
 *
 * Returns: the result of vfio_eeh_as_ok() on the PHB's IOMMU address
 * space.
 */
bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    bool available = vfio_eeh_as_ok(&sphb->iommu_as);

    return available;
}
131
/*
 * Issue VFIO_EEH_PE_ENABLE on the IOMMU address space of @sphb.
 *
 * The result of the operation is discarded.
 */
static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
{
    (void)vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
}
136
/*
 * Reset hook: re-enable EEH on the PHB represented by @qdev.
 *
 * The PE might be in frozen state.  Re-enabling the EEH functionality
 * on it clears the frozen state, which ensures that the contained PCI
 * devices will work properly after reboot.
 */
void spapr_phb_vfio_reset(DeviceState *qdev)
{
    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
}
147
/*
 * Per-device callback: flag the presence of a "vfio-pci" device.
 *
 * @bus: unused
 * @pdev: device being visited
 * @opaque: points to a bool that is set to true when @pdev is a
 *          "vfio-pci" object; it is left untouched otherwise
 */
static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
                                      void *opaque)
{
    bool *seen_vfio = opaque;

    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        return;
    }

    *seen_vfio = true;
}
157
/*
 * Translate an RTAS EEH option into a VFIO EEH PE operation and run it
 * on the IOMMU address space of @sphb.
 *
 * @sphb: PHB the request targets
 * @addr: config address from the guest; only bits 16..23 are used, as
 *        the bus number, and only for RTAS_EEH_ENABLE
 * @option: one of RTAS_EEH_DISABLE, RTAS_EEH_ENABLE, RTAS_EEH_THAW_IO,
 *          RTAS_EEH_THAW_DMA
 *
 * Returns: RTAS_OUT_SUCCESS on success; RTAS_OUT_PARAM_ERROR for an
 * unknown @option or when RTAS_EEH_ENABLE finds no "vfio-pci" device on
 * the addressed bus; RTAS_OUT_HW_ERROR when the VFIO operation fails.
 */
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    uint32_t pe_op;

    switch (option) {
    case RTAS_EEH_DISABLE:
        pe_op = VFIO_EEH_PE_DISABLE;
        break;
    case RTAS_EEH_ENABLE: {
        PCIHostState *host = PCI_HOST_BRIDGE(sphb);
        unsigned int bus_num = (addr >> 16) & 0xFF;
        bool has_vfio_dev = false;

        /*
         * EEH is enabled per sphb rather than per PCI device.  This
         * sphb has already been identified from the buid passed to the
         * ibm,set-eeh-option RTAS call, so all that remains is to
         * check that a PCI pass-through device (vfio-pci) exists under
         * this sphb bus.  All devices under this sphb have already
         * been validated to be in the same IOMMU group (same PE)
         * before coming here.
         *
         * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
         * Rework device EEH PE determination") the kernel would call
         * eeh-set-option for each device in the PE using the device's
         * config_address as the argument rather than the PE address.
         * Checking that the supplied config_addr matches this PHB
         * would therefore cause issues with kernels v5.9 and older: if
         * eeh-set-option returns an error because the argument isn't a
         * valid PE address, those kernels interpret that as EEH not
         * being supported.
         */
        pci_for_each_device(host->bus, bus_num,
                            spapr_eeh_pci_find_device, &has_vfio_dev);
        if (!has_vfio_dev) {
            return RTAS_OUT_PARAM_ERROR;
        }

        pe_op = VFIO_EEH_PE_ENABLE;
        break;
    }
    case RTAS_EEH_THAW_IO:
        pe_op = VFIO_EEH_PE_UNFREEZE_IO;
        break;
    case RTAS_EEH_THAW_DMA:
        pe_op = VFIO_EEH_PE_UNFREEZE_DMA;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    if (vfio_eeh_as_op(&sphb->iommu_as, pe_op) < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}
220
/*
 * Query the EEH PE state for @sphb via VFIO_EEH_PE_GET_STATE.
 *
 * @sphb: PHB to query
 * @state: receives the non-negative operation result on success; not
 *         written on failure
 *
 * Returns: RTAS_OUT_SUCCESS, or RTAS_OUT_PARAM_ERROR when the VFIO
 * operation returns a negative value.
 */
int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    int pe_state = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);

    if (pe_state >= 0) {
        *state = pe_state;
        return RTAS_OUT_SUCCESS;
    }

    return RTAS_OUT_PARAM_ERROR;
}
233
/*
 * Per-device callback: turn MSI-X off and reset the MSI-X state of a
 * "vfio-pci" device.  Devices of any other type are skipped.
 *
 * @bus: unused
 * @pdev: device being visited
 * @opaque: unused
 */
static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
                                              PCIDevice *pdev,
                                              void *opaque)
{
    /* Only VFIO PCI devices are of interest */
    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        return;
    }

    /*
     * The MSIx table will be cleaned out by reset.  It has to be
     * disabled here so that it can be reenabled properly.  The cached
     * MSIx table is cleared as well because it no longer reflects the
     * contents in hardware.
     */
    if (msix_enabled(pdev)) {
        const uint32_t msix_flags_off = pdev->msix_cap + PCI_MSIX_FLAGS;
        uint16_t msix_flags;

        /* 2-byte read-modify-write of the MSI-X flags word */
        msix_flags = pci_host_config_read_common(pdev, msix_flags_off,
                                                 pci_config_size(pdev), 2);
        msix_flags &= ~PCI_MSIX_FLAGS_ENABLE;
        pci_host_config_write_common(pdev, msix_flags_off,
                                     pci_config_size(pdev), msix_flags, 2);
    }

    msix_reset(pdev);
}
263
/*
 * Per-bus callback: apply spapr_phb_vfio_eeh_clear_dev_msix() to each
 * device under @bus.
 *
 * @bus: bus being visited
 * @opaque: unused; NULL is passed on to the per-device callback
 */
static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
{
    pci_for_each_device_under_bus(bus,
                                  spapr_phb_vfio_eeh_clear_dev_msix,
                                  NULL);
}
269
/*
 * Prepare @sphb for a PE reset by running the MSI-X clearing callback
 * over the buses reached from the PHB's root bus.
 */
static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
{
    PCIHostState *host = PCI_HOST_BRIDGE(sphb);

    pci_for_each_bus(host->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
}
276
/*
 * Translate an RTAS slot-reset option into a VFIO EEH PE reset
 * operation and run it on the IOMMU address space of @sphb.
 *
 * @sphb: PHB to reset
 * @option: RTAS_SLOT_RESET_DEACTIVATE, RTAS_SLOT_RESET_HOT or
 *          RTAS_SLOT_RESET_FUNDAMENTAL
 *
 * For the hot and fundamental resets, the MSI-X state of the VFIO
 * devices is cleared first via spapr_phb_vfio_eeh_pre_reset().
 *
 * Returns: RTAS_OUT_SUCCESS on success, RTAS_OUT_PARAM_ERROR for an
 * unknown @option, RTAS_OUT_HW_ERROR when the VFIO operation fails.
 */
int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    bool clear_msix = false;
    uint32_t pe_op;

    switch (option) {
    case RTAS_SLOT_RESET_DEACTIVATE:
        pe_op = VFIO_EEH_PE_RESET_DEACTIVATE;
        break;
    case RTAS_SLOT_RESET_HOT:
        clear_msix = true;
        pe_op = VFIO_EEH_PE_RESET_HOT;
        break;
    case RTAS_SLOT_RESET_FUNDAMENTAL:
        clear_msix = true;
        pe_op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
        break;
    default:
        /* Rejected before any side effect takes place */
        return RTAS_OUT_PARAM_ERROR;
    }

    if (clear_msix) {
        spapr_phb_vfio_eeh_pre_reset(sphb);
    }

    if (vfio_eeh_as_op(&sphb->iommu_as, pe_op) < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}
305
/*
 * Run VFIO_EEH_PE_CONFIGURE on the IOMMU address space of @sphb.
 *
 * Returns: RTAS_OUT_SUCCESS, or RTAS_OUT_PARAM_ERROR when the VFIO
 * operation returns a negative value.
 */
int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    if (vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE) < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}
317