xref: /openbmc/qemu/hw/ppc/spapr_pci_vfio.c (revision dd7d3e35401f80ffef4e209fa9e27db9087501b0)
1 /*
2  * QEMU sPAPR PCI host for VFIO
3  *
4  * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License,
9  *  or (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include <sys/ioctl.h>
22 #include <linux/vfio.h>
23 #include "hw/ppc/spapr.h"
24 #include "hw/pci-host/spapr.h"
25 #include "hw/pci/msix.h"
26 #include "hw/pci/pci_device.h"
27 #include "hw/vfio/vfio-common.h"
28 #include "qemu/error-report.h"
29 #include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
30 
31 /*
32  * Interfaces for IBM EEH (Enhanced Error Handling)
33  */
34 #ifdef CONFIG_VFIO_PCI
/*
 * Report whether EEH operations may be issued on @container.
 *
 * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO implementation
 * is broken when a container holds more than one group.  The hardware
 * works in units of Partitionable Endpoints (== IOMMU groups), and the
 * EEH operations naively walk every group in the container without any
 * logic keeping the groups' state synchronized.  For some operations
 * (ENABLE) that might be ok until an error occurs, but for others
 * (GET_STATE) it is clearly broken.
 *
 * XXX Once fixed kernels exist, test for them here.
 *
 * Returns: true only when the container's group list holds exactly one
 * group; false when the list is empty or holds more than one.
 */
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
    return !QLIST_EMPTY(&container->group_list) &&
           !QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next);
}
62 
/*
 * Issue the EEH PE operation @op on @container through the
 * VFIO_EEH_PE_OP ioctl.
 *
 * Returns: the (non-negative) ioctl result on success, -EPERM when the
 * container does not hold exactly one group, or the negated errno of
 * the failed ioctl.
 */
static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
{
    struct vfio_eeh_pe_op pe_op = {
        .argsz = sizeof(pe_op),
        .op = op,
    };
    int ret;

    if (!vfio_eeh_container_ok(container)) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
                     "kernel requires a container with exactly one group", op);
        return -EPERM;
    }

    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
    if (ret < 0) {
        /*
         * Capture errno right away: error_report() may call library
         * functions that overwrite it, which would corrupt both the
         * logged reason and the value returned to the caller.
         */
        int saved_errno = errno;

        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %s", op,
                     strerror(saved_errno));
        return -saved_errno;
    }

    return ret;
}
85 
vfio_eeh_as_container(AddressSpace * as)86 static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
87 {
88     VFIOAddressSpace *space = vfio_get_address_space(as);
89     VFIOContainerBase *bcontainer = NULL;
90 
91     if (QLIST_EMPTY(&space->containers)) {
92         /* No containers to act on */
93         goto out;
94     }
95 
96     bcontainer = QLIST_FIRST(&space->containers);
97 
98     if (QLIST_NEXT(bcontainer, next)) {
99         /*
100          * We don't yet have logic to synchronize EEH state across
101          * multiple containers
102          */
103         bcontainer = NULL;
104         goto out;
105     }
106 
107 out:
108     vfio_put_address_space(space);
109     return container_of(bcontainer, VFIOContainer, bcontainer);
110 }
111 
/*
 * Report whether EEH operations can be issued on address space @as:
 * a container must be found for it and that container must pass
 * vfio_eeh_container_ok().
 */
static bool vfio_eeh_as_ok(AddressSpace *as)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    if (!container) {
        return false;
    }

    return vfio_eeh_container_ok(container);
}
118 
/*
 * Issue EEH operation @op on the container found for address space @as.
 *
 * Returns: -ENODEV when no container is found, otherwise whatever
 * vfio_eeh_container_op() returns.
 */
static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    return container ? vfio_eeh_container_op(container, op) : -ENODEV;
}
128 
/*
 * Report whether EEH can be used on @sphb, judged by its IOMMU
 * address space.
 */
bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return vfio_eeh_as_ok(&sphb->iommu_as);
}
133 
/*
 * Issue VFIO_EEH_PE_ENABLE on the IOMMU address space of @sphb.
 * The result is deliberately ignored: nothing is reported to the caller.
 */
static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
{
    (void)vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
}
138 
/*
 * Reset hook for the sPAPR PCI host bridge @qdev.
 *
 * The PE might be left in frozen state.  Re-enabling the EEH
 * functionality on it clears the frozen state, which ensures that the
 * PCI devices it contains work properly after reboot.
 */
void spapr_phb_vfio_reset(DeviceState *qdev)
{
    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
}
149 
/*
 * Per-device callback for pci_for_each_device(): sets the bool that
 * @opaque points at to true when @pdev is a "vfio-pci" device.  The
 * flag is never cleared here, and @bus is unused.
 */
static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
                                      void *opaque)
{
    bool *found = opaque;

    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        return;
    }

    *found = true;
}
159 
/*
 * Translate an RTAS EEH @option into the matching VFIO EEH PE
 * operation and issue it on the IOMMU address space of @sphb.
 *
 * @addr is only consulted for RTAS_EEH_ENABLE, where bits 16..23 select
 * the bus number that is scanned for vfio-pci devices.
 *
 * Returns: RTAS_OUT_SUCCESS on success, RTAS_OUT_PARAM_ERROR for an
 * unknown @option or when enabling finds no vfio-pci device on the
 * selected bus, RTAS_OUT_HW_ERROR when the VFIO operation fails.
 */
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    uint32_t op;

    switch (option) {
    case RTAS_EEH_DISABLE:
        op = VFIO_EEH_PE_DISABLE;
        break;
    case RTAS_EEH_ENABLE: {
        int bus_num = (addr >> 16) & 0xFF;
        bool has_vfio_dev = false;

        /*
         * EEH is enabled at sphb level rather than per PCI device.
         * The sphb has already been identified from the buid passed to
         * the ibm,set-eeh-option rtas call, so all that is left is to
         * check that pass-through (vfio-pci) devices exist under this
         * sphb bus.  That all devices under this sphb belong to the
         * same iommu group (same PE) was validated before getting here.
         *
         * Before linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
         * Rework device EEH PE determination") the kernel called
         * eeh-set-option once per device in the PE, passing the
         * device's config_address instead of the PE address.  Checking
         * that the supplied config_addr matches this PHB would
         * therefore cause issues with kernels v5.9 and older: if
         * eeh-set-option returns an error because the argument is not a
         * valid PE address, those kernels interpret it as EEH not being
         * supported.
         */
        pci_for_each_device(PCI_HOST_BRIDGE(sphb)->bus, bus_num,
                            spapr_eeh_pci_find_device, &has_vfio_dev);
        if (!has_vfio_dev) {
            return RTAS_OUT_PARAM_ERROR;
        }

        op = VFIO_EEH_PE_ENABLE;
        break;
    }
    case RTAS_EEH_THAW_IO:
        op = VFIO_EEH_PE_UNFREEZE_IO;
        break;
    case RTAS_EEH_THAW_DMA:
        op = VFIO_EEH_PE_UNFREEZE_DMA;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    if (vfio_eeh_as_op(&sphb->iommu_as, op) < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}
222 
/*
 * Query the EEH PE state of @sphb with VFIO_EEH_PE_GET_STATE.
 *
 * On success the value returned by the operation is stored in *@state
 * and RTAS_OUT_SUCCESS is returned.  On failure *@state is left
 * untouched and RTAS_OUT_PARAM_ERROR is returned.
 */
int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    int pe_state = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);

    if (pe_state < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    *state = pe_state;
    return RTAS_OUT_SUCCESS;
}
235 
/*
 * Per-device callback for pci_for_each_device_under_bus(): disables
 * MSI-X on @pdev if it is enabled and resets its MSI-X state.  Devices
 * that are not "vfio-pci" are skipped.  @bus and @opaque are unused.
 */
static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
                                              PCIDevice *pdev,
                                              void *opaque)
{
    /* Only VFIO PCI devices are of interest */
    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        return;
    }

    /*
     * Reset wipes the MSIx table, so MSIx has to be disabled here to
     * allow it to be re-enabled properly afterwards.  The cached MSIx
     * table is cleared too, since it no longer reflects what is in
     * hardware.
     */
    if (msix_enabled(pdev)) {
        const uint32_t flags_addr = pdev->msix_cap + PCI_MSIX_FLAGS;
        uint16_t flags = pci_host_config_read_common(pdev, flags_addr,
                                                     pci_config_size(pdev),
                                                     2);

        flags &= ~PCI_MSIX_FLAGS_ENABLE;
        pci_host_config_write_common(pdev, flags_addr,
                                     pci_config_size(pdev), flags, 2);
    }

    msix_reset(pdev);
}
265 
/*
 * Per-bus callback for pci_for_each_bus(): runs
 * spapr_phb_vfio_eeh_clear_dev_msix() on every device under @bus.
 * @opaque is unused.
 */
static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
{
    pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
                                  NULL);
}
271 
/*
 * Prepare @sphb for a PE reset by clearing the MSI-X state of the
 * vfio-pci devices on every bus visited from the host bridge's bus.
 */
static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
{
    pci_for_each_bus(PCI_HOST_BRIDGE(sphb)->bus,
                     spapr_phb_vfio_eeh_clear_bus_msix, NULL);
}
278 
/*
 * Translate an RTAS slot-reset @option into the matching VFIO EEH PE
 * reset operation and issue it on the IOMMU address space of @sphb.
 * For hot and fundamental resets the MSI-X state of the vfio-pci
 * devices is cleared first.
 *
 * Returns: RTAS_OUT_SUCCESS on success, RTAS_OUT_PARAM_ERROR for an
 * unknown @option, RTAS_OUT_HW_ERROR when the VFIO operation fails.
 */
int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    uint32_t op;

    switch (option) {
    case RTAS_SLOT_RESET_DEACTIVATE:
        op = VFIO_EEH_PE_RESET_DEACTIVATE;
        break;
    case RTAS_SLOT_RESET_HOT:
        op = VFIO_EEH_PE_RESET_HOT;
        break;
    case RTAS_SLOT_RESET_FUNDAMENTAL:
        op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    /* Every accepted option except DEACTIVATE needs the pre-reset step */
    if (option != RTAS_SLOT_RESET_DEACTIVATE) {
        spapr_phb_vfio_eeh_pre_reset(sphb);
    }

    if (vfio_eeh_as_op(&sphb->iommu_as, op) < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}
307 
/*
 * Issue VFIO_EEH_PE_CONFIGURE on the IOMMU address space of @sphb.
 *
 * Returns: RTAS_OUT_SUCCESS on success, RTAS_OUT_PARAM_ERROR when the
 * VFIO operation fails.
 */
int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    if (vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE) < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}
319 
320 #else
321 
/*
 * Stub used when CONFIG_VFIO_PCI is not defined: EEH is never
 * available.
 */
bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return false;
}
326 
/* Stub used when CONFIG_VFIO_PCI is not defined: nothing to reset. */
void spapr_phb_vfio_reset(DeviceState *qdev)
{
    /* Intentionally empty */
}
330 
/*
 * Stub used when CONFIG_VFIO_PCI is not defined: always reports
 * RTAS_OUT_NOT_SUPPORTED.
 */
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    return RTAS_OUT_NOT_SUPPORTED;
}
336 
/*
 * Stub used when CONFIG_VFIO_PCI is not defined: always reports
 * RTAS_OUT_NOT_SUPPORTED and leaves *@state untouched.
 */
int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    return RTAS_OUT_NOT_SUPPORTED;
}
341 
/*
 * Stub used when CONFIG_VFIO_PCI is not defined: always reports
 * RTAS_OUT_NOT_SUPPORTED.
 */
int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    return RTAS_OUT_NOT_SUPPORTED;
}
346 
/*
 * Stub used when CONFIG_VFIO_PCI is not defined: always reports
 * RTAS_OUT_NOT_SUPPORTED.
 */
int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    return RTAS_OUT_NOT_SUPPORTED;
}
351 
352 #endif /* CONFIG_VFIO_PCI */
353