/*
 * QEMU sPAPR PCI host for VFIO
 *
 * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License,
 *  or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_device.h"
#include "hw/vfio/vfio-common.h"
#include "hw/vfio/vfio-container.h"
#include "qemu/error-report.h"
#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */

/*
 * Interfaces for IBM EEH (Enhanced Error Handling)
 */
#ifdef CONFIG_VFIO_PCI
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
    /*
     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
     * implementation is broken if there are multiple groups in a
     * container.  The hardware works in units of Partitionable
     * Endpoints (== IOMMU groups) and the EEH operations naively
     * iterate across all groups in the container, without any logic
     * to make sure the groups have their state synchronized.  For
     * certain operations (ENABLE) that might be ok, until an error
     * occurs, but for others (GET_STATE) it's clearly broken.
     */

    /*
     * XXX Once fixed kernels exist, test for them here
     */

    if (QLIST_EMPTY(&container->group_list)) {
        return false;
    }

    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
        return false;
    }

    return true;
}

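/*
 * Issue a single EEH PE operation on the container's file descriptor
 * via the VFIO_EEH_PE_OP ioctl.  Returns the (non-negative) ioctl
 * result on success, -EPERM if the container cannot be used for EEH,
 * or -errno if the ioctl itself fails.
 */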
static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
{
    struct vfio_eeh_pe_op pe_op = {
        .argsz = sizeof(pe_op),
        .op = op,
    };
    int ret;

    if (!vfio_eeh_container_ok(container)) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
                     "kernel requires a container with exactly one group", op);
        return -EPERM;
    }

    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
    if (ret < 0) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
        return -errno;
    }

    return ret;
}

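/*
 * Look up the one legacy VFIOContainer behind an AddressSpace.  EEH
 * state is not synchronized across containers yet, so an address
 * space with no containers, or with more than one, is treated as if
 * it had none.
 */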
static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
{
    VFIOAddressSpace *space = vfio_get_address_space(as);
    VFIOContainerBase *bcontainer = NULL;

    if (QLIST_EMPTY(&space->containers)) {
        /* No containers to act on */
        goto out;
    }

    bcontainer = QLIST_FIRST(&space->containers);

    if (QLIST_NEXT(bcontainer, next)) {
        /*
         * We don't yet have logic to synchronize EEH state across
         * multiple containers
         */
        bcontainer = NULL;
        goto out;
    }

out:
    vfio_put_address_space(space);
    return container_of(bcontainer, VFIOContainer, bcontainer);
}

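/*
 * AddressSpace-level helpers: vfio_eeh_as_ok() reports whether EEH
 * operations can be used on the address space (exactly one container
 * holding exactly one group), and vfio_eeh_as_op() forwards an EEH PE
 * operation to that container.
 */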
static bool vfio_eeh_as_ok(AddressSpace *as)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    return (container != NULL) && vfio_eeh_container_ok(container);
}

static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    if (!container) {
        return -ENODEV;
    }
    return vfio_eeh_container_op(container, op);
}

bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return vfio_eeh_as_ok(&sphb->iommu_as);
}

static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
{
    vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
}

void spapr_phb_vfio_reset(DeviceState *qdev)
{
    /*
     * The PE might be in a frozen state. Re-enabling EEH
     * functionality on it clears the frozen state, which ensures
     * that the contained PCI devices will work properly after
     * reboot.
     */
    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
}

static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
                                      void *opaque)
{
    bool *found = opaque;

    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        *found = true;
    }
}

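/*
 * Back end for the ibm,set-eeh-option RTAS call: translate the RTAS
 * option into the corresponding VFIO_EEH_PE_* operation and apply it
 * to the PHB's IOMMU address space.
 */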
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    uint32_t op;
    int ret;

    switch (option) {
    case RTAS_EEH_DISABLE:
        op = VFIO_EEH_PE_DISABLE;
        break;
    case RTAS_EEH_ENABLE: {
        PCIHostState *phb;
        bool found = false;

        /*
         * EEH functionality is enabled at the sphb level rather than
         * per PCI device. We have already identified this specific
         * sphb from the buid passed as an argument to the
         * ibm,set-eeh-option RTAS call, so all that remains is to
         * check that there are PCI pass-through devices (vfio-pci)
         * under this sphb bus. It has already been validated that all
         * devices under this sphb belong to the same IOMMU group
         * (i.e. the same PE) before we get here.
         *
         * Prior to Linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
         * Rework device EEH PE determination") the kernel would call
         * eeh-set-option for each device in the PE using the device's
         * config_address as the argument rather than the PE address.
         * Checking whether the supplied config_addr matches this PHB
         * would therefore break kernels v5.9 and older: if
         * eeh-set-option returns an error when the argument isn't a
         * valid PE address, those kernels interpret that as EEH not
         * being supported at all.
         */
        phb = PCI_HOST_BRIDGE(sphb);
        pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
                            spapr_eeh_pci_find_device, &found);

        if (!found) {
            return RTAS_OUT_PARAM_ERROR;
        }

        op = VFIO_EEH_PE_ENABLE;
        break;
    }
    case RTAS_EEH_THAW_IO:
        op = VFIO_EEH_PE_UNFREEZE_IO;
        break;
    case RTAS_EEH_THAW_DMA:
        op = VFIO_EEH_PE_UNFREEZE_DMA;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
    if (ret < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

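/*
 * Retrieve the current PE state: VFIO_EEH_PE_GET_STATE returns the
 * state as a non-negative value, which is passed back through *state.
 */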
int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    int ret;

    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
    if (ret < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    *state = ret;
    return RTAS_OUT_SUCCESS;
}

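/*
 * Clear MSI-X state on a single device; only vfio-pci devices are
 * touched.  Called for every device on a bus via
 * pci_for_each_device_under_bus(), see spapr_phb_vfio_eeh_pre_reset()
 * below.
 */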
static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
                                              PCIDevice *pdev,
                                              void *opaque)
{
    /* Check whether the device is a VFIO PCI device */
    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        return;
    }

    /*
     * The MSI-X table will be wiped by the reset, so disable MSI-X
     * now so that it can be re-enabled properly afterwards.  The
     * cached MSI-X table should also be cleared, as it no longer
     * reflects the contents of the hardware.
     */
    if (msix_enabled(pdev)) {
        uint16_t flags;

        flags = pci_host_config_read_common(pdev,
                                            pdev->msix_cap + PCI_MSIX_FLAGS,
                                            pci_config_size(pdev), 2);
        flags &= ~PCI_MSIX_FLAGS_ENABLE;
        pci_host_config_write_common(pdev,
                                     pdev->msix_cap + PCI_MSIX_FLAGS,
                                     pci_config_size(pdev), flags, 2);
    }

    msix_reset(pdev);
}

static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
{
    pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
                                  NULL);
}

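/*
 * Walk every bus under the PHB and clear MSI-X state on all vfio-pci
 * devices, so that MSI-X can be re-enabled cleanly after the PE reset
 * that is about to be performed.
 */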
static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
{
    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);

    pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
}

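/*
 * Perform an EEH PE reset.  The RTAS slot-reset option is mapped onto
 * the corresponding VFIO_EEH_PE_RESET_* operation; hot and fundamental
 * resets clear MSI-X state first via spapr_phb_vfio_eeh_pre_reset().
 */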
int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    uint32_t op;
    int ret;

    switch (option) {
    case RTAS_SLOT_RESET_DEACTIVATE:
        op = VFIO_EEH_PE_RESET_DEACTIVATE;
        break;
    case RTAS_SLOT_RESET_HOT:
        spapr_phb_vfio_eeh_pre_reset(sphb);
        op = VFIO_EEH_PE_RESET_HOT;
        break;
    case RTAS_SLOT_RESET_FUNDAMENTAL:
        spapr_phb_vfio_eeh_pre_reset(sphb);
        op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
    if (ret < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

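/*
 * Ask the host to reconfigure the PE via VFIO_EEH_PE_CONFIGURE,
 * typically after a PE reset.
 */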
int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    int ret;

    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE);
    if (ret < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

#else

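/*
 * Stub implementations used when QEMU is built without vfio-pci:
 * EEH is reported as unavailable and all EEH RTAS operations return
 * RTAS_OUT_NOT_SUPPORTED.
 */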
bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return false;
}

void spapr_phb_vfio_reset(DeviceState *qdev)
{
}

int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

#endif /* CONFIG_VFIO_PCI */