xref: /openbmc/qemu/hw/ppc/spapr_pci_vfio.c (revision cc9d10b9e89f0325c1a14955534d6b28ea586fba)
1 /*
2  * QEMU sPAPR PCI host for VFIO
3  *
4  * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License,
9  *  or (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include <sys/ioctl.h>
22 #include <linux/vfio.h>
23 #include "hw/ppc/spapr.h"
24 #include "hw/pci-host/spapr.h"
25 #include "hw/pci/msix.h"
26 #include "hw/pci/pci_device.h"
27 #include "hw/vfio/vfio-common.h"
28 #include "qemu/error-report.h"
29 
30 /*
31  * Interfaces for IBM EEH (Enhanced Error Handling)
32  */
33 static bool vfio_eeh_container_ok(VFIOContainer *container)
34 {
35     /*
36      * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
37      * implementation is broken if there are multiple groups in a
38      * container.  The hardware works in units of Partitionable
39      * Endpoints (== IOMMU groups) and the EEH operations naively
40      * iterate across all groups in the container, without any logic
41      * to make sure the groups have their state synchronized.  For
42      * certain operations (ENABLE) that might be ok, until an error
43      * occurs, but for others (GET_STATE) it's clearly broken.
44      */
45 
46     /*
47      * XXX Once fixed kernels exist, test for them here
48      */
49 
50     if (QLIST_EMPTY(&container->group_list)) {
51         return false;
52     }
53 
54     if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
55         return false;
56     }
57 
58     return true;
59 }
60 
61 static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
62 {
63     struct vfio_eeh_pe_op pe_op = {
64         .argsz = sizeof(pe_op),
65         .op = op,
66     };
67     int ret;
68 
69     if (!vfio_eeh_container_ok(container)) {
70         error_report("vfio/eeh: EEH_PE_OP 0x%x: "
71                      "kernel requires a container with exactly one group", op);
72         return -EPERM;
73     }
74 
75     ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
76     if (ret < 0) {
77         error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
78         return -errno;
79     }
80 
81     return ret;
82 }
83 
84 static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
85 {
86     VFIOAddressSpace *space = vfio_get_address_space(as);
87     VFIOContainer *container = NULL;
88 
89     if (QLIST_EMPTY(&space->containers)) {
90         /* No containers to act on */
91         goto out;
92     }
93 
94     container = QLIST_FIRST(&space->containers);
95 
96     if (QLIST_NEXT(container, next)) {
97         /*
98          * We don't yet have logic to synchronize EEH state across
99          * multiple containers
100          */
101         container = NULL;
102         goto out;
103     }
104 
105 out:
106     vfio_put_address_space(space);
107     return container;
108 }
109 
110 static bool vfio_eeh_as_ok(AddressSpace *as)
111 {
112     VFIOContainer *container = vfio_eeh_as_container(as);
113 
114     return (container != NULL) && vfio_eeh_container_ok(container);
115 }
116 
117 static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
118 {
119     VFIOContainer *container = vfio_eeh_as_container(as);
120 
121     if (!container) {
122         return -ENODEV;
123     }
124     return vfio_eeh_container_op(container, op);
125 }
126 
127 bool spapr_phb_eeh_available(SpaprPhbState *sphb)
128 {
129     return vfio_eeh_as_ok(&sphb->iommu_as);
130 }
131 
132 static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
133 {
134     vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
135 }
136 
137 void spapr_phb_vfio_reset(DeviceState *qdev)
138 {
139     /*
140      * The PE might be in frozen state. To reenable the EEH
141      * functionality on it will clean the frozen state, which
142      * ensures that the contained PCI devices will work properly
143      * after reboot.
144      */
145     spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
146 }
147 
148 static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
149                                       void *opaque)
150 {
151     bool *found = opaque;
152 
153     if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
154         *found = true;
155     }
156 }
157 
158 int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
159                                   unsigned int addr, int option)
160 {
161     uint32_t op;
162     int ret;
163 
164     switch (option) {
165     case RTAS_EEH_DISABLE:
166         op = VFIO_EEH_PE_DISABLE;
167         break;
168     case RTAS_EEH_ENABLE: {
169         PCIHostState *phb;
170         bool found = false;
171 
172         /*
173          * The EEH functionality is enabled per sphb level instead of
174          * per PCI device. We have already identified this specific sphb
175          * based on buid passed as argument to ibm,set-eeh-option rtas
176          * call. Now we just need to check the validity of the PCI
177          * pass-through devices (vfio-pci) under this sphb bus.
178          * We have already validated that all the devices under this sphb
179          * are from same iommu group (within same PE) before coming here.
180          *
181          * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
182          * Rework device EEH PE determination") kernel would call
183          * eeh-set-option for each device in the PE using the device's
184          * config_address as the argument rather than the PE address.
185          * Hence if we check validity of supplied config_addr whether
186          * it matches to this PHB will cause issues with older kernel
187          * versions v5.9 and older. If we return an error from
188          * eeh-set-option when the argument isn't a valid PE address
189          * then older kernels (v5.9 and older) will interpret that as
190          * EEH not being supported.
191          */
192         phb = PCI_HOST_BRIDGE(sphb);
193         pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
194                             spapr_eeh_pci_find_device, &found);
195 
196         if (!found) {
197             return RTAS_OUT_PARAM_ERROR;
198         }
199 
200         op = VFIO_EEH_PE_ENABLE;
201         break;
202     }
203     case RTAS_EEH_THAW_IO:
204         op = VFIO_EEH_PE_UNFREEZE_IO;
205         break;
206     case RTAS_EEH_THAW_DMA:
207         op = VFIO_EEH_PE_UNFREEZE_DMA;
208         break;
209     default:
210         return RTAS_OUT_PARAM_ERROR;
211     }
212 
213     ret = vfio_eeh_as_op(&sphb->iommu_as, op);
214     if (ret < 0) {
215         return RTAS_OUT_HW_ERROR;
216     }
217 
218     return RTAS_OUT_SUCCESS;
219 }
220 
221 int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
222 {
223     int ret;
224 
225     ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
226     if (ret < 0) {
227         return RTAS_OUT_PARAM_ERROR;
228     }
229 
230     *state = ret;
231     return RTAS_OUT_SUCCESS;
232 }
233 
234 static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
235                                               PCIDevice *pdev,
236                                               void *opaque)
237 {
238     /* Check if the device is VFIO PCI device */
239     if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
240         return;
241     }
242 
243     /*
244      * The MSIx table will be cleaned out by reset. We need
245      * disable it so that it can be reenabled properly. Also,
246      * the cached MSIx table should be cleared as it's not
247      * reflecting the contents in hardware.
248      */
249     if (msix_enabled(pdev)) {
250         uint16_t flags;
251 
252         flags = pci_host_config_read_common(pdev,
253                                             pdev->msix_cap + PCI_MSIX_FLAGS,
254                                             pci_config_size(pdev), 2);
255         flags &= ~PCI_MSIX_FLAGS_ENABLE;
256         pci_host_config_write_common(pdev,
257                                      pdev->msix_cap + PCI_MSIX_FLAGS,
258                                      pci_config_size(pdev), flags, 2);
259     }
260 
261     msix_reset(pdev);
262 }
263 
264 static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
265 {
266        pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
267                                      NULL);
268 }
269 
270 static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
271 {
272        PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
273 
274        pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
275 }
276 
277 int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
278 {
279     uint32_t op;
280     int ret;
281 
282     switch (option) {
283     case RTAS_SLOT_RESET_DEACTIVATE:
284         op = VFIO_EEH_PE_RESET_DEACTIVATE;
285         break;
286     case RTAS_SLOT_RESET_HOT:
287         spapr_phb_vfio_eeh_pre_reset(sphb);
288         op = VFIO_EEH_PE_RESET_HOT;
289         break;
290     case RTAS_SLOT_RESET_FUNDAMENTAL:
291         spapr_phb_vfio_eeh_pre_reset(sphb);
292         op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
293         break;
294     default:
295         return RTAS_OUT_PARAM_ERROR;
296     }
297 
298     ret = vfio_eeh_as_op(&sphb->iommu_as, op);
299     if (ret < 0) {
300         return RTAS_OUT_HW_ERROR;
301     }
302 
303     return RTAS_OUT_SUCCESS;
304 }
305 
306 int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
307 {
308     int ret;
309 
310     ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE);
311     if (ret < 0) {
312         return RTAS_OUT_PARAM_ERROR;
313     }
314 
315     return RTAS_OUT_SUCCESS;
316 }
317