/*
 * QEMU sPAPR PCI host for VFIO
 *
 * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_device.h"
#include "hw/vfio/vfio-common.h"
#include "qemu/error-report.h"

/*
 * Interfaces for IBM EEH (Enhanced Error Handling)
 */

/*
 * Return true if it is safe to issue EEH operations on @container,
 * i.e. the container holds exactly one IOMMU group.
 */
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
    /*
     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
     * implementation is broken if there are multiple groups in a
     * container.  The hardware works in units of Partitionable
     * Endpoints (== IOMMU groups) and the EEH operations naively
     * iterate across all groups in the container, without any logic
     * to make sure the groups have their state synchronized.  For
     * certain operations (ENABLE) that might be ok, until an error
     * occurs, but for others (GET_STATE) it's clearly broken.
     */

    /*
     * XXX Once fixed kernels exist, test for them here
     */

    /* Zero groups: nothing to operate on */
    if (QLIST_EMPTY(&container->group_list)) {
        return false;
    }

    /* More than one group: kernel EEH state would not be synchronized */
    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
        return false;
    }

    return true;
}

/*
 * Issue the VFIO_EEH_PE_OP ioctl with sub-operation @op on @container.
 *
 * Returns the (non-negative) ioctl result on success, -EPERM if the
 * container is not in a state where EEH is usable, or -errno if the
 * ioctl itself fails.
 */
static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
{
    struct vfio_eeh_pe_op pe_op = {
        .argsz = sizeof(pe_op),
        .op = op,
    };
    int ret;

    if (!vfio_eeh_container_ok(container)) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
                     "kernel requires a container with exactly one group", op);
        return -EPERM;
    }

    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
    if (ret < 0) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
        return -errno;
    }

    return ret;
}

/*
 * Find the single VFIO container backing address space @as, or NULL if
 * the address space has no containers or more than one (EEH state
 * cannot yet be synchronized across multiple containers).
 */
static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
{
    VFIOAddressSpace *space = vfio_get_address_space(as);
    VFIOContainer *container = NULL;

    if (QLIST_EMPTY(&space->containers)) {
        /* No containers to act on */
        goto out;
    }

    container = QLIST_FIRST(&space->containers);

    if (QLIST_NEXT(container, next)) {
        /*
         * We don't yet have logic to synchronize EEH state across
         * multiple containers
         */
        container = NULL;
        goto out;
    }

out:
    /* Drop the reference taken by vfio_get_address_space() */
    vfio_put_address_space(space);
    return container;
}

/* Return true if EEH operations are usable on address space @as. */
static bool vfio_eeh_as_ok(AddressSpace *as)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    return (container != NULL) && vfio_eeh_container_ok(container);
}

/*
 * Issue EEH sub-operation @op on the container backing @as.
 * Returns -ENODEV if no single usable container exists, otherwise the
 * result of vfio_eeh_container_op().
 */
static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    if (!container) {
        return -ENODEV;
    }
    return vfio_eeh_container_op(container, op);
}

/* Report whether EEH is available on this PHB's IOMMU address space. */
bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return vfio_eeh_as_ok(&sphb->iommu_as);
}

/*
 * Best-effort re-enable of EEH on the PHB's PE; the return value is
 * deliberately ignored since this runs in a reset path where failure
 * cannot be reported.
 */
static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
{
    vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
}

/* DeviceState reset handler for the sPAPR PCI host bridge. */
void spapr_phb_vfio_reset(DeviceState *qdev)
{
    /*
     * The PE might be in frozen state. To reenable the EEH
     * functionality on it will clean the frozen state, which
     * ensures that the contained PCI devices will work properly
     * after reboot.
     */
    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
}

/*
 * pci_for_each_device() callback: set *opaque (a bool) if @pdev is a
 * vfio-pci device.
 */
static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
                                      void *opaque)
{
    bool *found = opaque;

    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        *found = true;
    }
}

/*
 * Implement the ibm,set-eeh-option RTAS call for @sphb.
 *
 * @addr is the config_address argument of the RTAS call (the bus number
 * is extracted from bits 23:16); @option is one of the RTAS_EEH_*
 * values.  Returns an RTAS_OUT_* status code.
 */
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    uint32_t op;
    int ret;

    switch (option) {
    case RTAS_EEH_DISABLE:
        op = VFIO_EEH_PE_DISABLE;
        break;
    case RTAS_EEH_ENABLE: {
        PCIHostState *phb;
        bool found = false;

        /*
         * The EEH functionality is enabled per sphb level instead of
         * per PCI device. We have already identified this specific sphb
         * based on buid passed as argument to ibm,set-eeh-option rtas
         * call. Now we just need to check the validity of the PCI
         * pass-through devices (vfio-pci) under this sphb bus.
         * We have already validated that all the devices under this sphb
         * are from same iommu group (within same PE) before coming here.
         *
         * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
         * Rework device EEH PE determination") kernel would call
         * eeh-set-option for each device in the PE using the device's
         * config_address as the argument rather than the PE address.
         * Hence if we check validity of supplied config_addr whether
         * it matches to this PHB will cause issues with older kernel
         * versions v5.9 and older. If we return an error from
         * eeh-set-option when the argument isn't a valid PE address
         * then older kernels (v5.9 and older) will interpret that as
         * EEH not being supported.
         */
        phb = PCI_HOST_BRIDGE(sphb);
        pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
                            spapr_eeh_pci_find_device, &found);

        if (!found) {
            return RTAS_OUT_PARAM_ERROR;
        }

        op = VFIO_EEH_PE_ENABLE;
        break;
    }
    case RTAS_EEH_THAW_IO:
        op = VFIO_EEH_PE_UNFREEZE_IO;
        break;
    case RTAS_EEH_THAW_DMA:
        op = VFIO_EEH_PE_UNFREEZE_DMA;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
    if (ret < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

/*
 * Implement the state query of the ibm,read-slot-reset-state2 RTAS
 * call: on success store the kernel-reported PE state in *state and
 * return RTAS_OUT_SUCCESS.
 */
int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    int ret;

    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
    if (ret < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    /* A non-negative ioctl result is the PE state value itself */
    *state = ret;
    return RTAS_OUT_SUCCESS;
}

/*
 * Per-device helper run before a PE reset: disable MSI-X through the
 * device's config space and clear QEMU's cached MSI-X state, so MSI-X
 * can be cleanly re-enabled after the reset.
 */
static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
                                              PCIDevice *pdev,
                                              void *opaque)
{
    /* Check if the device is VFIO PCI device */
    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        return;
    }

    /*
     * The MSIx table will be cleaned out by reset. We need
     * disable it so that it can be reenabled properly. Also,
     * the cached MSIx table should be cleared as it's not
     * reflecting the contents in hardware.
     */
    if (msix_enabled(pdev)) {
        uint16_t flags;

        /* Clear the MSI-X enable bit in the capability's flags word */
        flags = pci_host_config_read_common(pdev,
                                            pdev->msix_cap + PCI_MSIX_FLAGS,
                                            pci_config_size(pdev), 2);
        flags &= ~PCI_MSIX_FLAGS_ENABLE;
        pci_host_config_write_common(pdev,
                                     pdev->msix_cap + PCI_MSIX_FLAGS,
                                     pci_config_size(pdev), flags, 2);
    }

    msix_reset(pdev);
}

/* Apply the MSI-X clearing helper to every device on @bus. */
static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
{
    pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
                                  NULL);
}

/*
 * Walk every bus under the PHB and quiesce MSI-X before an EEH reset
 * that will wipe hardware MSI-X state (hot/fundamental resets).
 */
static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
{
    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);

    pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
}

/*
 * Implement the ibm,set-slot-reset RTAS call: map the RTAS reset
 * @option to the corresponding VFIO EEH reset operation and issue it.
 * Returns an RTAS_OUT_* status code.
 */
int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    uint32_t op;
    int ret;

    switch (option) {
    case RTAS_SLOT_RESET_DEACTIVATE:
        op = VFIO_EEH_PE_RESET_DEACTIVATE;
        break;
    case RTAS_SLOT_RESET_HOT:
        /* The reset will clobber MSI-X state; quiesce it first */
        spapr_phb_vfio_eeh_pre_reset(sphb);
        op = VFIO_EEH_PE_RESET_HOT;
        break;
    case RTAS_SLOT_RESET_FUNDAMENTAL:
        spapr_phb_vfio_eeh_pre_reset(sphb);
        op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
    if (ret < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

/*
 * Implement the ibm,configure-pe RTAS call: ask the kernel to
 * reconfigure the bridges within the PE after a reset.
 * Returns an RTAS_OUT_* status code.
 */
int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    int ret;

    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE);
    if (ret < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}