/*
 * QEMU sPAPR PCI host for VFIO
 *
 * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_device.h"
#include "hw/vfio/vfio-common.h"
#include "qemu/error-report.h"
#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */

/*
 * Interfaces for IBM EEH (Enhanced Error Handling)
 */
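/*
 * Overview (a summary of the flow implemented below): EEH operates on
 * Partitionable Endpoints (PEs), which the host kernel exposes as
 * IOMMU groups. The guest drives EEH through RTAS calls such as
 * ibm,set-eeh-option, ibm,read-slot-reset-state2, ibm,set-slot-reset
 * and ibm,configure-pe; the sPAPR PHB code forwards those requests to
 * the helpers below, which translate them into VFIO_EEH_PE_OP ioctls
 * on the VFIO container backing the PHB's IOMMU address space.
 */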
#ifdef CONFIG_VFIO_PCI
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
    /*
     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
     * implementation is broken if there are multiple groups in a
     * container. The hardware works in units of Partitionable
     * Endpoints (== IOMMU groups) and the EEH operations naively
     * iterate across all groups in the container, without any logic
     * to make sure the groups have their state synchronized. For
     * certain operations (ENABLE) that might be ok, until an error
     * occurs, but for others (GET_STATE) it's clearly broken.
     */

    /*
     * XXX Once fixed kernels exist, test for them here
     */

    if (QLIST_EMPTY(&container->group_list)) {
        return false;
    }

    /* More than one group in the container also rules it out */
    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
        return false;
    }

    return true;
}

static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
{
    struct vfio_eeh_pe_op pe_op = {
        .argsz = sizeof(pe_op),
        .op = op,
    };
    int ret;

    if (!vfio_eeh_container_ok(container)) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
                     "kernel requires a container with exactly one group", op);
        return -EPERM;
    }

    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
    if (ret < 0) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
        return -errno;
    }

    return ret;
}

static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
{
    VFIOAddressSpace *space = vfio_get_address_space(as);
    VFIOContainerBase *bcontainer = NULL;

    if (QLIST_EMPTY(&space->containers)) {
        /* No containers to act on */
        goto out;
    }

    bcontainer = QLIST_FIRST(&space->containers);

    if (QLIST_NEXT(bcontainer, next)) {
        /*
         * We don't yet have logic to synchronize EEH state across
         * multiple containers
         */
        bcontainer = NULL;
        goto out;
    }

out:
    vfio_put_address_space(space);
    /* bcontainer may be NULL here; callers rely on getting NULL back */
    return container_of(bcontainer, VFIOContainer, bcontainer);
}

static bool vfio_eeh_as_ok(AddressSpace *as)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    return (container != NULL) && vfio_eeh_container_ok(container);
}

static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    if (!container) {
        return -ENODEV;
    }
    return vfio_eeh_container_op(container, op);
}

bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return vfio_eeh_as_ok(&sphb->iommu_as);
}

static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
{
    vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
}

void spapr_phb_vfio_reset(DeviceState *qdev)
{
    /*
     * The PE might be in frozen state. Reenabling EEH functionality
     * on it clears the frozen state, which ensures that the contained
     * PCI devices will work properly after reboot.
     */
    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
}

static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
                                      void *opaque)
{
    bool *found = opaque;

    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        *found = true;
    }
}

int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    uint32_t op;
    int ret;

    switch (option) {
    case RTAS_EEH_DISABLE:
        op = VFIO_EEH_PE_DISABLE;
        break;
    case RTAS_EEH_ENABLE: {
        PCIHostState *phb;
        bool found = false;

        /*
         * The EEH functionality is enabled at the sphb level rather
         * than per PCI device. We have already identified this
         * specific sphb based on the buid passed as an argument to
         * the ibm,set-eeh-option RTAS call, so all that remains is to
         * check that there is at least one PCI pass-through device
         * (vfio-pci) on the addressed bus under this sphb. We have
         * already validated that all the devices under this sphb
         * belong to the same IOMMU group (i.e. the same PE) before
         * coming here.
         *
         * Prior to Linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
         * Rework device EEH PE determination"), the kernel would call
         * eeh-set-option for each device in the PE using the device's
         * config_address as the argument rather than the PE address.
         * Hence, validating that the supplied config_addr belongs to
         * this PHB would break kernel versions v5.9 and older: if we
         * return an error from eeh-set-option when the argument isn't
         * a valid PE address, those kernels interpret that as EEH not
         * being supported.
         */
        phb = PCI_HOST_BRIDGE(sphb);
        /* (addr >> 16) & 0xFF is the bus number of the config address */
        pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
                            spapr_eeh_pci_find_device, &found);

        if (!found) {
            return RTAS_OUT_PARAM_ERROR;
        }

        op = VFIO_EEH_PE_ENABLE;
        break;
    }
    case RTAS_EEH_THAW_IO:
        op = VFIO_EEH_PE_UNFREEZE_IO;
        break;
    case RTAS_EEH_THAW_DMA:
        op = VFIO_EEH_PE_UNFREEZE_DMA;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
    if (ret < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    int ret;

    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
    if (ret < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    *state = ret;
    return RTAS_OUT_SUCCESS;
}

static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
                                              PCIDevice *pdev,
                                              void *opaque)
{
    /* Check whether the device is a VFIO PCI device */
    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        return;
    }

    /*
     * The MSI-X table will be cleaned out by reset. We need to
     * disable it so that it can be reenabled properly. Also, the
     * cached MSI-X table should be cleared as it's not reflecting
     * the contents in hardware.
     */
    if (msix_enabled(pdev)) {
        uint16_t flags;

        flags = pci_host_config_read_common(pdev,
                                            pdev->msix_cap + PCI_MSIX_FLAGS,
                                            pci_config_size(pdev), 2);
        flags &= ~PCI_MSIX_FLAGS_ENABLE;
        pci_host_config_write_common(pdev,
                                     pdev->msix_cap + PCI_MSIX_FLAGS,
                                     pci_config_size(pdev), flags, 2);
    }

    msix_reset(pdev);
}

static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
{
    pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
                                  NULL);
}

static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
{
    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);

    pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
}

int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    uint32_t op;
    int ret;

    switch (option) {
    case RTAS_SLOT_RESET_DEACTIVATE:
        op = VFIO_EEH_PE_RESET_DEACTIVATE;
        break;
    case RTAS_SLOT_RESET_HOT:
        spapr_phb_vfio_eeh_pre_reset(sphb);
        op = VFIO_EEH_PE_RESET_HOT;
        break;
    case RTAS_SLOT_RESET_FUNDAMENTAL:
        spapr_phb_vfio_eeh_pre_reset(sphb);
        op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
    if (ret < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    int ret;

    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE);
    if (ret < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

#else

bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return false;
}

void spapr_phb_vfio_reset(DeviceState *qdev)
{
}

int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

#endif /* CONFIG_VFIO_PCI */
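/*
 * Illustrative call flow (a sketch for orientation, not compiled code):
 * a guest requesting a hot reset through the ibm,set-slot-reset RTAS
 * call reaches this file as
 *
 *     spapr_phb_vfio_eeh_reset(sphb, RTAS_SLOT_RESET_HOT);
 *         -> spapr_phb_vfio_eeh_pre_reset(sphb);   (disable MSI-X first)
 *         -> vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_RESET_HOT);
 *            -> ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
 *
 * with any ioctl failure folded into an RTAS_OUT_* status code for the
 * guest.
 */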