/*
 * QEMU sPAPR PCI host for VFIO
 *
 * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_device.h"
#include "hw/vfio/vfio-common.h"
#include "hw/vfio/vfio-container.h"
#include "qemu/error-report.h"
#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */

/*
 * Interfaces for IBM EEH (Enhanced Error Handling)
 */
#ifdef CONFIG_VFIO_PCI
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
    /*
     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
     * implementation is broken if there are multiple groups in a
     * container.  The hardware works in units of Partitionable
     * Endpoints (== IOMMU groups) and the EEH operations naively
     * iterate across all groups in the container, without any logic
     * to make sure the groups have their state synchronized.  For
     * certain operations (ENABLE) that might be ok, until an error
     * occurs, but for others (GET_STATE) it's clearly broken.
     */

    /*
     * XXX Once fixed kernels exist, test for them here
     */

    if (QLIST_EMPTY(&container->group_list)) {
        return false;
    }

    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
        return false;
    }

    return true;
}
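/*
 * Issue a single EEH PE operation ioctl on the container's fd.
 * Returns the (non-negative) ioctl result on success, or a negative
 * errno value if the container is not usable for EEH or the ioctl
 * itself fails.
 */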
static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
{
    struct vfio_eeh_pe_op pe_op = {
        .argsz = sizeof(pe_op),
        .op = op,
    };
    int ret;

    if (!vfio_eeh_container_ok(container)) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
                     "kernel requires a container with exactly one group", op);
        return -EPERM;
    }

    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
    if (ret < 0) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
        return -errno;
    }

    return ret;
}

static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
{
    VFIOAddressSpace *space = vfio_get_address_space(as);
    VFIOContainerBase *bcontainer = NULL;

    if (QLIST_EMPTY(&space->containers)) {
        /* No containers to act on */
        goto out;
    }

    bcontainer = QLIST_FIRST(&space->containers);

    if (QLIST_NEXT(bcontainer, next)) {
        /*
         * We don't yet have logic to synchronize EEH state across
         * multiple containers
         */
        bcontainer = NULL;
        goto out;
    }

out:
    vfio_put_address_space(space);
    return container_of(bcontainer, VFIOContainer, bcontainer);
}

static bool vfio_eeh_as_ok(AddressSpace *as)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    return (container != NULL) && vfio_eeh_container_ok(container);
}

static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    if (!container) {
        return -ENODEV;
    }
    return vfio_eeh_container_op(container, op);
}

bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return vfio_eeh_as_ok(&sphb->iommu_as);
}

static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
{
    vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
}

void spapr_phb_vfio_reset(DeviceState *qdev)
{
    /*
     * The PE might be in a frozen state.  Re-enabling EEH
     * functionality on it clears the frozen state, which ensures
     * that the contained PCI devices will work properly after
     * reboot.
     */
    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
}

/* pci_for_each_device() callback: set *opaque if pdev is a vfio-pci device */
static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
                                      void *opaque)
{
    bool *found = opaque;

    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        *found = true;
    }
}
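/*
 * Handle the ibm,set-eeh-option RTAS call for this PHB: map the RTAS
 * option to the corresponding VFIO EEH PE operation and apply it to
 * the PHB's IOMMU address space.  Returns an RTAS_OUT_* status code.
 */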
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    uint32_t op;
    int ret;

    switch (option) {
    case RTAS_EEH_DISABLE:
        op = VFIO_EEH_PE_DISABLE;
        break;
    case RTAS_EEH_ENABLE: {
        PCIHostState *phb;
        bool found = false;

        /*
         * EEH functionality is enabled at the PHB level rather than
         * per PCI device.  We have already identified this specific
         * PHB from the BUID passed as an argument to the
         * ibm,set-eeh-option RTAS call; now we just need to check
         * that there are PCI pass-through devices (vfio-pci) on the
         * addressed bus of this PHB.  We have already validated that
         * all the devices under this PHB are from the same IOMMU
         * group (i.e. within the same PE) before coming here.
         *
         * Prior to Linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
         * Rework device EEH PE determination") the kernel would call
         * eeh-set-option for each device in the PE, using the
         * device's config_address as the argument rather than the PE
         * address.  Hence, rejecting a config_addr that doesn't
         * match this PHB's PE address would break kernels v5.9 and
         * older: if we return an error from eeh-set-option when the
         * argument isn't a valid PE address, those kernels interpret
         * that as EEH not being supported.
         */
        phb = PCI_HOST_BRIDGE(sphb);
        pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
                            spapr_eeh_pci_find_device, &found);

        if (!found) {
            return RTAS_OUT_PARAM_ERROR;
        }

        op = VFIO_EEH_PE_ENABLE;
        break;
    }
    case RTAS_EEH_THAW_IO:
        op = VFIO_EEH_PE_UNFREEZE_IO;
        break;
    case RTAS_EEH_THAW_DMA:
        op = VFIO_EEH_PE_UNFREEZE_DMA;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
    if (ret < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    int ret;

    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
    if (ret < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    *state = ret;
    return RTAS_OUT_SUCCESS;
}
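/*
 * pci_for_each_device_under_bus() callback: ahead of a PE reset,
 * disable MSI-X on each vfio-pci device and drop QEMU's cached copy
 * of its MSI-X state, since the hardware table will not survive the
 * reset.
 */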
static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
                                              PCIDevice *pdev,
                                              void *opaque)
{
    /* Check whether the device is a VFIO PCI device */
    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
        return;
    }

    /*
     * The MSI-X table will be cleared out by the reset.  We need to
     * disable MSI-X so that it can be re-enabled properly.  The
     * cached MSI-X table should also be cleared, as it no longer
     * reflects the contents in hardware.
     */
    if (msix_enabled(pdev)) {
        uint16_t flags;

        flags = pci_host_config_read_common(pdev,
                                            pdev->msix_cap + PCI_MSIX_FLAGS,
                                            pci_config_size(pdev), 2);
        flags &= ~PCI_MSIX_FLAGS_ENABLE;
        pci_host_config_write_common(pdev,
                                     pdev->msix_cap + PCI_MSIX_FLAGS,
                                     pci_config_size(pdev), flags, 2);
    }

    msix_reset(pdev);
}

static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
{
    pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
                                  NULL);
}

static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
{
    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);

    pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
}

int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    uint32_t op;
    int ret;

    switch (option) {
    case RTAS_SLOT_RESET_DEACTIVATE:
        op = VFIO_EEH_PE_RESET_DEACTIVATE;
        break;
    case RTAS_SLOT_RESET_HOT:
        spapr_phb_vfio_eeh_pre_reset(sphb);
        op = VFIO_EEH_PE_RESET_HOT;
        break;
    case RTAS_SLOT_RESET_FUNDAMENTAL:
        spapr_phb_vfio_eeh_pre_reset(sphb);
        op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
        break;
    default:
        return RTAS_OUT_PARAM_ERROR;
    }

    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
    if (ret < 0) {
        return RTAS_OUT_HW_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    int ret;

    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE);
    if (ret < 0) {
        return RTAS_OUT_PARAM_ERROR;
    }

    return RTAS_OUT_SUCCESS;
}

#else

bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
    return false;
}

void spapr_phb_vfio_reset(DeviceState *qdev)
{
}

int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
                                  unsigned int addr, int option)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
    return RTAS_OUT_NOT_SUPPORTED;
}

#endif /* CONFIG_VFIO_PCI */