1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2012 4 * 5 * Author(s): 6 * Jan Glauber <jang@linux.vnet.ibm.com> 7 */ 8 9 #define KMSG_COMPONENT "zpci" 10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/pci.h> 14 #include <asm/pci_debug.h> 15 #include <asm/pci_dma.h> 16 #include <asm/sclp.h> 17 18 #include "pci_bus.h" 19 20 /* Content Code Description for PCI Function Error */ 21 struct zpci_ccdf_err { 22 u32 reserved1; 23 u32 fh; /* function handle */ 24 u32 fid; /* function id */ 25 u32 ett : 4; /* expected table type */ 26 u32 mvn : 12; /* MSI vector number */ 27 u32 dmaas : 8; /* DMA address space */ 28 u32 : 6; 29 u32 q : 1; /* event qualifier */ 30 u32 rw : 1; /* read/write */ 31 u64 faddr; /* failing address */ 32 u32 reserved3; 33 u16 reserved4; 34 u16 pec; /* PCI event code */ 35 } __packed; 36 37 /* Content Code Description for PCI Function Availability */ 38 struct zpci_ccdf_avail { 39 u32 reserved1; 40 u32 fh; /* function handle */ 41 u32 fid; /* function id */ 42 u32 reserved2; 43 u32 reserved3; 44 u32 reserved4; 45 u32 reserved5; 46 u16 reserved6; 47 u16 pec; /* PCI event code */ 48 } __packed; 49 50 static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) 51 { 52 switch (ers_res) { 53 case PCI_ERS_RESULT_CAN_RECOVER: 54 case PCI_ERS_RESULT_RECOVERED: 55 case PCI_ERS_RESULT_NEED_RESET: 56 return false; 57 default: 58 return true; 59 } 60 } 61 62 static bool is_passed_through(struct zpci_dev *zdev) 63 { 64 return zdev->s390_domain; 65 } 66 67 static bool is_driver_supported(struct pci_driver *driver) 68 { 69 if (!driver || !driver->err_handler) 70 return false; 71 if (!driver->err_handler->error_detected) 72 return false; 73 if (!driver->err_handler->slot_reset) 74 return false; 75 if (!driver->err_handler->resume) 76 return false; 77 return true; 78 } 79 80 static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev, 81 struct pci_driver *driver) 82 { 83 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 84 85 ers_res = driver->err_handler->error_detected(pdev, pdev->error_state); 86 if (ers_result_indicates_abort(ers_res)) 87 pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev)); 88 else if (ers_res == PCI_ERS_RESULT_NEED_RESET) 89 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); 90 91 return ers_res; 92 } 93 94 static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, 95 struct pci_driver *driver) 96 { 97 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 98 struct zpci_dev *zdev = to_zpci(pdev); 99 int rc; 100 101 pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); 102 rc = zpci_reset_load_store_blocked(zdev); 103 if (rc) { 104 pr_err("%s: Unblocking device access failed\n", pci_name(pdev)); 105 /* Let's try a full reset instead */ 106 return PCI_ERS_RESULT_NEED_RESET; 107 } 108 109 if (driver->err_handler->mmio_enabled) { 110 ers_res = driver->err_handler->mmio_enabled(pdev); 111 if (ers_result_indicates_abort(ers_res)) { 112 pr_info("%s: Automatic recovery failed after MMIO re-enable\n", 113 pci_name(pdev)); 114 return ers_res; 115 } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { 116 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); 117 return ers_res; 118 } 119 } 120 121 pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); 122 rc = zpci_clear_error_state(zdev); 123 if (!rc) { 124 pdev->error_state = pci_channel_io_normal; 125 } else { 126 pr_err("%s: Unblocking DMA failed\n", pci_name(pdev)); 127 /* Let's try a full reset instead */ 128 return PCI_ERS_RESULT_NEED_RESET; 129 } 130 131 return ers_res; 132 } 133 134 static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, 135 struct pci_driver *driver) 136 { 137 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 138 139 pr_info("%s: Initiating reset\n", pci_name(pdev)); 140 if (zpci_hot_reset_device(to_zpci(pdev))) { 141 pr_err("%s: The reset request failed\n", pci_name(pdev)); 142 return ers_res; 143 } 144 pdev->error_state = pci_channel_io_normal; 145 ers_res = driver->err_handler->slot_reset(pdev); 146 if (ers_result_indicates_abort(ers_res)) { 147 pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); 148 return ers_res; 149 } 150 151 return ers_res; 152 } 153 154 /* zpci_event_attempt_error_recovery - Try to recover the given PCI function 155 * @pdev: PCI function to recover currently in the error state 156 * 157 * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst. 158 * With the simplification that recovery always happens per function 159 * and the platform determines which functions are affected for 160 * multi-function devices. 161 */ 162 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) 163 { 164 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 165 struct pci_driver *driver; 166 167 /* 168 * Ensure that the PCI function is not removed concurrently, no driver 169 * is unbound or probed and that userspace can't access its 170 * configuration space while we perform recovery. 171 */ 172 pci_dev_lock(pdev); 173 if (pdev->error_state == pci_channel_io_perm_failure) { 174 ers_res = PCI_ERS_RESULT_DISCONNECT; 175 goto out_unlock; 176 } 177 pdev->error_state = pci_channel_io_frozen; 178 179 if (is_passed_through(to_zpci(pdev))) { 180 pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", 181 pci_name(pdev)); 182 goto out_unlock; 183 } 184 185 driver = to_pci_driver(pdev->dev.driver); 186 if (!is_driver_supported(driver)) { 187 if (!driver) 188 pr_info("%s: Cannot be recovered because no driver is bound to the device\n", 189 pci_name(pdev)); 190 else 191 pr_info("%s: The %s driver bound to the device does not support error recovery\n", 192 pci_name(pdev), 193 driver->name); 194 goto out_unlock; 195 } 196 197 ers_res = zpci_event_notify_error_detected(pdev, driver); 198 if (ers_result_indicates_abort(ers_res)) 199 goto out_unlock; 200 201 if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { 202 ers_res = zpci_event_do_error_state_clear(pdev, driver); 203 if (ers_result_indicates_abort(ers_res)) 204 goto out_unlock; 205 } 206 207 if (ers_res == PCI_ERS_RESULT_NEED_RESET) 208 ers_res = zpci_event_do_reset(pdev, driver); 209 210 if (ers_res != PCI_ERS_RESULT_RECOVERED) { 211 pr_err("%s: Automatic recovery failed; operator intervention is required\n", 212 pci_name(pdev)); 213 goto out_unlock; 214 } 215 216 pr_info("%s: The device is ready to resume operations\n", pci_name(pdev)); 217 if (driver->err_handler->resume) 218 driver->err_handler->resume(pdev); 219 out_unlock: 220 pci_dev_unlock(pdev); 221 222 return ers_res; 223 } 224 225 /* zpci_event_io_failure - Report PCI channel failure state to driver 226 * @pdev: PCI function for which to report 227 * @es: PCI channel failure state to report 228 */ 229 static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) 230 { 231 struct pci_driver *driver; 232 233 pci_dev_lock(pdev); 234 pdev->error_state = es; 235 /** 236 * While vfio-pci's error_detected callback notifies user-space QEMU 237 * reacts to this by freezing the guest. In an s390 environment PCI 238 * errors are rarely fatal so this is overkill. Instead in the future 239 * we will inject the error event and let the guest recover the device 240 * itself. 241 */ 242 if (is_passed_through(to_zpci(pdev))) 243 goto out; 244 driver = to_pci_driver(pdev->dev.driver); 245 if (driver && driver->err_handler && driver->err_handler->error_detected) 246 driver->err_handler->error_detected(pdev, pdev->error_state); 247 out: 248 pci_dev_unlock(pdev); 249 } 250 251 static void __zpci_event_error(struct zpci_ccdf_err *ccdf) 252 { 253 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); 254 struct pci_dev *pdev = NULL; 255 pci_ers_result_t ers_res; 256 257 zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", 258 ccdf->fid, ccdf->fh, ccdf->pec); 259 zpci_err("error CCDF:\n"); 260 zpci_err_hex(ccdf, sizeof(*ccdf)); 261 262 if (zdev) { 263 zpci_update_fh(zdev, ccdf->fh); 264 if (zdev->zbus->bus) 265 pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); 266 } 267 268 pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", 269 pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); 270 271 if (!pdev) 272 goto no_pdev; 273 274 switch (ccdf->pec) { 275 case 0x002a: /* Error event concerns FMB */ 276 case 0x002b: 277 case 0x002c: 278 break; 279 case 0x0040: /* Service Action or Error Recovery Failed */ 280 case 0x003b: 281 zpci_event_io_failure(pdev, pci_channel_io_perm_failure); 282 break; 283 default: /* PCI function left in the error state attempt to recover */ 284 ers_res = zpci_event_attempt_error_recovery(pdev); 285 if (ers_res != PCI_ERS_RESULT_RECOVERED) 286 zpci_event_io_failure(pdev, pci_channel_io_perm_failure); 287 break; 288 } 289 pci_dev_put(pdev); 290 no_pdev: 291 zpci_zdev_put(zdev); 292 } 293 294 void zpci_event_error(void *data) 295 { 296 if (zpci_is_enabled()) 297 __zpci_event_error(data); 298 } 299 300 static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) 301 { 302 zpci_update_fh(zdev, fh); 303 /* Give the driver a hint that the function is 304 * already unusable. 305 */ 306 zpci_bus_remove_device(zdev, true); 307 /* Even though the device is already gone we still 308 * need to free zPCI resources as part of the disable. 309 */ 310 if (zdev->dma_table) 311 zpci_dma_exit_device(zdev); 312 if (zdev_enabled(zdev)) 313 zpci_disable_device(zdev); 314 zdev->state = ZPCI_FN_STATE_STANDBY; 315 } 316 317 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) 318 { 319 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); 320 bool existing_zdev = !!zdev; 321 enum zpci_state state; 322 323 zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", 324 ccdf->fid, ccdf->fh, ccdf->pec); 325 switch (ccdf->pec) { 326 case 0x0301: /* Reserved|Standby -> Configured */ 327 if (!zdev) { 328 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); 329 if (IS_ERR(zdev)) 330 break; 331 } else { 332 /* the configuration request may be stale */ 333 if (zdev->state != ZPCI_FN_STATE_STANDBY) 334 break; 335 zdev->state = ZPCI_FN_STATE_CONFIGURED; 336 } 337 zpci_scan_configured_device(zdev, ccdf->fh); 338 break; 339 case 0x0302: /* Reserved -> Standby */ 340 if (!zdev) 341 zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); 342 else 343 zpci_update_fh(zdev, ccdf->fh); 344 break; 345 case 0x0303: /* Deconfiguration requested */ 346 if (zdev) { 347 /* The event may have been queued before we confirgured 348 * the device. 349 */ 350 if (zdev->state != ZPCI_FN_STATE_CONFIGURED) 351 break; 352 zpci_update_fh(zdev, ccdf->fh); 353 zpci_deconfigure_device(zdev); 354 } 355 break; 356 case 0x0304: /* Configured -> Standby|Reserved */ 357 if (zdev) { 358 /* The event may have been queued before we confirgured 359 * the device.: 360 */ 361 if (zdev->state == ZPCI_FN_STATE_CONFIGURED) 362 zpci_event_hard_deconfigured(zdev, ccdf->fh); 363 /* The 0x0304 event may immediately reserve the device */ 364 if (!clp_get_state(zdev->fid, &state) && 365 state == ZPCI_FN_STATE_RESERVED) { 366 zpci_device_reserved(zdev); 367 } 368 } 369 break; 370 case 0x0306: /* 0x308 or 0x302 for multiple devices */ 371 zpci_remove_reserved_devices(); 372 clp_scan_pci_devices(); 373 break; 374 case 0x0308: /* Standby -> Reserved */ 375 if (!zdev) 376 break; 377 zpci_device_reserved(zdev); 378 break; 379 default: 380 break; 381 } 382 if (existing_zdev) 383 zpci_zdev_put(zdev); 384 } 385 386 void zpci_event_availability(void *data) 387 { 388 if (zpci_is_enabled()) 389 __zpci_event_availability(data); 390 } 391