1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2012 4 * 5 * Author(s): 6 * Jan Glauber <jang@linux.vnet.ibm.com> 7 */ 8 9 #define KMSG_COMPONENT "zpci" 10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/pci.h> 14 #include <asm/pci_debug.h> 15 #include <asm/pci_dma.h> 16 #include <asm/sclp.h> 17 18 #include "pci_bus.h" 19 20 /* Content Code Description for PCI Function Error */ 21 struct zpci_ccdf_err { 22 u32 reserved1; 23 u32 fh; /* function handle */ 24 u32 fid; /* function id */ 25 u32 ett : 4; /* expected table type */ 26 u32 mvn : 12; /* MSI vector number */ 27 u32 dmaas : 8; /* DMA address space */ 28 u32 : 6; 29 u32 q : 1; /* event qualifier */ 30 u32 rw : 1; /* read/write */ 31 u64 faddr; /* failing address */ 32 u32 reserved3; 33 u16 reserved4; 34 u16 pec; /* PCI event code */ 35 } __packed; 36 37 /* Content Code Description for PCI Function Availability */ 38 struct zpci_ccdf_avail { 39 u32 reserved1; 40 u32 fh; /* function handle */ 41 u32 fid; /* function id */ 42 u32 reserved2; 43 u32 reserved3; 44 u32 reserved4; 45 u32 reserved5; 46 u16 reserved6; 47 u16 pec; /* PCI event code */ 48 } __packed; 49 50 static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) 51 { 52 switch (ers_res) { 53 case PCI_ERS_RESULT_CAN_RECOVER: 54 case PCI_ERS_RESULT_RECOVERED: 55 case PCI_ERS_RESULT_NEED_RESET: 56 return false; 57 default: 58 return true; 59 } 60 } 61 62 static bool is_passed_through(struct zpci_dev *zdev) 63 { 64 return zdev->s390_domain; 65 } 66 67 static bool is_driver_supported(struct pci_driver *driver) 68 { 69 if (!driver || !driver->err_handler) 70 return false; 71 if (!driver->err_handler->error_detected) 72 return false; 73 if (!driver->err_handler->slot_reset) 74 return false; 75 if (!driver->err_handler->resume) 76 return false; 77 return true; 78 } 79 80 static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev, 81 struct pci_driver *driver) 82 { 83 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 84 85 ers_res = driver->err_handler->error_detected(pdev, pdev->error_state); 86 if (ers_result_indicates_abort(ers_res)) 87 pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev)); 88 else if (ers_res == PCI_ERS_RESULT_NEED_RESET) 89 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); 90 91 return ers_res; 92 } 93 94 static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, 95 struct pci_driver *driver) 96 { 97 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 98 struct zpci_dev *zdev = to_zpci(pdev); 99 int rc; 100 101 pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); 102 rc = zpci_reset_load_store_blocked(zdev); 103 if (rc) { 104 pr_err("%s: Unblocking device access failed\n", pci_name(pdev)); 105 /* Let's try a full reset instead */ 106 return PCI_ERS_RESULT_NEED_RESET; 107 } 108 109 if (driver->err_handler->mmio_enabled) { 110 ers_res = driver->err_handler->mmio_enabled(pdev); 111 if (ers_result_indicates_abort(ers_res)) { 112 pr_info("%s: Automatic recovery failed after MMIO re-enable\n", 113 pci_name(pdev)); 114 return ers_res; 115 } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { 116 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); 117 return ers_res; 118 } 119 } 120 121 pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); 122 rc = zpci_clear_error_state(zdev); 123 if (!rc) { 124 pdev->error_state = pci_channel_io_normal; 125 } else { 126 pr_err("%s: Unblocking DMA failed\n", pci_name(pdev)); 127 /* Let's try a full reset instead */ 128 return PCI_ERS_RESULT_NEED_RESET; 129 } 130 131 return ers_res; 132 } 133 134 static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, 135 struct pci_driver *driver) 136 { 137 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 138 139 pr_info("%s: Initiating reset\n", pci_name(pdev)); 140 if (zpci_hot_reset_device(to_zpci(pdev))) { 141 pr_err("%s: The reset request failed\n", pci_name(pdev)); 142 return ers_res; 143 } 144 pdev->error_state = pci_channel_io_normal; 145 ers_res = driver->err_handler->slot_reset(pdev); 146 if (ers_result_indicates_abort(ers_res)) { 147 pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); 148 return ers_res; 149 } 150 151 return ers_res; 152 } 153 154 /* zpci_event_attempt_error_recovery - Try to recover the given PCI function 155 * @pdev: PCI function to recover currently in the error state 156 * 157 * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst. 158 * With the simplification that recovery always happens per function 159 * and the platform determines which functions are affected for 160 * multi-function devices. 161 */ 162 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) 163 { 164 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 165 struct pci_driver *driver; 166 167 /* 168 * Ensure that the PCI function is not removed concurrently, no driver 169 * is unbound or probed and that userspace can't access its 170 * configuration space while we perform recovery. 171 */ 172 pci_dev_lock(pdev); 173 if (pdev->error_state == pci_channel_io_perm_failure) { 174 ers_res = PCI_ERS_RESULT_DISCONNECT; 175 goto out_unlock; 176 } 177 pdev->error_state = pci_channel_io_frozen; 178 179 if (is_passed_through(to_zpci(pdev))) { 180 pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", 181 pci_name(pdev)); 182 goto out_unlock; 183 } 184 185 driver = to_pci_driver(pdev->dev.driver); 186 if (!is_driver_supported(driver)) { 187 if (!driver) 188 pr_info("%s: Cannot be recovered because no driver is bound to the device\n", 189 pci_name(pdev)); 190 else 191 pr_info("%s: The %s driver bound to the device does not support error recovery\n", 192 pci_name(pdev), 193 driver->name); 194 goto out_unlock; 195 } 196 197 ers_res = zpci_event_notify_error_detected(pdev, driver); 198 if (ers_result_indicates_abort(ers_res)) 199 goto out_unlock; 200 201 if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { 202 ers_res = zpci_event_do_error_state_clear(pdev, driver); 203 if (ers_result_indicates_abort(ers_res)) 204 goto out_unlock; 205 } 206 207 if (ers_res == PCI_ERS_RESULT_NEED_RESET) 208 ers_res = zpci_event_do_reset(pdev, driver); 209 210 if (ers_res != PCI_ERS_RESULT_RECOVERED) { 211 pr_err("%s: Automatic recovery failed; operator intervention is required\n", 212 pci_name(pdev)); 213 goto out_unlock; 214 } 215 216 pr_info("%s: The device is ready to resume operations\n", pci_name(pdev)); 217 if (driver->err_handler->resume) 218 driver->err_handler->resume(pdev); 219 out_unlock: 220 pci_dev_unlock(pdev); 221 222 return ers_res; 223 } 224 225 /* zpci_event_io_failure - Report PCI channel failure state to driver 226 * @pdev: PCI function for which to report 227 * @es: PCI channel failure state to report 228 */ 229 static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) 230 { 231 struct pci_driver *driver; 232 233 pci_dev_lock(pdev); 234 pdev->error_state = es; 235 /** 236 * While vfio-pci's error_detected callback notifies user-space QEMU 237 * reacts to this by freezing the guest. In an s390 environment PCI 238 * errors are rarely fatal so this is overkill. Instead in the future 239 * we will inject the error event and let the guest recover the device 240 * itself. 241 */ 242 if (is_passed_through(to_zpci(pdev))) 243 goto out; 244 driver = to_pci_driver(pdev->dev.driver); 245 if (driver && driver->err_handler && driver->err_handler->error_detected) 246 driver->err_handler->error_detected(pdev, pdev->error_state); 247 out: 248 pci_dev_unlock(pdev); 249 } 250 251 static void __zpci_event_error(struct zpci_ccdf_err *ccdf) 252 { 253 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); 254 struct pci_dev *pdev = NULL; 255 pci_ers_result_t ers_res; 256 257 zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", 258 ccdf->fid, ccdf->fh, ccdf->pec); 259 zpci_err("error CCDF:\n"); 260 zpci_err_hex(ccdf, sizeof(*ccdf)); 261 262 if (zdev) { 263 zpci_update_fh(zdev, ccdf->fh); 264 if (zdev->zbus->bus) 265 pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); 266 } 267 268 pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", 269 pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); 270 271 if (!pdev) 272 goto no_pdev; 273 274 switch (ccdf->pec) { 275 case 0x003a: /* Service Action or Error Recovery Successful */ 276 ers_res = zpci_event_attempt_error_recovery(pdev); 277 if (ers_res != PCI_ERS_RESULT_RECOVERED) 278 zpci_event_io_failure(pdev, pci_channel_io_perm_failure); 279 break; 280 default: 281 /* 282 * Mark as frozen not permanently failed because the device 283 * could be subsequently recovered by the platform. 284 */ 285 zpci_event_io_failure(pdev, pci_channel_io_frozen); 286 break; 287 } 288 pci_dev_put(pdev); 289 no_pdev: 290 zpci_zdev_put(zdev); 291 } 292 293 void zpci_event_error(void *data) 294 { 295 if (zpci_is_enabled()) 296 __zpci_event_error(data); 297 } 298 299 static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) 300 { 301 zpci_update_fh(zdev, fh); 302 /* Give the driver a hint that the function is 303 * already unusable. 304 */ 305 zpci_bus_remove_device(zdev, true); 306 /* Even though the device is already gone we still 307 * need to free zPCI resources as part of the disable. 308 */ 309 if (zdev->dma_table) 310 zpci_dma_exit_device(zdev); 311 if (zdev_enabled(zdev)) 312 zpci_disable_device(zdev); 313 zdev->state = ZPCI_FN_STATE_STANDBY; 314 } 315 316 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) 317 { 318 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); 319 bool existing_zdev = !!zdev; 320 enum zpci_state state; 321 322 zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", 323 ccdf->fid, ccdf->fh, ccdf->pec); 324 zpci_err("avail CCDF:\n"); 325 zpci_err_hex(ccdf, sizeof(*ccdf)); 326 327 switch (ccdf->pec) { 328 case 0x0301: /* Reserved|Standby -> Configured */ 329 if (!zdev) { 330 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); 331 if (IS_ERR(zdev)) 332 break; 333 } else { 334 /* the configuration request may be stale */ 335 if (zdev->state != ZPCI_FN_STATE_STANDBY) 336 break; 337 zdev->state = ZPCI_FN_STATE_CONFIGURED; 338 } 339 zpci_scan_configured_device(zdev, ccdf->fh); 340 break; 341 case 0x0302: /* Reserved -> Standby */ 342 if (!zdev) 343 zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); 344 else 345 zpci_update_fh(zdev, ccdf->fh); 346 break; 347 case 0x0303: /* Deconfiguration requested */ 348 if (zdev) { 349 /* The event may have been queued before we confirgured 350 * the device. 351 */ 352 if (zdev->state != ZPCI_FN_STATE_CONFIGURED) 353 break; 354 zpci_update_fh(zdev, ccdf->fh); 355 zpci_deconfigure_device(zdev); 356 } 357 break; 358 case 0x0304: /* Configured -> Standby|Reserved */ 359 if (zdev) { 360 /* The event may have been queued before we confirgured 361 * the device.: 362 */ 363 if (zdev->state == ZPCI_FN_STATE_CONFIGURED) 364 zpci_event_hard_deconfigured(zdev, ccdf->fh); 365 /* The 0x0304 event may immediately reserve the device */ 366 if (!clp_get_state(zdev->fid, &state) && 367 state == ZPCI_FN_STATE_RESERVED) { 368 zpci_device_reserved(zdev); 369 } 370 } 371 break; 372 case 0x0306: /* 0x308 or 0x302 for multiple devices */ 373 zpci_remove_reserved_devices(); 374 clp_scan_pci_devices(); 375 break; 376 case 0x0308: /* Standby -> Reserved */ 377 if (!zdev) 378 break; 379 zpci_device_reserved(zdev); 380 break; 381 default: 382 break; 383 } 384 if (existing_zdev) 385 zpci_zdev_put(zdev); 386 } 387 388 void zpci_event_availability(void *data) 389 { 390 if (zpci_is_enabled()) 391 __zpci_event_availability(data); 392 } 393