1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * This file implements the error recovery as a core part of PCIe error 4 * reporting. When a PCIe error is delivered, an error message will be 5 * collected and printed to console, then, an error recovery procedure 6 * will be executed by following the PCI error recovery rules. 7 * 8 * Copyright (C) 2006 Intel Corp. 9 * Tom Long Nguyen (tom.l.nguyen@intel.com) 10 * Zhang Yanmin (yanmin.zhang@intel.com) 11 */ 12 13 #include <linux/pci.h> 14 #include <linux/module.h> 15 #include <linux/kernel.h> 16 #include <linux/errno.h> 17 #include <linux/aer.h> 18 #include "portdrv.h" 19 #include "../pci.h" 20 21 static pci_ers_result_t merge_result(enum pci_ers_result orig, 22 enum pci_ers_result new) 23 { 24 if (new == PCI_ERS_RESULT_NO_AER_DRIVER) 25 return PCI_ERS_RESULT_NO_AER_DRIVER; 26 27 if (new == PCI_ERS_RESULT_NONE) 28 return orig; 29 30 switch (orig) { 31 case PCI_ERS_RESULT_CAN_RECOVER: 32 case PCI_ERS_RESULT_RECOVERED: 33 orig = new; 34 break; 35 case PCI_ERS_RESULT_DISCONNECT: 36 if (new == PCI_ERS_RESULT_NEED_RESET) 37 orig = PCI_ERS_RESULT_NEED_RESET; 38 break; 39 default: 40 break; 41 } 42 43 return orig; 44 } 45 46 static int report_error_detected(struct pci_dev *dev, 47 enum pci_channel_state state, 48 enum pci_ers_result *result) 49 { 50 pci_ers_result_t vote; 51 const struct pci_error_handlers *err_handler; 52 53 device_lock(&dev->dev); 54 if (!pci_dev_set_io_state(dev, state) || 55 !dev->driver || 56 !dev->driver->err_handler || 57 !dev->driver->err_handler->error_detected) { 58 /* 59 * If any device in the subtree does not have an error_detected 60 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent 61 * error callbacks of "any" device in the subtree, and will 62 * exit in the disconnected error state. 63 */ 64 if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) 65 vote = PCI_ERS_RESULT_NO_AER_DRIVER; 66 else 67 vote = PCI_ERS_RESULT_NONE; 68 } else { 69 err_handler = dev->driver->err_handler; 70 vote = err_handler->error_detected(dev, state); 71 } 72 pci_uevent_ers(dev, vote); 73 *result = merge_result(*result, vote); 74 device_unlock(&dev->dev); 75 return 0; 76 } 77 78 static int report_frozen_detected(struct pci_dev *dev, void *data) 79 { 80 return report_error_detected(dev, pci_channel_io_frozen, data); 81 } 82 83 static int report_normal_detected(struct pci_dev *dev, void *data) 84 { 85 return report_error_detected(dev, pci_channel_io_normal, data); 86 } 87 88 static int report_mmio_enabled(struct pci_dev *dev, void *data) 89 { 90 pci_ers_result_t vote, *result = data; 91 const struct pci_error_handlers *err_handler; 92 93 device_lock(&dev->dev); 94 if (!dev->driver || 95 !dev->driver->err_handler || 96 !dev->driver->err_handler->mmio_enabled) 97 goto out; 98 99 err_handler = dev->driver->err_handler; 100 vote = err_handler->mmio_enabled(dev); 101 *result = merge_result(*result, vote); 102 out: 103 device_unlock(&dev->dev); 104 return 0; 105 } 106 107 static int report_slot_reset(struct pci_dev *dev, void *data) 108 { 109 pci_ers_result_t vote, *result = data; 110 const struct pci_error_handlers *err_handler; 111 112 device_lock(&dev->dev); 113 if (!dev->driver || 114 !dev->driver->err_handler || 115 !dev->driver->err_handler->slot_reset) 116 goto out; 117 118 err_handler = dev->driver->err_handler; 119 vote = err_handler->slot_reset(dev); 120 *result = merge_result(*result, vote); 121 out: 122 device_unlock(&dev->dev); 123 return 0; 124 } 125 126 static int report_resume(struct pci_dev *dev, void *data) 127 { 128 const struct pci_error_handlers *err_handler; 129 130 device_lock(&dev->dev); 131 if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || 132 !dev->driver || 133 !dev->driver->err_handler || 134 !dev->driver->err_handler->resume) 135 goto out; 136 137 err_handler = dev->driver->err_handler; 138 err_handler->resume(dev); 139 out: 140 pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); 141 device_unlock(&dev->dev); 142 return 0; 143 } 144 145 /** 146 * default_reset_link - default reset function 147 * @dev: pointer to pci_dev data structure 148 * 149 * Invoked when performing link reset on a Downstream Port or a 150 * Root Port with no aer driver. 151 */ 152 static pci_ers_result_t default_reset_link(struct pci_dev *dev) 153 { 154 int rc; 155 156 rc = pci_bus_error_reset(dev); 157 pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); 158 return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 159 } 160 161 static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) 162 { 163 pci_ers_result_t status; 164 struct pcie_port_service_driver *driver = NULL; 165 166 driver = pcie_port_find_service(dev, service); 167 if (driver && driver->reset_link) { 168 status = driver->reset_link(dev); 169 } else if (pcie_downstream_port(dev)) { 170 status = default_reset_link(dev); 171 } else { 172 pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", 173 pci_name(dev)); 174 return PCI_ERS_RESULT_DISCONNECT; 175 } 176 177 if (status != PCI_ERS_RESULT_RECOVERED) { 178 pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", 179 pci_name(dev)); 180 return PCI_ERS_RESULT_DISCONNECT; 181 } 182 183 return status; 184 } 185 186 void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, 187 u32 service) 188 { 189 pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; 190 struct pci_bus *bus; 191 192 /* 193 * Error recovery runs on all subordinates of the first downstream port. 194 * If the downstream port detected the error, it is cleared at the end. 195 */ 196 if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || 197 pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) 198 dev = dev->bus->self; 199 bus = dev->subordinate; 200 201 pci_dbg(dev, "broadcast error_detected message\n"); 202 if (state == pci_channel_io_frozen) 203 pci_walk_bus(bus, report_frozen_detected, &status); 204 else 205 pci_walk_bus(bus, report_normal_detected, &status); 206 207 if (state == pci_channel_io_frozen && 208 reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED) 209 goto failed; 210 211 if (status == PCI_ERS_RESULT_CAN_RECOVER) { 212 status = PCI_ERS_RESULT_RECOVERED; 213 pci_dbg(dev, "broadcast mmio_enabled message\n"); 214 pci_walk_bus(bus, report_mmio_enabled, &status); 215 } 216 217 if (status == PCI_ERS_RESULT_NEED_RESET) { 218 /* 219 * TODO: Should call platform-specific 220 * functions to reset slot before calling 221 * drivers' slot_reset callbacks? 222 */ 223 status = PCI_ERS_RESULT_RECOVERED; 224 pci_dbg(dev, "broadcast slot_reset message\n"); 225 pci_walk_bus(bus, report_slot_reset, &status); 226 } 227 228 if (status != PCI_ERS_RESULT_RECOVERED) 229 goto failed; 230 231 pci_dbg(dev, "broadcast resume message\n"); 232 pci_walk_bus(bus, report_resume, &status); 233 234 pci_aer_clear_device_status(dev); 235 pci_cleanup_aer_uncorrect_error_status(dev); 236 pci_info(dev, "AER: Device recovery successful\n"); 237 return; 238 239 failed: 240 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); 241 242 /* TODO: Should kernel panic here? */ 243 pci_info(dev, "AER: Device recovery failed\n"); 244 } 245