// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2014 - 2020 Intel Corporation */
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include "adf_accel_devices.h"
#include "adf_common_drv.h"

/*
 * Dedicated workqueue on which device-reset work items run.  Allocated in
 * adf_init_aer() with WQ_MEM_RECLAIM and destroyed in adf_exit_aer().
 */
static struct workqueue_struct *device_reset_wq;

/*
 * PCI AER .error_detected callback.
 *
 * Decides whether recovery should be attempted for the device that raised
 * the error: if no acceleration device is registered for @pdev, or the
 * channel is permanently failed, report PCI_ERS_RESULT_DISCONNECT;
 * otherwise request a slot reset (the AER core will then invoke
 * adf_slot_reset()).
 */
static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
					   pci_channel_state_t state)
{
	struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);

	dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n");
	if (!accel_dev) {
		dev_err(&pdev->dev, "Can't find acceleration device\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	if (state == pci_channel_io_perm_failure) {
		dev_err(&pdev->dev, "Can't recover from device error\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	return PCI_ERS_RESULT_NEED_RESET;
}

/* reset dev data */
struct adf_reset_dev_data {
	int mode;			/* ADF_DEV_RESET_SYNC or _ASYNC */
	struct adf_accel_dev *accel_dev;
	struct completion compl;	/* signalled to wake a sync waiter */
	struct work_struct reset_work;	/* runs adf_device_reset_worker() */
};

/*
 * Reset the device via a secondary bus reset: pulse the bus-reset bit in
 * the parent bridge's bridge-control register, with 100 ms settle delays
 * after assert and de-assert.  If the device has no parent bridge, the
 * config write is issued to the device itself.
 */
void adf_reset_sbr(struct adf_accel_dev *accel_dev)
{
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
	struct pci_dev *parent = pdev->bus->self;
	u16 bridge_ctl = 0;

	if (!parent)
		parent = pdev;

	/* Best-effort: log and proceed even if transactions are pending. */
	if (!pci_wait_for_pending_transaction(pdev))
		dev_info(&GET_DEV(accel_dev),
			 "Transaction still in progress. Proceeding\n");

	dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n");

	pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl);
	bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET;
	pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
	msleep(100);
	bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
	pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
	msleep(100);
}
EXPORT_SYMBOL_GPL(adf_reset_sbr);

/* Reset the device via PCIe Function Level Reset. */
void adf_reset_flr(struct adf_accel_dev *accel_dev)
{
	pcie_flr(accel_to_pci_dev(accel_dev));
}
EXPORT_SYMBOL_GPL(adf_reset_flr);

/*
 * Reset the device using the hw-specific reset_device() hook (if the
 * hardware provides one), then restore the previously saved PCI config
 * state and re-save it so a later restore starts from this state.
 */
void adf_dev_restore(struct adf_accel_dev *accel_dev)
{
	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

	if (hw_device->reset_device) {
		dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n",
			 accel_dev->accel_id);
		hw_device->reset_device(accel_dev);
		pci_restore_state(pdev);
		pci_save_state(pdev);
	}
}

/*
 * Workqueue handler that performs the actual device restart.
 *
 * Ownership of @reset_data: in async mode, or when a sync waiter has
 * already timed out (completion_done()), this worker frees it; otherwise
 * the sync waiter in adf_dev_aer_schedule_reset() frees it after being
 * woken via complete().
 *
 * NOTE(review): on restart failure with a sync waiter still blocked
 * (mode == SYNC and !completion_done()), the early return neither frees
 * reset_data nor completes the completion — the waiter times out and the
 * allocation appears to leak; confirm this is intended.
 */
static void adf_device_reset_worker(struct work_struct *work)
{
	struct adf_reset_dev_data *reset_data =
		container_of(work, struct adf_reset_dev_data, reset_work);
	struct adf_accel_dev *accel_dev = reset_data->accel_dev;

	adf_dev_restarting_notify(accel_dev);
	if (adf_dev_restart(accel_dev)) {
		/* The device hanged and we can't restart it so stop here */
		dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
		if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
		    completion_done(&reset_data->compl))
			kfree(reset_data);
		WARN(1, "QAT: device restart failed. Device is unusable\n");
		return;
	}
	adf_dev_restarted_notify(accel_dev);
	clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);

	/*
	 * The dev is back alive. Notify the caller if in sync mode
	 *
	 * If device restart will take a more time than expected,
	 * the schedule_reset() function can timeout and exit. This can be
	 * detected by calling the completion_done() function. In this case
	 * the reset_data structure needs to be freed here.
	 */
	if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
	    completion_done(&reset_data->compl))
		kfree(reset_data);
	else
		complete(&reset_data->compl);
}

/*
 * Queue a device reset on device_reset_wq.
 *
 * @mode: ADF_DEV_RESET_SYNC to block (up to 10 s) for the reset to finish,
 *	  ADF_DEV_RESET_ASYNC to return immediately after queueing.
 *
 * Returns 0 on success or if a reset is already in progress (or the device
 * is not started), -ENOMEM on allocation failure, -EFAULT if a sync reset
 * timed out.  On timeout, reset_data is deliberately NOT freed here: the
 * worker detects the timeout via completion_done() and frees it.
 */
static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
				      enum adf_dev_reset_mode mode)
{
	struct adf_reset_dev_data *reset_data;

	/* Nothing to do if the device never started or is already resetting. */
	if (!adf_dev_started(accel_dev) ||
	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
		return 0;

	set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
	reset_data = kzalloc(sizeof(*reset_data), GFP_KERNEL);
	if (!reset_data)
		return -ENOMEM;
	reset_data->accel_dev = accel_dev;
	init_completion(&reset_data->compl);
	reset_data->mode = mode;
	INIT_WORK(&reset_data->reset_work, adf_device_reset_worker);
	queue_work(device_reset_wq, &reset_data->reset_work);

	/* If in sync mode wait for the result */
	if (mode == ADF_DEV_RESET_SYNC) {
		int ret = 0;
		/* Maximum device reset time is 10 seconds */
		unsigned long wait_jiffies = msecs_to_jiffies(10000);
		unsigned long timeout = wait_for_completion_timeout(
			&reset_data->compl, wait_jiffies);
		if (!timeout) {
			dev_err(&GET_DEV(accel_dev),
				"Reset device timeout expired\n");
			ret = -EFAULT;
		} else {
			/* Worker completed; waiter owns and frees reset_data. */
			kfree(reset_data);
		}
		return ret;
	}
	return 0;
}

/*
 * PCI AER .slot_reset callback: after the link reset, schedule a
 * synchronous device restart and report whether recovery succeeded.
 */
static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
{
	struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);

	if (!accel_dev) {
		pr_err("QAT: Can't find acceleration device\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}
	if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
		return PCI_ERS_RESULT_DISCONNECT;

	return PCI_ERS_RESULT_RECOVERED;
}

/* PCI AER .resume callback: recovery finished, just log the outcome. */
static void adf_resume(struct pci_dev *pdev)
{
	dev_info(&pdev->dev, "Acceleration driver reset completed\n");
	dev_info(&pdev->dev, "Device is up and running\n");
}

const struct pci_error_handlers adf_err_handler = {
	.error_detected = adf_error_detected,
	.slot_reset = adf_slot_reset,
	.resume = adf_resume,
};
EXPORT_SYMBOL_GPL(adf_err_handler);

/*
 * Allocate the reset workqueue.  WQ_MEM_RECLAIM guarantees forward
 * progress under memory pressure.  Returns 0 on success, -EFAULT on
 * allocation failure.
 */
int adf_init_aer(void)
{
	device_reset_wq = alloc_workqueue("qat_device_reset_wq",
					  WQ_MEM_RECLAIM, 0);
	return !device_reset_wq ? -EFAULT : 0;
}

/* Tear down the reset workqueue; safe to call if init failed. */
void adf_exit_aer(void)
{
	if (device_reset_wq)
		destroy_workqueue(device_reset_wq);
	device_reset_wq = NULL;
}