1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 // Copyright (c) 2018 Mellanox Technologies 3 4 #include <linux/mlx5/driver.h> 5 6 #include "mlx5_core.h" 7 #include "lib/eq.h" 8 #include "lib/mlx5.h" 9 10 struct mlx5_event_nb { 11 struct mlx5_nb nb; 12 void *ctx; 13 }; 14 15 /* General events handlers for the low level mlx5_core driver 16 * 17 * Other Major feature specific events such as 18 * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with 19 * separate notifiers callbacks, specifically by those mlx5 components. 20 */ 21 static int any_notifier(struct notifier_block *, unsigned long, void *); 22 static int temp_warn(struct notifier_block *, unsigned long, void *); 23 static int port_module(struct notifier_block *, unsigned long, void *); 24 static int pcie_core(struct notifier_block *, unsigned long, void *); 25 26 /* handler which forwards the event to events->nh, driver notifiers */ 27 static int forward_event(struct notifier_block *, unsigned long, void *); 28 29 static struct mlx5_nb events_nbs_ref[] = { 30 /* Events to be proccessed by mlx5_core */ 31 {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, 32 {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, 33 {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, 34 {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, 35 36 /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ 37 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, 38 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, 39 /* QP/WQ resource events to forward */ 40 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, 41 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, 42 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, 43 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, 44 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, 45 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, 46 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, 47 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, 48 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, 49 /* SRQ events */ 50 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, 51 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, 52 }; 53 54 struct mlx5_events { 55 struct mlx5_core_dev *dev; 56 struct workqueue_struct *wq; 57 struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; 58 /* driver notifier chain */ 59 struct atomic_notifier_head nh; 60 /* port module events stats */ 61 struct mlx5_pme_stats pme_stats; 62 /*pcie_core*/ 63 struct work_struct pcie_core_work; 64 }; 65 66 static const char *eqe_type_str(u8 type) 67 { 68 switch (type) { 69 case MLX5_EVENT_TYPE_COMP: 70 return "MLX5_EVENT_TYPE_COMP"; 71 case MLX5_EVENT_TYPE_PATH_MIG: 72 return "MLX5_EVENT_TYPE_PATH_MIG"; 73 case MLX5_EVENT_TYPE_COMM_EST: 74 return "MLX5_EVENT_TYPE_COMM_EST"; 75 case MLX5_EVENT_TYPE_SQ_DRAINED: 76 return "MLX5_EVENT_TYPE_SQ_DRAINED"; 77 case MLX5_EVENT_TYPE_SRQ_LAST_WQE: 78 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; 79 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: 80 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; 81 case MLX5_EVENT_TYPE_CQ_ERROR: 82 return "MLX5_EVENT_TYPE_CQ_ERROR"; 83 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: 84 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; 85 case MLX5_EVENT_TYPE_PATH_MIG_FAILED: 86 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; 87 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: 88 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; 89 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: 90 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; 91 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: 92 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; 93 case MLX5_EVENT_TYPE_INTERNAL_ERROR: 94 return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; 95 case MLX5_EVENT_TYPE_PORT_CHANGE: 96 return "MLX5_EVENT_TYPE_PORT_CHANGE"; 97 case MLX5_EVENT_TYPE_GPIO_EVENT: 98 return "MLX5_EVENT_TYPE_GPIO_EVENT"; 99 case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: 100 return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; 101 case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: 102 return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; 103 case MLX5_EVENT_TYPE_REMOTE_CONFIG: 104 return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; 105 case MLX5_EVENT_TYPE_DB_BF_CONGESTION: 106 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; 107 case MLX5_EVENT_TYPE_STALL_EVENT: 108 return "MLX5_EVENT_TYPE_STALL_EVENT"; 109 case MLX5_EVENT_TYPE_CMD: 110 return "MLX5_EVENT_TYPE_CMD"; 111 case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: 112 return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; 113 case MLX5_EVENT_TYPE_PAGE_REQUEST: 114 return "MLX5_EVENT_TYPE_PAGE_REQUEST"; 115 case MLX5_EVENT_TYPE_PAGE_FAULT: 116 return "MLX5_EVENT_TYPE_PAGE_FAULT"; 117 case MLX5_EVENT_TYPE_PPS_EVENT: 118 return "MLX5_EVENT_TYPE_PPS_EVENT"; 119 case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: 120 return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; 121 case MLX5_EVENT_TYPE_FPGA_ERROR: 122 return "MLX5_EVENT_TYPE_FPGA_ERROR"; 123 case MLX5_EVENT_TYPE_FPGA_QP_ERROR: 124 return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; 125 case MLX5_EVENT_TYPE_GENERAL_EVENT: 126 return "MLX5_EVENT_TYPE_GENERAL_EVENT"; 127 case MLX5_EVENT_TYPE_MONITOR_COUNTER: 128 return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; 129 case MLX5_EVENT_TYPE_DEVICE_TRACER: 130 return "MLX5_EVENT_TYPE_DEVICE_TRACER"; 131 default: 132 return "Unrecognized event"; 133 } 134 } 135 136 /* handles all FW events, type == eqe->type */ 137 static int any_notifier(struct notifier_block *nb, 138 unsigned long type, void *data) 139 { 140 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 141 struct mlx5_events *events = event_nb->ctx; 142 struct mlx5_eqe *eqe = data; 143 144 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", 145 eqe_type_str(eqe->type), eqe->sub_type); 146 return NOTIFY_OK; 147 } 148 149 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ 150 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) 151 { 152 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 153 struct mlx5_events *events = event_nb->ctx; 154 struct mlx5_eqe *eqe = data; 155 u64 value_lsb; 156 u64 value_msb; 157 158 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); 159 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); 160 161 mlx5_core_warn(events->dev, 162 "High temperature on sensors with bit set %llx %llx", 163 value_msb, value_lsb); 164 165 return NOTIFY_OK; 166 } 167 168 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ 169 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) 170 { 171 switch (status) { 172 case MLX5_MODULE_STATUS_PLUGGED: 173 return "Cable plugged"; 174 case MLX5_MODULE_STATUS_UNPLUGGED: 175 return "Cable unplugged"; 176 case MLX5_MODULE_STATUS_ERROR: 177 return "Cable error"; 178 case MLX5_MODULE_STATUS_DISABLED: 179 return "Cable disabled"; 180 default: 181 return "Unknown status"; 182 } 183 } 184 185 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) 186 { 187 switch (error) { 188 case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: 189 return "Power budget exceeded"; 190 case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: 191 return "Long Range for non MLNX cable"; 192 case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: 193 return "Bus stuck (I2C or data shorted)"; 194 case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: 195 return "No EEPROM/retry timeout"; 196 case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: 197 return "Enforce part number list"; 198 case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: 199 return "Unknown identifier"; 200 case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: 201 return "High Temperature"; 202 case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: 203 return "Bad or shorted cable/module"; 204 case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: 205 return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; 206 default: 207 return "Unknown error"; 208 } 209 } 210 211 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ 212 static int port_module(struct notifier_block *nb, unsigned long type, void *data) 213 { 214 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 215 struct mlx5_events *events = event_nb->ctx; 216 struct mlx5_eqe *eqe = data; 217 218 enum port_module_event_status_type module_status; 219 enum port_module_event_error_type error_type; 220 struct mlx5_eqe_port_module *module_event_eqe; 221 const char *status_str; 222 u8 module_num; 223 224 module_event_eqe = &eqe->data.port_module; 225 module_status = module_event_eqe->module_status & 226 PORT_MODULE_EVENT_MODULE_STATUS_MASK; 227 error_type = module_event_eqe->error_type & 228 PORT_MODULE_EVENT_ERROR_TYPE_MASK; 229 230 if (module_status < MLX5_MODULE_STATUS_NUM) 231 events->pme_stats.status_counters[module_status]++; 232 233 if (module_status == MLX5_MODULE_STATUS_ERROR) 234 if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) 235 events->pme_stats.error_counters[error_type]++; 236 237 if (!printk_ratelimit()) 238 return NOTIFY_OK; 239 240 module_num = module_event_eqe->module; 241 status_str = mlx5_pme_status_to_string(module_status); 242 if (module_status == MLX5_MODULE_STATUS_ERROR) { 243 const char *error_str = mlx5_pme_error_to_string(error_type); 244 245 mlx5_core_err(events->dev, 246 "Port module event[error]: module %u, %s, %s\n", 247 module_num, status_str, error_str); 248 } else { 249 mlx5_core_info(events->dev, 250 "Port module event: module %u, %s\n", 251 module_num, status_str); 252 } 253 254 return NOTIFY_OK; 255 } 256 257 enum { 258 MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0, 259 MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1, 260 MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2, 261 }; 262 263 static void mlx5_pcie_event(struct work_struct *work) 264 { 265 u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0}; 266 u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0}; 267 struct mlx5_events *events; 268 struct mlx5_core_dev *dev; 269 u8 power_status; 270 u16 pci_power; 271 272 events = container_of(work, struct mlx5_events, pcie_core_work); 273 dev = events->dev; 274 275 if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power)) 276 return; 277 278 mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), 279 MLX5_REG_MPEIN, 0, 0); 280 power_status = MLX5_GET(mpein_reg, out, pwr_status); 281 pci_power = MLX5_GET(mpein_reg, out, pci_power); 282 283 switch (power_status) { 284 case MLX5_PCI_POWER_COULD_NOT_BE_READ: 285 mlx5_core_info_rl(dev, 286 "PCIe slot power capability was not advertised.\n"); 287 break; 288 case MLX5_PCI_POWER_INSUFFICIENT_REPORTED: 289 mlx5_core_warn_rl(dev, 290 "Detected insufficient power on the PCIe slot (%uW).\n", 291 pci_power); 292 break; 293 case MLX5_PCI_POWER_SUFFICIENT_REPORTED: 294 mlx5_core_info_rl(dev, 295 "PCIe slot advertised sufficient power (%uW).\n", 296 pci_power); 297 break; 298 } 299 } 300 301 static int pcie_core(struct notifier_block *nb, unsigned long type, void *data) 302 { 303 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, 304 struct mlx5_event_nb, 305 nb); 306 struct mlx5_events *events = event_nb->ctx; 307 struct mlx5_eqe *eqe = data; 308 309 switch (eqe->sub_type) { 310 case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT: 311 queue_work(events->wq, &events->pcie_core_work); 312 break; 313 default: 314 return NOTIFY_DONE; 315 } 316 317 return NOTIFY_OK; 318 } 319 320 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) 321 { 322 *stats = dev->priv.events->pme_stats; 323 } 324 325 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ 326 static int forward_event(struct notifier_block *nb, unsigned long event, void *data) 327 { 328 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 329 struct mlx5_events *events = event_nb->ctx; 330 struct mlx5_eqe *eqe = data; 331 332 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", 333 eqe_type_str(eqe->type), eqe->sub_type); 334 atomic_notifier_call_chain(&events->nh, event, data); 335 return NOTIFY_OK; 336 } 337 338 int mlx5_events_init(struct mlx5_core_dev *dev) 339 { 340 struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); 341 342 if (!events) 343 return -ENOMEM; 344 345 ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); 346 events->dev = dev; 347 dev->priv.events = events; 348 events->wq = create_singlethread_workqueue("mlx5_events"); 349 if (!events->wq) 350 return -ENOMEM; 351 INIT_WORK(&events->pcie_core_work, mlx5_pcie_event); 352 353 return 0; 354 } 355 356 void mlx5_events_cleanup(struct mlx5_core_dev *dev) 357 { 358 destroy_workqueue(dev->priv.events->wq); 359 kvfree(dev->priv.events); 360 } 361 362 void mlx5_events_start(struct mlx5_core_dev *dev) 363 { 364 struct mlx5_events *events = dev->priv.events; 365 int i; 366 367 for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { 368 events->notifiers[i].nb = events_nbs_ref[i]; 369 events->notifiers[i].ctx = events; 370 mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); 371 } 372 } 373 374 void mlx5_events_stop(struct mlx5_core_dev *dev) 375 { 376 struct mlx5_events *events = dev->priv.events; 377 int i; 378 379 for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) 380 mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); 381 flush_workqueue(events->wq); 382 } 383 384 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) 385 { 386 struct mlx5_events *events = dev->priv.events; 387 388 return atomic_notifier_chain_register(&events->nh, nb); 389 } 390 EXPORT_SYMBOL(mlx5_notifier_register); 391 392 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) 393 { 394 struct mlx5_events *events = dev->priv.events; 395 396 return atomic_notifier_chain_unregister(&events->nh, nb); 397 } 398 EXPORT_SYMBOL(mlx5_notifier_unregister); 399 400 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) 401 { 402 return atomic_notifier_call_chain(&events->nh, event, data); 403 } 404