1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 // Copyright (c) 2018 Mellanox Technologies 3 4 #include <linux/mlx5/driver.h> 5 6 #include "mlx5_core.h" 7 #include "lib/eq.h" 8 #include "lib/mlx5.h" 9 10 struct mlx5_event_nb { 11 struct mlx5_nb nb; 12 void *ctx; 13 }; 14 15 /* General events handlers for the low level mlx5_core driver 16 * 17 * Other Major feature specific events such as 18 * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with 19 * separate notifiers callbacks, specifically by those mlx5 components. 20 */ 21 static int any_notifier(struct notifier_block *, unsigned long, void *); 22 static int temp_warn(struct notifier_block *, unsigned long, void *); 23 static int port_module(struct notifier_block *, unsigned long, void *); 24 static int pcie_core(struct notifier_block *, unsigned long, void *); 25 26 /* handler which forwards the event to events->fw_nh, driver notifiers */ 27 static int forward_event(struct notifier_block *, unsigned long, void *); 28 29 static struct mlx5_nb events_nbs_ref[] = { 30 /* Events to be processed by mlx5_core */ 31 {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, 32 {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, 33 {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, 34 {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, 35 36 /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ 37 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, 38 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, 39 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE }, 40 /* QP/WQ resource events to forward */ 41 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, 42 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, 43 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, 44 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, 45 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, 46 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, 47 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, 48 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, 49 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, 50 /* SRQ events */ 51 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, 52 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, 53 }; 54 55 struct mlx5_events { 56 struct mlx5_core_dev *dev; 57 struct workqueue_struct *wq; 58 struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; 59 /* driver notifier chain for fw events */ 60 struct atomic_notifier_head fw_nh; 61 /* port module events stats */ 62 struct mlx5_pme_stats pme_stats; 63 /*pcie_core*/ 64 struct work_struct pcie_core_work; 65 /* driver notifier chain for sw events */ 66 struct blocking_notifier_head sw_nh; 67 }; 68 69 static const char *eqe_type_str(u8 type) 70 { 71 switch (type) { 72 case MLX5_EVENT_TYPE_COMP: 73 return "MLX5_EVENT_TYPE_COMP"; 74 case MLX5_EVENT_TYPE_PATH_MIG: 75 return "MLX5_EVENT_TYPE_PATH_MIG"; 76 case MLX5_EVENT_TYPE_COMM_EST: 77 return "MLX5_EVENT_TYPE_COMM_EST"; 78 case MLX5_EVENT_TYPE_SQ_DRAINED: 79 return "MLX5_EVENT_TYPE_SQ_DRAINED"; 80 case MLX5_EVENT_TYPE_SRQ_LAST_WQE: 81 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; 82 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: 83 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; 84 case MLX5_EVENT_TYPE_CQ_ERROR: 85 return "MLX5_EVENT_TYPE_CQ_ERROR"; 86 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: 87 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; 88 case MLX5_EVENT_TYPE_PATH_MIG_FAILED: 89 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; 90 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: 91 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; 92 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: 93 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; 94 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: 95 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; 96 case MLX5_EVENT_TYPE_INTERNAL_ERROR: 97 return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; 98 case MLX5_EVENT_TYPE_PORT_CHANGE: 99 return "MLX5_EVENT_TYPE_PORT_CHANGE"; 100 case MLX5_EVENT_TYPE_GPIO_EVENT: 101 return "MLX5_EVENT_TYPE_GPIO_EVENT"; 102 case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: 103 return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; 104 case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: 105 return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; 106 case MLX5_EVENT_TYPE_REMOTE_CONFIG: 107 return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; 108 case MLX5_EVENT_TYPE_DB_BF_CONGESTION: 109 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; 110 case MLX5_EVENT_TYPE_STALL_EVENT: 111 return "MLX5_EVENT_TYPE_STALL_EVENT"; 112 case MLX5_EVENT_TYPE_CMD: 113 return "MLX5_EVENT_TYPE_CMD"; 114 case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: 115 return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; 116 case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE: 117 return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE"; 118 case MLX5_EVENT_TYPE_PAGE_REQUEST: 119 return "MLX5_EVENT_TYPE_PAGE_REQUEST"; 120 case MLX5_EVENT_TYPE_PAGE_FAULT: 121 return "MLX5_EVENT_TYPE_PAGE_FAULT"; 122 case MLX5_EVENT_TYPE_PPS_EVENT: 123 return "MLX5_EVENT_TYPE_PPS_EVENT"; 124 case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: 125 return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; 126 case MLX5_EVENT_TYPE_FPGA_ERROR: 127 return "MLX5_EVENT_TYPE_FPGA_ERROR"; 128 case MLX5_EVENT_TYPE_FPGA_QP_ERROR: 129 return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; 130 case MLX5_EVENT_TYPE_GENERAL_EVENT: 131 return "MLX5_EVENT_TYPE_GENERAL_EVENT"; 132 case MLX5_EVENT_TYPE_MONITOR_COUNTER: 133 return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; 134 case MLX5_EVENT_TYPE_DEVICE_TRACER: 135 return "MLX5_EVENT_TYPE_DEVICE_TRACER"; 136 case MLX5_EVENT_TYPE_OBJECT_CHANGE: 137 return "MLX5_EVENT_TYPE_OBJECT_CHANGE"; 138 default: 139 return "Unrecognized event"; 140 } 141 } 142 143 /* handles all FW events, type == eqe->type */ 144 static int any_notifier(struct notifier_block *nb, 145 unsigned long type, void *data) 146 { 147 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 148 struct mlx5_events *events = event_nb->ctx; 149 struct mlx5_eqe *eqe = data; 150 151 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", 152 eqe_type_str(eqe->type), eqe->sub_type); 153 return NOTIFY_OK; 154 } 155 156 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ 157 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) 158 { 159 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 160 struct mlx5_events *events = event_nb->ctx; 161 struct mlx5_eqe *eqe = data; 162 u64 value_lsb; 163 u64 value_msb; 164 165 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); 166 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); 167 168 mlx5_core_warn(events->dev, 169 "High temperature on sensors with bit set %llx %llx", 170 value_msb, value_lsb); 171 172 return NOTIFY_OK; 173 } 174 175 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ 176 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) 177 { 178 switch (status) { 179 case MLX5_MODULE_STATUS_PLUGGED: 180 return "Cable plugged"; 181 case MLX5_MODULE_STATUS_UNPLUGGED: 182 return "Cable unplugged"; 183 case MLX5_MODULE_STATUS_ERROR: 184 return "Cable error"; 185 case MLX5_MODULE_STATUS_DISABLED: 186 return "Cable disabled"; 187 default: 188 return "Unknown status"; 189 } 190 } 191 192 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) 193 { 194 switch (error) { 195 case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: 196 return "Power budget exceeded"; 197 case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: 198 return "Long Range for non MLNX cable"; 199 case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: 200 return "Bus stuck (I2C or data shorted)"; 201 case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: 202 return "No EEPROM/retry timeout"; 203 case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: 204 return "Enforce part number list"; 205 case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: 206 return "Unknown identifier"; 207 case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: 208 return "High Temperature"; 209 case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: 210 return "Bad or shorted cable/module"; 211 case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: 212 return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; 213 default: 214 return "Unknown error"; 215 } 216 } 217 218 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ 219 static int port_module(struct notifier_block *nb, unsigned long type, void *data) 220 { 221 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 222 struct mlx5_events *events = event_nb->ctx; 223 struct mlx5_eqe *eqe = data; 224 225 enum port_module_event_status_type module_status; 226 enum port_module_event_error_type error_type; 227 struct mlx5_eqe_port_module *module_event_eqe; 228 const char *status_str; 229 u8 module_num; 230 231 module_event_eqe = &eqe->data.port_module; 232 module_status = module_event_eqe->module_status & 233 PORT_MODULE_EVENT_MODULE_STATUS_MASK; 234 error_type = module_event_eqe->error_type & 235 PORT_MODULE_EVENT_ERROR_TYPE_MASK; 236 237 if (module_status < MLX5_MODULE_STATUS_NUM) 238 events->pme_stats.status_counters[module_status]++; 239 240 if (module_status == MLX5_MODULE_STATUS_ERROR) 241 if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) 242 events->pme_stats.error_counters[error_type]++; 243 244 if (!printk_ratelimit()) 245 return NOTIFY_OK; 246 247 module_num = module_event_eqe->module; 248 status_str = mlx5_pme_status_to_string(module_status); 249 if (module_status == MLX5_MODULE_STATUS_ERROR) { 250 const char *error_str = mlx5_pme_error_to_string(error_type); 251 252 mlx5_core_err(events->dev, 253 "Port module event[error]: module %u, %s, %s\n", 254 module_num, status_str, error_str); 255 } else { 256 mlx5_core_info(events->dev, 257 "Port module event: module %u, %s\n", 258 module_num, status_str); 259 } 260 261 return NOTIFY_OK; 262 } 263 264 enum { 265 MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0, 266 MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1, 267 MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2, 268 }; 269 270 static void mlx5_pcie_event(struct work_struct *work) 271 { 272 u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0}; 273 u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0}; 274 struct mlx5_events *events; 275 struct mlx5_core_dev *dev; 276 u8 power_status; 277 u16 pci_power; 278 279 events = container_of(work, struct mlx5_events, pcie_core_work); 280 dev = events->dev; 281 282 if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power)) 283 return; 284 285 mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), 286 MLX5_REG_MPEIN, 0, 0); 287 power_status = MLX5_GET(mpein_reg, out, pwr_status); 288 pci_power = MLX5_GET(mpein_reg, out, pci_power); 289 290 switch (power_status) { 291 case MLX5_PCI_POWER_COULD_NOT_BE_READ: 292 mlx5_core_info_rl(dev, 293 "PCIe slot power capability was not advertised.\n"); 294 break; 295 case MLX5_PCI_POWER_INSUFFICIENT_REPORTED: 296 mlx5_core_warn_rl(dev, 297 "Detected insufficient power on the PCIe slot (%uW).\n", 298 pci_power); 299 break; 300 case MLX5_PCI_POWER_SUFFICIENT_REPORTED: 301 mlx5_core_info_rl(dev, 302 "PCIe slot advertised sufficient power (%uW).\n", 303 pci_power); 304 break; 305 } 306 } 307 308 static int pcie_core(struct notifier_block *nb, unsigned long type, void *data) 309 { 310 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, 311 struct mlx5_event_nb, 312 nb); 313 struct mlx5_events *events = event_nb->ctx; 314 struct mlx5_eqe *eqe = data; 315 316 switch (eqe->sub_type) { 317 case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT: 318 queue_work(events->wq, &events->pcie_core_work); 319 break; 320 default: 321 return NOTIFY_DONE; 322 } 323 324 return NOTIFY_OK; 325 } 326 327 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) 328 { 329 *stats = dev->priv.events->pme_stats; 330 } 331 332 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ 333 static int forward_event(struct notifier_block *nb, unsigned long event, void *data) 334 { 335 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 336 struct mlx5_events *events = event_nb->ctx; 337 struct mlx5_eqe *eqe = data; 338 339 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", 340 eqe_type_str(eqe->type), eqe->sub_type); 341 atomic_notifier_call_chain(&events->fw_nh, event, data); 342 return NOTIFY_OK; 343 } 344 345 int mlx5_events_init(struct mlx5_core_dev *dev) 346 { 347 struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); 348 349 if (!events) 350 return -ENOMEM; 351 352 ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh); 353 events->dev = dev; 354 dev->priv.events = events; 355 events->wq = create_singlethread_workqueue("mlx5_events"); 356 if (!events->wq) { 357 kfree(events); 358 return -ENOMEM; 359 } 360 INIT_WORK(&events->pcie_core_work, mlx5_pcie_event); 361 BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh); 362 363 return 0; 364 } 365 366 void mlx5_events_cleanup(struct mlx5_core_dev *dev) 367 { 368 destroy_workqueue(dev->priv.events->wq); 369 kvfree(dev->priv.events); 370 } 371 372 void mlx5_events_start(struct mlx5_core_dev *dev) 373 { 374 struct mlx5_events *events = dev->priv.events; 375 int i; 376 377 for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { 378 events->notifiers[i].nb = events_nbs_ref[i]; 379 events->notifiers[i].ctx = events; 380 mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); 381 } 382 } 383 384 void mlx5_events_stop(struct mlx5_core_dev *dev) 385 { 386 struct mlx5_events *events = dev->priv.events; 387 int i; 388 389 for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) 390 mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); 391 flush_workqueue(events->wq); 392 } 393 394 /* This API is used only for processing and forwarding firmware 395 * events to mlx5 consumer. 396 */ 397 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) 398 { 399 struct mlx5_events *events = dev->priv.events; 400 401 return atomic_notifier_chain_register(&events->fw_nh, nb); 402 } 403 EXPORT_SYMBOL(mlx5_notifier_register); 404 405 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) 406 { 407 struct mlx5_events *events = dev->priv.events; 408 409 return atomic_notifier_chain_unregister(&events->fw_nh, nb); 410 } 411 EXPORT_SYMBOL(mlx5_notifier_unregister); 412 413 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) 414 { 415 return atomic_notifier_call_chain(&events->fw_nh, event, data); 416 } 417 418 /* This API is used only for processing and forwarding driver-specific 419 * events to mlx5 consumers. 420 */ 421 int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) 422 { 423 struct mlx5_events *events = dev->priv.events; 424 425 return blocking_notifier_chain_register(&events->sw_nh, nb); 426 } 427 428 int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) 429 { 430 struct mlx5_events *events = dev->priv.events; 431 432 return blocking_notifier_chain_unregister(&events->sw_nh, nb); 433 } 434 435 int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event, 436 void *data) 437 { 438 struct mlx5_events *events = dev->priv.events; 439 440 return blocking_notifier_call_chain(&events->sw_nh, event, data); 441 } 442 443 void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work) 444 { 445 queue_work(dev->priv.events->wq, work); 446 } 447