1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 // Copyright (c) 2018 Mellanox Technologies 3 4 #include <linux/mlx5/driver.h> 5 6 #include "mlx5_core.h" 7 #include "lib/eq.h" 8 #include "lib/mlx5.h" 9 10 struct mlx5_event_nb { 11 struct mlx5_nb nb; 12 void *ctx; 13 }; 14 15 /* General events handlers for the low level mlx5_core driver 16 * 17 * Other Major feature specific events such as 18 * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with 19 * separate notifiers callbacks, specifically by those mlx5 components. 20 */ 21 static int any_notifier(struct notifier_block *, unsigned long, void *); 22 static int temp_warn(struct notifier_block *, unsigned long, void *); 23 static int port_module(struct notifier_block *, unsigned long, void *); 24 25 /* handler which forwards the event to events->nh, driver notifiers */ 26 static int forward_event(struct notifier_block *, unsigned long, void *); 27 28 static struct mlx5_nb events_nbs_ref[] = { 29 /* Events to be proccessed by mlx5_core */ 30 {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, 31 {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, 32 {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, 33 34 /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ 35 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, 36 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, 37 /* QP/WQ resource events to forward */ 38 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, 39 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, 40 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, 41 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, 42 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, 43 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, 44 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, 45 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, 46 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, 47 /* SRQ events */ 48 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, 49 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, 50 }; 51 52 struct mlx5_events { 53 struct mlx5_core_dev *dev; 54 struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; 55 /* driver notifier chain */ 56 struct atomic_notifier_head nh; 57 /* port module events stats */ 58 struct mlx5_pme_stats pme_stats; 59 }; 60 61 static const char *eqe_type_str(u8 type) 62 { 63 switch (type) { 64 case MLX5_EVENT_TYPE_COMP: 65 return "MLX5_EVENT_TYPE_COMP"; 66 case MLX5_EVENT_TYPE_PATH_MIG: 67 return "MLX5_EVENT_TYPE_PATH_MIG"; 68 case MLX5_EVENT_TYPE_COMM_EST: 69 return "MLX5_EVENT_TYPE_COMM_EST"; 70 case MLX5_EVENT_TYPE_SQ_DRAINED: 71 return "MLX5_EVENT_TYPE_SQ_DRAINED"; 72 case MLX5_EVENT_TYPE_SRQ_LAST_WQE: 73 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; 74 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: 75 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; 76 case MLX5_EVENT_TYPE_CQ_ERROR: 77 return "MLX5_EVENT_TYPE_CQ_ERROR"; 78 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: 79 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; 80 case MLX5_EVENT_TYPE_PATH_MIG_FAILED: 81 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; 82 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: 83 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; 84 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: 85 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; 86 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: 87 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; 88 case MLX5_EVENT_TYPE_INTERNAL_ERROR: 89 return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; 90 case MLX5_EVENT_TYPE_PORT_CHANGE: 91 return "MLX5_EVENT_TYPE_PORT_CHANGE"; 92 case MLX5_EVENT_TYPE_GPIO_EVENT: 93 return "MLX5_EVENT_TYPE_GPIO_EVENT"; 94 case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: 95 return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; 96 case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: 97 return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; 98 case MLX5_EVENT_TYPE_REMOTE_CONFIG: 99 return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; 100 case MLX5_EVENT_TYPE_DB_BF_CONGESTION: 101 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; 102 case MLX5_EVENT_TYPE_STALL_EVENT: 103 return "MLX5_EVENT_TYPE_STALL_EVENT"; 104 case MLX5_EVENT_TYPE_CMD: 105 return "MLX5_EVENT_TYPE_CMD"; 106 case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: 107 return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; 108 case MLX5_EVENT_TYPE_PAGE_REQUEST: 109 return "MLX5_EVENT_TYPE_PAGE_REQUEST"; 110 case MLX5_EVENT_TYPE_PAGE_FAULT: 111 return "MLX5_EVENT_TYPE_PAGE_FAULT"; 112 case MLX5_EVENT_TYPE_PPS_EVENT: 113 return "MLX5_EVENT_TYPE_PPS_EVENT"; 114 case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: 115 return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; 116 case MLX5_EVENT_TYPE_FPGA_ERROR: 117 return "MLX5_EVENT_TYPE_FPGA_ERROR"; 118 case MLX5_EVENT_TYPE_FPGA_QP_ERROR: 119 return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; 120 case MLX5_EVENT_TYPE_GENERAL_EVENT: 121 return "MLX5_EVENT_TYPE_GENERAL_EVENT"; 122 case MLX5_EVENT_TYPE_MONITOR_COUNTER: 123 return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; 124 case MLX5_EVENT_TYPE_DEVICE_TRACER: 125 return "MLX5_EVENT_TYPE_DEVICE_TRACER"; 126 default: 127 return "Unrecognized event"; 128 } 129 } 130 131 /* handles all FW events, type == eqe->type */ 132 static int any_notifier(struct notifier_block *nb, 133 unsigned long type, void *data) 134 { 135 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 136 struct mlx5_events *events = event_nb->ctx; 137 struct mlx5_eqe *eqe = data; 138 139 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", 140 eqe_type_str(eqe->type), eqe->sub_type); 141 return NOTIFY_OK; 142 } 143 144 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ 145 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) 146 { 147 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 148 struct mlx5_events *events = event_nb->ctx; 149 struct mlx5_eqe *eqe = data; 150 u64 value_lsb; 151 u64 value_msb; 152 153 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); 154 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); 155 156 mlx5_core_warn(events->dev, 157 "High temperature on sensors with bit set %llx %llx", 158 value_msb, value_lsb); 159 160 return NOTIFY_OK; 161 } 162 163 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ 164 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) 165 { 166 switch (status) { 167 case MLX5_MODULE_STATUS_PLUGGED: 168 return "Cable plugged"; 169 case MLX5_MODULE_STATUS_UNPLUGGED: 170 return "Cable unplugged"; 171 case MLX5_MODULE_STATUS_ERROR: 172 return "Cable error"; 173 case MLX5_MODULE_STATUS_DISABLED: 174 return "Cable disabled"; 175 default: 176 return "Unknown status"; 177 } 178 } 179 180 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) 181 { 182 switch (error) { 183 case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: 184 return "Power budget exceeded"; 185 case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: 186 return "Long Range for non MLNX cable"; 187 case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: 188 return "Bus stuck (I2C or data shorted)"; 189 case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: 190 return "No EEPROM/retry timeout"; 191 case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: 192 return "Enforce part number list"; 193 case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: 194 return "Unknown identifier"; 195 case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: 196 return "High Temperature"; 197 case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: 198 return "Bad or shorted cable/module"; 199 case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: 200 return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; 201 default: 202 return "Unknown error"; 203 } 204 } 205 206 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ 207 static int port_module(struct notifier_block *nb, unsigned long type, void *data) 208 { 209 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 210 struct mlx5_events *events = event_nb->ctx; 211 struct mlx5_eqe *eqe = data; 212 213 enum port_module_event_status_type module_status; 214 enum port_module_event_error_type error_type; 215 struct mlx5_eqe_port_module *module_event_eqe; 216 const char *status_str; 217 u8 module_num; 218 219 module_event_eqe = &eqe->data.port_module; 220 module_status = module_event_eqe->module_status & 221 PORT_MODULE_EVENT_MODULE_STATUS_MASK; 222 error_type = module_event_eqe->error_type & 223 PORT_MODULE_EVENT_ERROR_TYPE_MASK; 224 225 if (module_status < MLX5_MODULE_STATUS_NUM) 226 events->pme_stats.status_counters[module_status]++; 227 228 if (module_status == MLX5_MODULE_STATUS_ERROR) 229 if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) 230 events->pme_stats.error_counters[error_type]++; 231 232 if (!printk_ratelimit()) 233 return NOTIFY_OK; 234 235 module_num = module_event_eqe->module; 236 status_str = mlx5_pme_status_to_string(module_status); 237 if (module_status == MLX5_MODULE_STATUS_ERROR) { 238 const char *error_str = mlx5_pme_error_to_string(error_type); 239 240 mlx5_core_err(events->dev, 241 "Port module event[error]: module %u, %s, %s\n", 242 module_num, status_str, error_str); 243 } else { 244 mlx5_core_info(events->dev, 245 "Port module event: module %u, %s\n", 246 module_num, status_str); 247 } 248 249 return NOTIFY_OK; 250 } 251 252 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) 253 { 254 *stats = dev->priv.events->pme_stats; 255 } 256 257 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ 258 static int forward_event(struct notifier_block *nb, unsigned long event, void *data) 259 { 260 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); 261 struct mlx5_events *events = event_nb->ctx; 262 struct mlx5_eqe *eqe = data; 263 264 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", 265 eqe_type_str(eqe->type), eqe->sub_type); 266 atomic_notifier_call_chain(&events->nh, event, data); 267 return NOTIFY_OK; 268 } 269 270 int mlx5_events_init(struct mlx5_core_dev *dev) 271 { 272 struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); 273 274 if (!events) 275 return -ENOMEM; 276 277 ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); 278 events->dev = dev; 279 dev->priv.events = events; 280 return 0; 281 } 282 283 void mlx5_events_cleanup(struct mlx5_core_dev *dev) 284 { 285 kvfree(dev->priv.events); 286 } 287 288 void mlx5_events_start(struct mlx5_core_dev *dev) 289 { 290 struct mlx5_events *events = dev->priv.events; 291 int i; 292 293 for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { 294 events->notifiers[i].nb = events_nbs_ref[i]; 295 events->notifiers[i].ctx = events; 296 mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); 297 } 298 } 299 300 void mlx5_events_stop(struct mlx5_core_dev *dev) 301 { 302 struct mlx5_events *events = dev->priv.events; 303 int i; 304 305 for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) 306 mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); 307 } 308 309 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) 310 { 311 struct mlx5_events *events = dev->priv.events; 312 313 return atomic_notifier_chain_register(&events->nh, nb); 314 } 315 EXPORT_SYMBOL(mlx5_notifier_register); 316 317 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) 318 { 319 struct mlx5_events *events = dev->priv.events; 320 321 return atomic_notifier_chain_unregister(&events->nh, nb); 322 } 323 EXPORT_SYMBOL(mlx5_notifier_unregister); 324 325 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) 326 { 327 return atomic_notifier_call_chain(&events->nh, event, data); 328 } 329