1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 // Copyright (c) 2018 Mellanox Technologies
3 
4 #include <linux/mlx5/driver.h>
5 
6 #include "mlx5_core.h"
7 #include "lib/eq.h"
8 #include "lib/mlx5.h"
9 
10 struct mlx5_event_nb {
11 	struct mlx5_nb  nb;
12 	void           *ctx;
13 };
14 
/* General event handlers for the low level mlx5_core driver.
 *
 * Other major feature-specific events (clock, eswitch, fpga, FW trace and
 * many others) are handled elsewhere, through separate notifier callbacks
 * owned by those mlx5 components.
 */
static int any_notifier(struct notifier_block *nb, unsigned long type,
			void *data);
static int temp_warn(struct notifier_block *nb, unsigned long type,
		     void *data);
static int port_module(struct notifier_block *nb, unsigned long type,
		       void *data);

/* handler which forwards the event to events->nh, the driver notifier chain */
static int forward_event(struct notifier_block *nb, unsigned long event,
			 void *data);
27 
28 static struct mlx5_nb events_nbs_ref[] = {
29 	/* Events to be proccessed by mlx5_core */
30 	{.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
31 	{.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
32 	{.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
33 
34 	/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
35 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
36 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
37 	/* QP/WQ resource events to forward */
38 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
39 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
40 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
41 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
42 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
43 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
44 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
45 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
46 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
47 	/* SRQ events */
48 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
49 	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
50 };
51 
52 struct mlx5_events {
53 	struct mlx5_core_dev *dev;
54 	struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
55 	/* driver notifier chain */
56 	struct atomic_notifier_head nh;
57 	/* port module events stats */
58 	struct mlx5_pme_stats pme_stats;
59 };
60 
61 static const char *eqe_type_str(u8 type)
62 {
63 	switch (type) {
64 	case MLX5_EVENT_TYPE_COMP:
65 		return "MLX5_EVENT_TYPE_COMP";
66 	case MLX5_EVENT_TYPE_PATH_MIG:
67 		return "MLX5_EVENT_TYPE_PATH_MIG";
68 	case MLX5_EVENT_TYPE_COMM_EST:
69 		return "MLX5_EVENT_TYPE_COMM_EST";
70 	case MLX5_EVENT_TYPE_SQ_DRAINED:
71 		return "MLX5_EVENT_TYPE_SQ_DRAINED";
72 	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
73 		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
74 	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
75 		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
76 	case MLX5_EVENT_TYPE_CQ_ERROR:
77 		return "MLX5_EVENT_TYPE_CQ_ERROR";
78 	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
79 		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
80 	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
81 		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
82 	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
83 		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
84 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
85 		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
86 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
87 		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
88 	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
89 		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
90 	case MLX5_EVENT_TYPE_PORT_CHANGE:
91 		return "MLX5_EVENT_TYPE_PORT_CHANGE";
92 	case MLX5_EVENT_TYPE_GPIO_EVENT:
93 		return "MLX5_EVENT_TYPE_GPIO_EVENT";
94 	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
95 		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
96 	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
97 		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
98 	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
99 		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
100 	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
101 		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
102 	case MLX5_EVENT_TYPE_STALL_EVENT:
103 		return "MLX5_EVENT_TYPE_STALL_EVENT";
104 	case MLX5_EVENT_TYPE_CMD:
105 		return "MLX5_EVENT_TYPE_CMD";
106 	case MLX5_EVENT_TYPE_PAGE_REQUEST:
107 		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
108 	case MLX5_EVENT_TYPE_PAGE_FAULT:
109 		return "MLX5_EVENT_TYPE_PAGE_FAULT";
110 	case MLX5_EVENT_TYPE_PPS_EVENT:
111 		return "MLX5_EVENT_TYPE_PPS_EVENT";
112 	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
113 		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
114 	case MLX5_EVENT_TYPE_FPGA_ERROR:
115 		return "MLX5_EVENT_TYPE_FPGA_ERROR";
116 	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
117 		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
118 	case MLX5_EVENT_TYPE_GENERAL_EVENT:
119 		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
120 	case MLX5_EVENT_TYPE_MONITOR_COUNTER:
121 		return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
122 	case MLX5_EVENT_TYPE_DEVICE_TRACER:
123 		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
124 	default:
125 		return "Unrecognized event";
126 	}
127 }
128 
129 /* handles all FW events, type == eqe->type */
130 static int any_notifier(struct notifier_block *nb,
131 			unsigned long type, void *data)
132 {
133 	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
134 	struct mlx5_events   *events   = event_nb->ctx;
135 	struct mlx5_eqe      *eqe      = data;
136 
137 	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
138 		      eqe_type_str(eqe->type), eqe->sub_type);
139 	return NOTIFY_OK;
140 }
141 
142 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
143 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
144 {
145 	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
146 	struct mlx5_events   *events   = event_nb->ctx;
147 	struct mlx5_eqe      *eqe      = data;
148 	u64 value_lsb;
149 	u64 value_msb;
150 
151 	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
152 	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
153 
154 	mlx5_core_warn(events->dev,
155 		       "High temperature on sensors with bit set %llx %llx",
156 		       value_msb, value_lsb);
157 
158 	return NOTIFY_OK;
159 }
160 
161 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
162 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
163 {
164 	switch (status) {
165 	case MLX5_MODULE_STATUS_PLUGGED:
166 		return "Cable plugged";
167 	case MLX5_MODULE_STATUS_UNPLUGGED:
168 		return "Cable unplugged";
169 	case MLX5_MODULE_STATUS_ERROR:
170 		return "Cable error";
171 	case MLX5_MODULE_STATUS_DISABLED:
172 		return "Cable disabled";
173 	default:
174 		return "Unknown status";
175 	}
176 }
177 
178 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
179 {
180 	switch (error) {
181 	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
182 		return "Power budget exceeded";
183 	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
184 		return "Long Range for non MLNX cable";
185 	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
186 		return "Bus stuck (I2C or data shorted)";
187 	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
188 		return "No EEPROM/retry timeout";
189 	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
190 		return "Enforce part number list";
191 	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
192 		return "Unknown identifier";
193 	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
194 		return "High Temperature";
195 	case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
196 		return "Bad or shorted cable/module";
197 	case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
198 		return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
199 	default:
200 		return "Unknown error";
201 	}
202 }
203 
204 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
205 static int port_module(struct notifier_block *nb, unsigned long type, void *data)
206 {
207 	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
208 	struct mlx5_events   *events   = event_nb->ctx;
209 	struct mlx5_eqe      *eqe      = data;
210 
211 	enum port_module_event_status_type module_status;
212 	enum port_module_event_error_type error_type;
213 	struct mlx5_eqe_port_module *module_event_eqe;
214 	const char *status_str, *error_str;
215 	u8 module_num;
216 
217 	module_event_eqe = &eqe->data.port_module;
218 	module_num = module_event_eqe->module;
219 	module_status = module_event_eqe->module_status &
220 			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
221 	error_type = module_event_eqe->error_type &
222 		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
223 
224 	if (module_status < MLX5_MODULE_STATUS_NUM)
225 		events->pme_stats.status_counters[module_status]++;
226 	status_str = mlx5_pme_status_to_string(module_status);
227 
228 	if (module_status == MLX5_MODULE_STATUS_ERROR) {
229 		if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
230 			events->pme_stats.error_counters[error_type]++;
231 		error_str = mlx5_pme_error_to_string(error_type);
232 	}
233 
234 	if (!printk_ratelimit())
235 		return NOTIFY_OK;
236 
237 	if (module_status == MLX5_MODULE_STATUS_ERROR)
238 		mlx5_core_err(events->dev,
239 			      "Port module event[error]: module %u, %s, %s\n",
240 			      module_num, status_str, error_str);
241 	else
242 		mlx5_core_info(events->dev,
243 			       "Port module event: module %u, %s\n",
244 			       module_num, status_str);
245 
246 	return NOTIFY_OK;
247 }
248 
249 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
250 {
251 	*stats = dev->priv.events->pme_stats;
252 }
253 
254 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
255 static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
256 {
257 	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
258 	struct mlx5_events   *events   = event_nb->ctx;
259 	struct mlx5_eqe      *eqe      = data;
260 
261 	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
262 		      eqe_type_str(eqe->type), eqe->sub_type);
263 	atomic_notifier_call_chain(&events->nh, event, data);
264 	return NOTIFY_OK;
265 }
266 
267 int mlx5_events_init(struct mlx5_core_dev *dev)
268 {
269 	struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
270 
271 	if (!events)
272 		return -ENOMEM;
273 
274 	ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
275 	events->dev = dev;
276 	dev->priv.events = events;
277 	return 0;
278 }
279 
280 void mlx5_events_cleanup(struct mlx5_core_dev *dev)
281 {
282 	kvfree(dev->priv.events);
283 }
284 
285 void mlx5_events_start(struct mlx5_core_dev *dev)
286 {
287 	struct mlx5_events *events = dev->priv.events;
288 	int i;
289 
290 	for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
291 		events->notifiers[i].nb  = events_nbs_ref[i];
292 		events->notifiers[i].ctx = events;
293 		mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
294 	}
295 }
296 
297 void mlx5_events_stop(struct mlx5_core_dev *dev)
298 {
299 	struct mlx5_events *events = dev->priv.events;
300 	int i;
301 
302 	for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
303 		mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
304 }
305 
306 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
307 {
308 	struct mlx5_events *events = dev->priv.events;
309 
310 	return atomic_notifier_chain_register(&events->nh, nb);
311 }
312 EXPORT_SYMBOL(mlx5_notifier_register);
313 
314 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
315 {
316 	struct mlx5_events *events = dev->priv.events;
317 
318 	return atomic_notifier_chain_unregister(&events->nh, nb);
319 }
320 EXPORT_SYMBOL(mlx5_notifier_unregister);
321 
322 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
323 {
324 	return atomic_notifier_call_chain(&events->nh, event, data);
325 }
326