1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */
2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */
3de8650a8SEran Ben Elisha 
44edc17fdSAya Levin #include "health.h"
5145e5637SEran Ben Elisha #include "en/ptp.h"
6c27971d0SRoi Dayan #include "en/devlink.h"
732def412SAmir Tzin #include "lib/tout.h"
8de8650a8SEran Ben Elisha 
9de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
10de8650a8SEran Ben Elisha {
1132def412SAmir Tzin 	struct mlx5_core_dev *dev = sq->mdev;
1232def412SAmir Tzin 	unsigned long exp_time;
1332def412SAmir Tzin 
1432def412SAmir Tzin 	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
15de8650a8SEran Ben Elisha 
16de8650a8SEran Ben Elisha 	while (time_before(jiffies, exp_time)) {
17de8650a8SEran Ben Elisha 		if (sq->cc == sq->pc)
18de8650a8SEran Ben Elisha 			return 0;
19de8650a8SEran Ben Elisha 
20de8650a8SEran Ben Elisha 		msleep(20);
21de8650a8SEran Ben Elisha 	}
22de8650a8SEran Ben Elisha 
234ad40d8eSEran Ben Elisha 	netdev_err(sq->netdev,
24de8650a8SEran Ben Elisha 		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
25de8650a8SEran Ben Elisha 		   sq->sqn, sq->cc, sq->pc);
26de8650a8SEran Ben Elisha 
27de8650a8SEran Ben Elisha 	return -ETIMEDOUT;
28de8650a8SEran Ben Elisha }
29de8650a8SEran Ben Elisha 
30de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
31de8650a8SEran Ben Elisha {
32de8650a8SEran Ben Elisha 	WARN_ONCE(sq->cc != sq->pc,
33de8650a8SEran Ben Elisha 		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
34de8650a8SEran Ben Elisha 		  sq->sqn, sq->cc, sq->pc);
35de8650a8SEran Ben Elisha 	sq->cc = 0;
36de8650a8SEran Ben Elisha 	sq->dma_fifo_cc = 0;
37de8650a8SEran Ben Elisha 	sq->pc = 0;
38de8650a8SEran Ben Elisha }
39de8650a8SEran Ben Elisha 
40c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
41de8650a8SEran Ben Elisha {
42c50de4afSAya Levin 	struct mlx5_core_dev *mdev;
43c50de4afSAya Levin 	struct net_device *dev;
44c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
45de8650a8SEran Ben Elisha 	u8 state;
46de8650a8SEran Ben Elisha 	int err;
47de8650a8SEran Ben Elisha 
48c50de4afSAya Levin 	sq = ctx;
494ad40d8eSEran Ben Elisha 	mdev = sq->mdev;
504ad40d8eSEran Ben Elisha 	dev = sq->netdev;
51c50de4afSAya Levin 
52c50de4afSAya Levin 	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
53c50de4afSAya Levin 		return 0;
54c50de4afSAya Levin 
55de8650a8SEran Ben Elisha 	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
56de8650a8SEran Ben Elisha 	if (err) {
57de8650a8SEran Ben Elisha 		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
58de8650a8SEran Ben Elisha 			   sq->sqn, err);
59276d197eSAya Levin 		goto out;
60de8650a8SEran Ben Elisha 	}
61de8650a8SEran Ben Elisha 
62d9a2fcf5SAya Levin 	if (state != MLX5_SQC_STATE_ERR)
63276d197eSAya Levin 		goto out;
64de8650a8SEran Ben Elisha 
65de8650a8SEran Ben Elisha 	mlx5e_tx_disable_queue(sq->txq);
66de8650a8SEran Ben Elisha 
67de8650a8SEran Ben Elisha 	err = mlx5e_wait_for_sq_flush(sq);
68de8650a8SEran Ben Elisha 	if (err)
69276d197eSAya Levin 		goto out;
70de8650a8SEran Ben Elisha 
71de8650a8SEran Ben Elisha 	/* At this point, no new packets will arrive from the stack as TXQ is
72de8650a8SEran Ben Elisha 	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
73de8650a8SEran Ben Elisha 	 * pending WQEs. SQ can safely reset the SQ.
74de8650a8SEran Ben Elisha 	 */
75de8650a8SEran Ben Elisha 
764ad40d8eSEran Ben Elisha 	err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
77de8650a8SEran Ben Elisha 	if (err)
78276d197eSAya Levin 		goto out;
79de8650a8SEran Ben Elisha 
80de8650a8SEran Ben Elisha 	mlx5e_reset_txqsq_cc_pc(sq);
81de8650a8SEran Ben Elisha 	sq->stats->recover++;
82276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
83de8650a8SEran Ben Elisha 	mlx5e_activate_txqsq(sq);
8479efecb4SMaxim Mikityanskiy 	if (sq->channel)
8579efecb4SMaxim Mikityanskiy 		mlx5e_trigger_napi_icosq(sq->channel);
8679efecb4SMaxim Mikityanskiy 	else
8779efecb4SMaxim Mikityanskiy 		mlx5e_trigger_napi_sched(sq->cq.napi);
88de8650a8SEran Ben Elisha 
89de8650a8SEran Ben Elisha 	return 0;
90276d197eSAya Levin out:
91276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
92276d197eSAya Levin 	return err;
93de8650a8SEran Ben Elisha }
94de8650a8SEran Ben Elisha 
/* Context passed to the TX timeout recover and dump callbacks. */
struct mlx5e_tx_timeout_ctx {
	/* SQ the timeout was reported on */
	struct mlx5e_txqsq *sq;
	/* Outcome written by the recover callback: 0 - this SQ recovered,
	 * 1 - all channels recovered, otherwise the error returned by
	 * mlx5e_safe_reopen_channels().
	 */
	int status;
};
99e6205564SAya Levin 
100c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx)
1017d91126bSEran Ben Elisha {
102e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx *to_ctx;
103e6205564SAya Levin 	struct mlx5e_priv *priv;
104c50de4afSAya Levin 	struct mlx5_eq_comp *eq;
105c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
106c50de4afSAya Levin 	int err;
1077d91126bSEran Ben Elisha 
108e6205564SAya Levin 	to_ctx = ctx;
109e6205564SAya Levin 	sq = to_ctx->sq;
110c50de4afSAya Levin 	eq = sq->cq.mcq.eq;
1114ad40d8eSEran Ben Elisha 	priv = sq->priv;
1124ad40d8eSEran Ben Elisha 	err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
113e6205564SAya Levin 	if (!err) {
114e6205564SAya Levin 		to_ctx->status = 0; /* this sq recovered */
115e6205564SAya Levin 		return err;
116e6205564SAya Levin 	}
117e6205564SAya Levin 
118e6205564SAya Levin 	err = mlx5e_safe_reopen_channels(priv);
119e6205564SAya Levin 	if (!err) {
120e6205564SAya Levin 		to_ctx->status = 1; /* all channels recovered */
121e6205564SAya Levin 		return err;
122e6205564SAya Levin 	}
123e6205564SAya Levin 
124e6205564SAya Levin 	to_ctx->status = err;
1257d91126bSEran Ben Elisha 	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
126e6205564SAya Levin 	netdev_err(priv->netdev,
127e6205564SAya Levin 		   "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
128e6205564SAya Levin 		   err);
1297d91126bSEran Ben Elisha 
130c50de4afSAya Levin 	return err;
1317d91126bSEran Ben Elisha }
1327d91126bSEran Ben Elisha 
133de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function.
134de8650a8SEran Ben Elisha  * It can cause a dead lock or a read-after-free.
135de8650a8SEran Ben Elisha  */
136c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
137de8650a8SEran Ben Elisha {
138c50de4afSAya Levin 	return err_ctx->recover(err_ctx->ctx);
139de8650a8SEran Ben Elisha }
140de8650a8SEran Ben Elisha 
/* devlink .recover callback of the TX reporter.
 *
 * With an error context (@context != NULL) the context's own recover handler
 * is run. Without one, all channels are recovered through
 * mlx5e_health_recover_channels().
 */
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_tx_reporter_recover_from_ctx(err_ctx);

	return mlx5e_health_recover_channels(priv);
}
151de8650a8SEran Ben Elisha 
152de8650a8SEran Ben Elisha static int
153145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
1542d708887SAya Levin 						  struct mlx5e_txqsq *sq, int tc)
155de8650a8SEran Ben Elisha {
156dd921fd2SAya Levin 	bool stopped = netif_xmit_stopped(sq->txq);
1574ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
158dd921fd2SAya Levin 	u8 state;
159de8650a8SEran Ben Elisha 	int err;
160de8650a8SEran Ben Elisha 
161dd921fd2SAya Levin 	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
162dd921fd2SAya Levin 	if (err)
163dd921fd2SAya Levin 		return err;
164dd921fd2SAya Levin 
1652d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
1662d708887SAya Levin 	if (err)
1672d708887SAya Levin 		return err;
1682d708887SAya Levin 
1692d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
1702d708887SAya Levin 	if (err)
1712d708887SAya Levin 		return err;
1722d708887SAya Levin 
173dd921fd2SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
174de8650a8SEran Ben Elisha 	if (err)
175de8650a8SEran Ben Elisha 		return err;
176de8650a8SEran Ben Elisha 
177de8650a8SEran Ben Elisha 	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
178de8650a8SEran Ben Elisha 	if (err)
179de8650a8SEran Ben Elisha 		return err;
180de8650a8SEran Ben Elisha 
181de8650a8SEran Ben Elisha 	err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
182de8650a8SEran Ben Elisha 	if (err)
183de8650a8SEran Ben Elisha 		return err;
184de8650a8SEran Ben Elisha 
1852d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
1862d708887SAya Levin 	if (err)
1872d708887SAya Levin 		return err;
1882d708887SAya Levin 
1892d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
1902d708887SAya Levin 	if (err)
1912d708887SAya Levin 		return err;
1922d708887SAya Levin 
193d5cbedd7SAya Levin 	err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
1942bf09e60SAya Levin 	if (err)
1952bf09e60SAya Levin 		return err;
1962bf09e60SAya Levin 
197145e5637SEran Ben Elisha 	return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
198145e5637SEran Ben Elisha }
199145e5637SEran Ben Elisha 
200145e5637SEran Ben Elisha static int
201145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
202145e5637SEran Ben Elisha 					struct mlx5e_txqsq *sq, int tc)
203145e5637SEran Ben Elisha {
204145e5637SEran Ben Elisha 	int err;
205145e5637SEran Ben Elisha 
206145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
207145e5637SEran Ben Elisha 	if (err)
208145e5637SEran Ben Elisha 		return err;
209145e5637SEran Ben Elisha 
210145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
211145e5637SEran Ben Elisha 	if (err)
212145e5637SEran Ben Elisha 		return err;
213145e5637SEran Ben Elisha 
214145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
21556837c2aSAya Levin 	if (err)
21656837c2aSAya Levin 		return err;
21756837c2aSAya Levin 
218de8650a8SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
219de8650a8SEran Ben Elisha 	if (err)
220de8650a8SEran Ben Elisha 		return err;
221de8650a8SEran Ben Elisha 
222de8650a8SEran Ben Elisha 	return 0;
223de8650a8SEran Ben Elisha }
224de8650a8SEran Ben Elisha 
225145e5637SEran Ben Elisha static int
226145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
227145e5637SEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq, int tc)
228145e5637SEran Ben Elisha {
229145e5637SEran Ben Elisha 	int err;
230145e5637SEran Ben Elisha 
231145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
232145e5637SEran Ben Elisha 	if (err)
233145e5637SEran Ben Elisha 		return err;
234145e5637SEran Ben Elisha 
235145e5637SEran Ben Elisha 	err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
236145e5637SEran Ben Elisha 	if (err)
237145e5637SEran Ben Elisha 		return err;
238145e5637SEran Ben Elisha 
2391880bc4eSEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc);
2401880bc4eSEran Ben Elisha 	if (err)
2411880bc4eSEran Ben Elisha 		return err;
2421880bc4eSEran Ben Elisha 
2431880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
2441880bc4eSEran Ben Elisha 	if (err)
2451880bc4eSEran Ben Elisha 		return err;
2461880bc4eSEran Ben Elisha 
2471880bc4eSEran Ben Elisha 	err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg);
2481880bc4eSEran Ben Elisha 	if (err)
2491880bc4eSEran Ben Elisha 		return err;
2501880bc4eSEran Ben Elisha 
2511880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
252145e5637SEran Ben Elisha 	if (err)
253145e5637SEran Ben Elisha 		return err;
254145e5637SEran Ben Elisha 
255145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
256145e5637SEran Ben Elisha 	if (err)
257145e5637SEran Ben Elisha 		return err;
258145e5637SEran Ben Elisha 
259145e5637SEran Ben Elisha 	return 0;
260145e5637SEran Ben Elisha }
261145e5637SEran Ben Elisha 
262145e5637SEran Ben Elisha static int
263145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
264145e5637SEran Ben Elisha 					 struct mlx5e_txqsq *txqsq)
265145e5637SEran Ben Elisha {
266145e5637SEran Ben Elisha 	u32 sq_stride, sq_sz;
26795742c1cSAya Levin 	bool real_time;
268145e5637SEran Ben Elisha 	int err;
269145e5637SEran Ben Elisha 
270145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
271145e5637SEran Ben Elisha 	if (err)
272145e5637SEran Ben Elisha 		return err;
273145e5637SEran Ben Elisha 
27495742c1cSAya Levin 	real_time =  mlx5_is_real_time_sq(txqsq->mdev);
275145e5637SEran Ben Elisha 	sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
276145e5637SEran Ben Elisha 	sq_stride = MLX5_SEND_WQE_BB;
277145e5637SEran Ben Elisha 
278145e5637SEran Ben Elisha 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
279145e5637SEran Ben Elisha 	if (err)
280145e5637SEran Ben Elisha 		return err;
281145e5637SEran Ben Elisha 
282145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
283145e5637SEran Ben Elisha 	if (err)
284145e5637SEran Ben Elisha 		return err;
285145e5637SEran Ben Elisha 
28695742c1cSAya Levin 	err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
28795742c1cSAya Levin 	if (err)
28895742c1cSAya Levin 		return err;
28995742c1cSAya Levin 
290145e5637SEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
291145e5637SEran Ben Elisha 	if (err)
292145e5637SEran Ben Elisha 		return err;
293145e5637SEran Ben Elisha 
294145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
295145e5637SEran Ben Elisha }
296145e5637SEran Ben Elisha 
297145e5637SEran Ben Elisha static int
2981880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg,
2991880bc4eSEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq)
3001880bc4eSEran Ben Elisha {
3011880bc4eSEran Ben Elisha 	int err;
3021880bc4eSEran Ben Elisha 
3031880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
3041880bc4eSEran Ben Elisha 	if (err)
3051880bc4eSEran Ben Elisha 		return err;
3061880bc4eSEran Ben Elisha 
3071880bc4eSEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg);
3081880bc4eSEran Ben Elisha 	if (err)
3091880bc4eSEran Ben Elisha 		return err;
3101880bc4eSEran Ben Elisha 
3111880bc4eSEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3121880bc4eSEran Ben Elisha }
3131880bc4eSEran Ben Elisha 
3141880bc4eSEran Ben Elisha static int
315145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
316145e5637SEran Ben Elisha 					 struct devlink_fmsg *fmsg)
317145e5637SEran Ben Elisha {
318145e5637SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
319145e5637SEran Ben Elisha 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
32024c22dd0SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
321145e5637SEran Ben Elisha 	struct mlx5e_ptpsq *generic_ptpsq;
322145e5637SEran Ben Elisha 	int err;
323145e5637SEran Ben Elisha 
324145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
325145e5637SEran Ben Elisha 	if (err)
326145e5637SEran Ben Elisha 		return err;
327145e5637SEran Ben Elisha 
328145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
329145e5637SEran Ben Elisha 	if (err)
330145e5637SEran Ben Elisha 		return err;
331145e5637SEran Ben Elisha 
33224c22dd0SAya Levin 	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
333145e5637SEran Ben Elisha 		goto out;
334145e5637SEran Ben Elisha 
33524c22dd0SAya Levin 	generic_ptpsq = &ptp_ch->ptpsq[0];
33624c22dd0SAya Levin 
337145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
338145e5637SEran Ben Elisha 	if (err)
339145e5637SEran Ben Elisha 		return err;
340145e5637SEran Ben Elisha 
341145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
342145e5637SEran Ben Elisha 	if (err)
343145e5637SEran Ben Elisha 		return err;
344145e5637SEran Ben Elisha 
3451880bc4eSEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq);
3461880bc4eSEran Ben Elisha 	if (err)
3471880bc4eSEran Ben Elisha 		return err;
3481880bc4eSEran Ben Elisha 
349145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
350145e5637SEran Ben Elisha 	if (err)
351145e5637SEran Ben Elisha 		return err;
352145e5637SEran Ben Elisha 
353145e5637SEran Ben Elisha out:
354145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
355145e5637SEran Ben Elisha }
356145e5637SEran Ben Elisha 
357de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
358e7a98105SJiri Pirko 				      struct devlink_fmsg *fmsg,
359e7a98105SJiri Pirko 				      struct netlink_ext_ack *extack)
360de8650a8SEran Ben Elisha {
361de8650a8SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
362b0d35de4SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
3632d708887SAya Levin 
3642d708887SAya Levin 	int i, tc, err = 0;
365de8650a8SEran Ben Elisha 
366de8650a8SEran Ben Elisha 	mutex_lock(&priv->state_lock);
367de8650a8SEran Ben Elisha 
368de8650a8SEran Ben Elisha 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
369de8650a8SEran Ben Elisha 		goto unlock;
370de8650a8SEran Ben Elisha 
371145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
3722d708887SAya Levin 	if (err)
3732d708887SAya Levin 		goto unlock;
3742d708887SAya Levin 
375de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
376de8650a8SEran Ben Elisha 	if (err)
377de8650a8SEran Ben Elisha 		goto unlock;
378de8650a8SEran Ben Elisha 
3792d708887SAya Levin 	for (i = 0; i < priv->channels.num; i++) {
3802d708887SAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
381de8650a8SEran Ben Elisha 
38286d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
3832d708887SAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
3842d708887SAya Levin 
3852d708887SAya Levin 			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
386de8650a8SEran Ben Elisha 			if (err)
38799d31cbdSAya Levin 				goto unlock;
388de8650a8SEran Ben Elisha 		}
3892d708887SAya Levin 	}
390145e5637SEran Ben Elisha 
39124c22dd0SAya Levin 	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
392145e5637SEran Ben Elisha 		goto close_sqs_nest;
393145e5637SEran Ben Elisha 
39486d747a3STariq Toukan 	for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
395145e5637SEran Ben Elisha 		err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
396145e5637SEran Ben Elisha 								    &ptp_ch->ptpsq[tc],
397145e5637SEran Ben Elisha 								    tc);
398145e5637SEran Ben Elisha 		if (err)
399145e5637SEran Ben Elisha 			goto unlock;
400145e5637SEran Ben Elisha 	}
401145e5637SEran Ben Elisha 
402145e5637SEran Ben Elisha close_sqs_nest:
403de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
404de8650a8SEran Ben Elisha 	if (err)
405de8650a8SEran Ben Elisha 		goto unlock;
406de8650a8SEran Ben Elisha 
407de8650a8SEran Ben Elisha unlock:
408de8650a8SEran Ben Elisha 	mutex_unlock(&priv->state_lock);
409de8650a8SEran Ben Elisha 	return err;
410de8650a8SEran Ben Elisha }
411de8650a8SEran Ben Elisha 
4125f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
4135f29458bSAya Levin 				     void *ctx)
4145f29458bSAya Levin {
4155f29458bSAya Levin 	struct mlx5_rsc_key key = {};
4165f29458bSAya Levin 	struct mlx5e_txqsq *sq = ctx;
4175f29458bSAya Levin 	int err;
4185f29458bSAya Levin 
4195f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4205f29458bSAya Levin 		return 0;
4215f29458bSAya Levin 
422d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
4235f29458bSAya Levin 	if (err)
4245f29458bSAya Levin 		return err;
4255f29458bSAya Levin 
4265f29458bSAya Levin 	key.size = PAGE_SIZE;
4275f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
4285f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4295f29458bSAya Levin 	if (err)
4305f29458bSAya Levin 		return err;
4315f29458bSAya Levin 
432d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4335f29458bSAya Levin 	if (err)
4345f29458bSAya Levin 		return err;
4355f29458bSAya Levin 
436d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
4375f29458bSAya Levin 	if (err)
4385f29458bSAya Levin 		return err;
4395f29458bSAya Levin 
440d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
4415f29458bSAya Levin 	if (err)
4425f29458bSAya Levin 		return err;
4435f29458bSAya Levin 
4445f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
4455f29458bSAya Levin 	key.index1 = sq->sqn;
4465f29458bSAya Levin 	key.num_of_obj1 = 1;
4475f29458bSAya Levin 
4485f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4495f29458bSAya Levin 	if (err)
4505f29458bSAya Levin 		return err;
4515f29458bSAya Levin 
452d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4535f29458bSAya Levin 	if (err)
4545f29458bSAya Levin 		return err;
4555f29458bSAya Levin 
456d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
4575f29458bSAya Levin 	if (err)
4585f29458bSAya Levin 		return err;
4595f29458bSAya Levin 
4605f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
4615f29458bSAya Levin 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
4625f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4635f29458bSAya Levin 	if (err)
4645f29458bSAya Levin 		return err;
4655f29458bSAya Levin 
466d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4675f29458bSAya Levin 	if (err)
4685f29458bSAya Levin 		return err;
4695f29458bSAya Levin 
470d5cbedd7SAya Levin 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4715f29458bSAya Levin }
4725f29458bSAya Levin 
473918fc385SAmir Tzin static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
474918fc385SAmir Tzin 					  void *ctx)
475918fc385SAmir Tzin {
476918fc385SAmir Tzin 	struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
477918fc385SAmir Tzin 
478918fc385SAmir Tzin 	return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
479918fc385SAmir Tzin }
480918fc385SAmir Tzin 
4815f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
4825f29458bSAya Levin 					  struct devlink_fmsg *fmsg)
4835f29458bSAya Levin {
484b0d35de4SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
4855f29458bSAya Levin 	struct mlx5_rsc_key key = {};
4865f29458bSAya Levin 	int i, tc, err;
4875f29458bSAya Levin 
4885f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4895f29458bSAya Levin 		return 0;
4905f29458bSAya Levin 
491d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
4925f29458bSAya Levin 	if (err)
4935f29458bSAya Levin 		return err;
4945f29458bSAya Levin 
4955f29458bSAya Levin 	key.size = PAGE_SIZE;
4965f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
4975f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4985f29458bSAya Levin 	if (err)
4995f29458bSAya Levin 		return err;
5005f29458bSAya Levin 
501d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
5025f29458bSAya Levin 	if (err)
5035f29458bSAya Levin 		return err;
5045f29458bSAya Levin 
5055f29458bSAya Levin 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
5065f29458bSAya Levin 	if (err)
5075f29458bSAya Levin 		return err;
5085f29458bSAya Levin 
5095f29458bSAya Levin 	for (i = 0; i < priv->channels.num; i++) {
5105f29458bSAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
5115f29458bSAya Levin 
51286d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
5135f29458bSAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
5145f29458bSAya Levin 
5155f29458bSAya Levin 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
5165f29458bSAya Levin 			if (err)
5175f29458bSAya Levin 				return err;
5185f29458bSAya Levin 		}
5195f29458bSAya Levin 	}
520145e5637SEran Ben Elisha 
52124c22dd0SAya Levin 	if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
52286d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
523145e5637SEran Ben Elisha 			struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
524145e5637SEran Ben Elisha 
525145e5637SEran Ben Elisha 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
526145e5637SEran Ben Elisha 			if (err)
527145e5637SEran Ben Elisha 				return err;
528145e5637SEran Ben Elisha 		}
529145e5637SEran Ben Elisha 	}
530145e5637SEran Ben Elisha 
5315f29458bSAya Levin 	return devlink_fmsg_arr_pair_nest_end(fmsg);
5325f29458bSAya Levin }
5335f29458bSAya Levin 
5345f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
5355f29458bSAya Levin 					   struct mlx5e_err_ctx *err_ctx,
5365f29458bSAya Levin 					   struct devlink_fmsg *fmsg)
5375f29458bSAya Levin {
5385f29458bSAya Levin 	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
5395f29458bSAya Levin }
5405f29458bSAya Levin 
/* devlink .dump callback of the TX reporter.
 *
 * With an error context (@context != NULL) the context's own dump handler is
 * run. Without one, all SQs are dumped.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);

	return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
}
5515f29458bSAya Levin 
5520a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
5530a56be3cSAya Levin {
5540a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
5554ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
5560a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
5570a56be3cSAya Levin 
5580a56be3cSAya Levin 	err_ctx.ctx = sq;
5590a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
5605f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
561b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
5620a56be3cSAya Levin 
5630a56be3cSAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
5640a56be3cSAya Levin }
5650a56be3cSAya Levin 
5660a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
5670a56be3cSAya Levin {
5680a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
569e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx to_ctx = {};
5704ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
5710a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
5720a56be3cSAya Levin 
573e6205564SAya Levin 	to_ctx.sq = sq;
574e6205564SAya Levin 	err_ctx.ctx = &to_ctx;
5750a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
576918fc385SAmir Tzin 	err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
577b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str),
578b21aef7eSJoe Perches 		 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
5794ad40d8eSEran Ben Elisha 		 sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
5805337824fSEric Dumazet 		 jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
5810a56be3cSAya Levin 
582e6205564SAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
583e6205564SAya Levin 	return to_ctx.status;
5840a56be3cSAya Levin }
5850a56be3cSAya Levin 
586de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
587de8650a8SEran Ben Elisha 		.name = "tx",
588de8650a8SEran Ben Elisha 		.recover = mlx5e_tx_reporter_recover,
589de8650a8SEran Ben Elisha 		.diagnose = mlx5e_tx_reporter_diagnose,
5905f29458bSAya Levin 		.dump = mlx5e_tx_reporter_dump,
591de8650a8SEran Ben Elisha };
592de8650a8SEran Ben Elisha 
593de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
594de8650a8SEran Ben Elisha 
595b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
596de8650a8SEran Ben Elisha {
597baf6dfdbSAya Levin 	struct devlink_health_reporter *reporter;
598de8650a8SEran Ben Elisha 
599*bc1536f3SJiri Pirko 	reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port,
600*bc1536f3SJiri Pirko 						       &mlx5_tx_reporter_ops,
601b7e93bb6SVladyslav Tarasiuk 						       MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
602baf6dfdbSAya Levin 	if (IS_ERR(reporter)) {
603de8650a8SEran Ben Elisha 		netdev_warn(priv->netdev,
604de8650a8SEran Ben Elisha 			    "Failed to create tx reporter, err = %ld\n",
605baf6dfdbSAya Levin 			    PTR_ERR(reporter));
606b3ea4c4fSEran Ben Elisha 		return;
6077f7cc235SAya Levin 	}
608baf6dfdbSAya Levin 	priv->tx_reporter = reporter;
609de8650a8SEran Ben Elisha }
610de8650a8SEran Ben Elisha 
61106293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
612de8650a8SEran Ben Elisha {
613baf6dfdbSAya Levin 	if (!priv->tx_reporter)
614de8650a8SEran Ben Elisha 		return;
615de8650a8SEran Ben Elisha 
6169f167327SJiri Pirko 	devlink_health_reporter_destroy(priv->tx_reporter);
6177a9fb35eSRoi Dayan 	priv->tx_reporter = NULL;
618de8650a8SEran Ben Elisha }
619