1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */
2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */
3de8650a8SEran Ben Elisha 
44edc17fdSAya Levin #include "health.h"
5*145e5637SEran Ben Elisha #include "en/ptp.h"
6de8650a8SEran Ben Elisha 
7de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
8de8650a8SEran Ben Elisha {
9e74e28aeSAya Levin 	unsigned long exp_time = jiffies +
10e74e28aeSAya Levin 				 msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
11de8650a8SEran Ben Elisha 
12de8650a8SEran Ben Elisha 	while (time_before(jiffies, exp_time)) {
13de8650a8SEran Ben Elisha 		if (sq->cc == sq->pc)
14de8650a8SEran Ben Elisha 			return 0;
15de8650a8SEran Ben Elisha 
16de8650a8SEran Ben Elisha 		msleep(20);
17de8650a8SEran Ben Elisha 	}
18de8650a8SEran Ben Elisha 
194ad40d8eSEran Ben Elisha 	netdev_err(sq->netdev,
20de8650a8SEran Ben Elisha 		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
21de8650a8SEran Ben Elisha 		   sq->sqn, sq->cc, sq->pc);
22de8650a8SEran Ben Elisha 
23de8650a8SEran Ben Elisha 	return -ETIMEDOUT;
24de8650a8SEran Ben Elisha }
25de8650a8SEran Ben Elisha 
26de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
27de8650a8SEran Ben Elisha {
28de8650a8SEran Ben Elisha 	WARN_ONCE(sq->cc != sq->pc,
29de8650a8SEran Ben Elisha 		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
30de8650a8SEran Ben Elisha 		  sq->sqn, sq->cc, sq->pc);
31de8650a8SEran Ben Elisha 	sq->cc = 0;
32de8650a8SEran Ben Elisha 	sq->dma_fifo_cc = 0;
33de8650a8SEran Ben Elisha 	sq->pc = 0;
34de8650a8SEran Ben Elisha }
35de8650a8SEran Ben Elisha 
36c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
37de8650a8SEran Ben Elisha {
38c50de4afSAya Levin 	struct mlx5_core_dev *mdev;
39c50de4afSAya Levin 	struct net_device *dev;
40c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
41de8650a8SEran Ben Elisha 	u8 state;
42de8650a8SEran Ben Elisha 	int err;
43de8650a8SEran Ben Elisha 
44c50de4afSAya Levin 	sq = ctx;
454ad40d8eSEran Ben Elisha 	mdev = sq->mdev;
464ad40d8eSEran Ben Elisha 	dev = sq->netdev;
47c50de4afSAya Levin 
48c50de4afSAya Levin 	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
49c50de4afSAya Levin 		return 0;
50c50de4afSAya Levin 
51de8650a8SEran Ben Elisha 	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
52de8650a8SEran Ben Elisha 	if (err) {
53de8650a8SEran Ben Elisha 		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
54de8650a8SEran Ben Elisha 			   sq->sqn, err);
55276d197eSAya Levin 		goto out;
56de8650a8SEran Ben Elisha 	}
57de8650a8SEran Ben Elisha 
58d9a2fcf5SAya Levin 	if (state != MLX5_SQC_STATE_ERR)
59276d197eSAya Levin 		goto out;
60de8650a8SEran Ben Elisha 
61de8650a8SEran Ben Elisha 	mlx5e_tx_disable_queue(sq->txq);
62de8650a8SEran Ben Elisha 
63de8650a8SEran Ben Elisha 	err = mlx5e_wait_for_sq_flush(sq);
64de8650a8SEran Ben Elisha 	if (err)
65276d197eSAya Levin 		goto out;
66de8650a8SEran Ben Elisha 
67de8650a8SEran Ben Elisha 	/* At this point, no new packets will arrive from the stack as TXQ is
68de8650a8SEran Ben Elisha 	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
69de8650a8SEran Ben Elisha 	 * pending WQEs. SQ can safely reset the SQ.
70de8650a8SEran Ben Elisha 	 */
71de8650a8SEran Ben Elisha 
724ad40d8eSEran Ben Elisha 	err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
73de8650a8SEran Ben Elisha 	if (err)
74276d197eSAya Levin 		goto out;
75de8650a8SEran Ben Elisha 
76de8650a8SEran Ben Elisha 	mlx5e_reset_txqsq_cc_pc(sq);
77de8650a8SEran Ben Elisha 	sq->stats->recover++;
78276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
79de8650a8SEran Ben Elisha 	mlx5e_activate_txqsq(sq);
80de8650a8SEran Ben Elisha 
81de8650a8SEran Ben Elisha 	return 0;
82276d197eSAya Levin out:
83276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
84276d197eSAya Levin 	return err;
85de8650a8SEran Ben Elisha }
86de8650a8SEran Ben Elisha 
/* Context handed to the TX timeout recovery callback. */
struct mlx5e_tx_timeout_ctx {
	/* SQ on which the timeout was detected */
	struct mlx5e_txqsq *sq;
	/* Recovery outcome: 0 - this SQ recovered, 1 - all channels were
	 * reopened, negative errno - recovery failed.
	 */
	int status;
};
91e6205564SAya Levin 
92c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx)
937d91126bSEran Ben Elisha {
94e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx *to_ctx;
95e6205564SAya Levin 	struct mlx5e_priv *priv;
96c50de4afSAya Levin 	struct mlx5_eq_comp *eq;
97c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
98c50de4afSAya Levin 	int err;
997d91126bSEran Ben Elisha 
100e6205564SAya Levin 	to_ctx = ctx;
101e6205564SAya Levin 	sq = to_ctx->sq;
102c50de4afSAya Levin 	eq = sq->cq.mcq.eq;
1034ad40d8eSEran Ben Elisha 	priv = sq->priv;
1044ad40d8eSEran Ben Elisha 	err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
105e6205564SAya Levin 	if (!err) {
106e6205564SAya Levin 		to_ctx->status = 0; /* this sq recovered */
107e6205564SAya Levin 		return err;
108e6205564SAya Levin 	}
109e6205564SAya Levin 
110e6205564SAya Levin 	err = mlx5e_safe_reopen_channels(priv);
111e6205564SAya Levin 	if (!err) {
112e6205564SAya Levin 		to_ctx->status = 1; /* all channels recovered */
113e6205564SAya Levin 		return err;
114e6205564SAya Levin 	}
115e6205564SAya Levin 
116e6205564SAya Levin 	to_ctx->status = err;
1177d91126bSEran Ben Elisha 	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
118e6205564SAya Levin 	netdev_err(priv->netdev,
119e6205564SAya Levin 		   "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
120e6205564SAya Levin 		   err);
1217d91126bSEran Ben Elisha 
122c50de4afSAya Levin 	return err;
1237d91126bSEran Ben Elisha }
1247d91126bSEran Ben Elisha 
125de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function.
126de8650a8SEran Ben Elisha  * It can cause a dead lock or a read-after-free.
127de8650a8SEran Ben Elisha  */
128c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
129de8650a8SEran Ben Elisha {
130c50de4afSAya Levin 	return err_ctx->recover(err_ctx->ctx);
131de8650a8SEran Ben Elisha }
132de8650a8SEran Ben Elisha 
/* devlink .recover callback of the TX reporter.
 *
 * With an error context, the context's own recovery callback is run.
 * Without one, all channels are recovered.
 */
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_tx_reporter_recover_from_ctx(err_ctx);

	return mlx5e_health_recover_channels(priv);
}
143de8650a8SEran Ben Elisha 
144de8650a8SEran Ben Elisha static int
145*145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
1462d708887SAya Levin 						  struct mlx5e_txqsq *sq, int tc)
147de8650a8SEran Ben Elisha {
148dd921fd2SAya Levin 	bool stopped = netif_xmit_stopped(sq->txq);
1494ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
150dd921fd2SAya Levin 	u8 state;
151de8650a8SEran Ben Elisha 	int err;
152de8650a8SEran Ben Elisha 
153dd921fd2SAya Levin 	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
154dd921fd2SAya Levin 	if (err)
155dd921fd2SAya Levin 		return err;
156dd921fd2SAya Levin 
1572d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
1582d708887SAya Levin 	if (err)
1592d708887SAya Levin 		return err;
1602d708887SAya Levin 
1612d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
1622d708887SAya Levin 	if (err)
1632d708887SAya Levin 		return err;
1642d708887SAya Levin 
165dd921fd2SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
166de8650a8SEran Ben Elisha 	if (err)
167de8650a8SEran Ben Elisha 		return err;
168de8650a8SEran Ben Elisha 
169de8650a8SEran Ben Elisha 	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
170de8650a8SEran Ben Elisha 	if (err)
171de8650a8SEran Ben Elisha 		return err;
172de8650a8SEran Ben Elisha 
173de8650a8SEran Ben Elisha 	err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
174de8650a8SEran Ben Elisha 	if (err)
175de8650a8SEran Ben Elisha 		return err;
176de8650a8SEran Ben Elisha 
1772d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
1782d708887SAya Levin 	if (err)
1792d708887SAya Levin 		return err;
1802d708887SAya Levin 
1812d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
1822d708887SAya Levin 	if (err)
1832d708887SAya Levin 		return err;
1842d708887SAya Levin 
185d5cbedd7SAya Levin 	err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
1862bf09e60SAya Levin 	if (err)
1872bf09e60SAya Levin 		return err;
1882bf09e60SAya Levin 
189*145e5637SEran Ben Elisha 	return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
190*145e5637SEran Ben Elisha }
191*145e5637SEran Ben Elisha 
192*145e5637SEran Ben Elisha static int
193*145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
194*145e5637SEran Ben Elisha 					struct mlx5e_txqsq *sq, int tc)
195*145e5637SEran Ben Elisha {
196*145e5637SEran Ben Elisha 	int err;
197*145e5637SEran Ben Elisha 
198*145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
199*145e5637SEran Ben Elisha 	if (err)
200*145e5637SEran Ben Elisha 		return err;
201*145e5637SEran Ben Elisha 
202*145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
203*145e5637SEran Ben Elisha 	if (err)
204*145e5637SEran Ben Elisha 		return err;
205*145e5637SEran Ben Elisha 
206*145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
20756837c2aSAya Levin 	if (err)
20856837c2aSAya Levin 		return err;
20956837c2aSAya Levin 
210de8650a8SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
211de8650a8SEran Ben Elisha 	if (err)
212de8650a8SEran Ben Elisha 		return err;
213de8650a8SEran Ben Elisha 
214de8650a8SEran Ben Elisha 	return 0;
215de8650a8SEran Ben Elisha }
216de8650a8SEran Ben Elisha 
217*145e5637SEran Ben Elisha static int
218*145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
219*145e5637SEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq, int tc)
220*145e5637SEran Ben Elisha {
221*145e5637SEran Ben Elisha 	int err;
222*145e5637SEran Ben Elisha 
223*145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
224*145e5637SEran Ben Elisha 	if (err)
225*145e5637SEran Ben Elisha 		return err;
226*145e5637SEran Ben Elisha 
227*145e5637SEran Ben Elisha 	err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
228*145e5637SEran Ben Elisha 	if (err)
229*145e5637SEran Ben Elisha 		return err;
230*145e5637SEran Ben Elisha 
231*145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg,
232*145e5637SEran Ben Elisha 								&ptpsq->txqsq,
233*145e5637SEran Ben Elisha 								tc);
234*145e5637SEran Ben Elisha 	if (err)
235*145e5637SEran Ben Elisha 		return err;
236*145e5637SEran Ben Elisha 
237*145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
238*145e5637SEran Ben Elisha 	if (err)
239*145e5637SEran Ben Elisha 		return err;
240*145e5637SEran Ben Elisha 
241*145e5637SEran Ben Elisha 	return 0;
242*145e5637SEran Ben Elisha }
243*145e5637SEran Ben Elisha 
244*145e5637SEran Ben Elisha static int
245*145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
246*145e5637SEran Ben Elisha 					 struct mlx5e_txqsq *txqsq)
247*145e5637SEran Ben Elisha {
248*145e5637SEran Ben Elisha 	u32 sq_stride, sq_sz;
249*145e5637SEran Ben Elisha 	int err;
250*145e5637SEran Ben Elisha 
251*145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
252*145e5637SEran Ben Elisha 	if (err)
253*145e5637SEran Ben Elisha 		return err;
254*145e5637SEran Ben Elisha 
255*145e5637SEran Ben Elisha 	sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
256*145e5637SEran Ben Elisha 	sq_stride = MLX5_SEND_WQE_BB;
257*145e5637SEran Ben Elisha 
258*145e5637SEran Ben Elisha 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
259*145e5637SEran Ben Elisha 	if (err)
260*145e5637SEran Ben Elisha 		return err;
261*145e5637SEran Ben Elisha 
262*145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
263*145e5637SEran Ben Elisha 	if (err)
264*145e5637SEran Ben Elisha 		return err;
265*145e5637SEran Ben Elisha 
266*145e5637SEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
267*145e5637SEran Ben Elisha 	if (err)
268*145e5637SEran Ben Elisha 		return err;
269*145e5637SEran Ben Elisha 
270*145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
271*145e5637SEran Ben Elisha }
272*145e5637SEran Ben Elisha 
273*145e5637SEran Ben Elisha static int
274*145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
275*145e5637SEran Ben Elisha 					 struct devlink_fmsg *fmsg)
276*145e5637SEran Ben Elisha {
277*145e5637SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
278*145e5637SEran Ben Elisha 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
279*145e5637SEran Ben Elisha 	struct mlx5e_ptpsq *generic_ptpsq;
280*145e5637SEran Ben Elisha 	int err;
281*145e5637SEran Ben Elisha 
282*145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
283*145e5637SEran Ben Elisha 	if (err)
284*145e5637SEran Ben Elisha 		return err;
285*145e5637SEran Ben Elisha 
286*145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
287*145e5637SEran Ben Elisha 	if (err)
288*145e5637SEran Ben Elisha 		return err;
289*145e5637SEran Ben Elisha 
290*145e5637SEran Ben Elisha 	generic_ptpsq = priv->channels.port_ptp ?
291*145e5637SEran Ben Elisha 			&priv->channels.port_ptp->ptpsq[0] :
292*145e5637SEran Ben Elisha 			NULL;
293*145e5637SEran Ben Elisha 	if (!generic_ptpsq)
294*145e5637SEran Ben Elisha 		goto out;
295*145e5637SEran Ben Elisha 
296*145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
297*145e5637SEran Ben Elisha 	if (err)
298*145e5637SEran Ben Elisha 		return err;
299*145e5637SEran Ben Elisha 
300*145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
301*145e5637SEran Ben Elisha 	if (err)
302*145e5637SEran Ben Elisha 		return err;
303*145e5637SEran Ben Elisha 
304*145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
305*145e5637SEran Ben Elisha 	if (err)
306*145e5637SEran Ben Elisha 		return err;
307*145e5637SEran Ben Elisha 
308*145e5637SEran Ben Elisha out:
309*145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
310*145e5637SEran Ben Elisha }
311*145e5637SEran Ben Elisha 
312de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
313e7a98105SJiri Pirko 				      struct devlink_fmsg *fmsg,
314e7a98105SJiri Pirko 				      struct netlink_ext_ack *extack)
315de8650a8SEran Ben Elisha {
316de8650a8SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
317*145e5637SEran Ben Elisha 	struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp;
3182d708887SAya Levin 
3192d708887SAya Levin 	int i, tc, err = 0;
320de8650a8SEran Ben Elisha 
321de8650a8SEran Ben Elisha 	mutex_lock(&priv->state_lock);
322de8650a8SEran Ben Elisha 
323de8650a8SEran Ben Elisha 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
324de8650a8SEran Ben Elisha 		goto unlock;
325de8650a8SEran Ben Elisha 
326*145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
3272d708887SAya Levin 	if (err)
3282d708887SAya Levin 		goto unlock;
3292d708887SAya Levin 
330de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
331de8650a8SEran Ben Elisha 	if (err)
332de8650a8SEran Ben Elisha 		goto unlock;
333de8650a8SEran Ben Elisha 
3342d708887SAya Levin 	for (i = 0; i < priv->channels.num; i++) {
3352d708887SAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
336de8650a8SEran Ben Elisha 
3372d708887SAya Levin 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
3382d708887SAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
3392d708887SAya Levin 
3402d708887SAya Levin 			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
341de8650a8SEran Ben Elisha 			if (err)
34299d31cbdSAya Levin 				goto unlock;
343de8650a8SEran Ben Elisha 		}
3442d708887SAya Levin 	}
345*145e5637SEran Ben Elisha 
346*145e5637SEran Ben Elisha 	if (!ptp_ch)
347*145e5637SEran Ben Elisha 		goto close_sqs_nest;
348*145e5637SEran Ben Elisha 
349*145e5637SEran Ben Elisha 	for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
350*145e5637SEran Ben Elisha 		err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
351*145e5637SEran Ben Elisha 								    &ptp_ch->ptpsq[tc],
352*145e5637SEran Ben Elisha 								    tc);
353*145e5637SEran Ben Elisha 		if (err)
354*145e5637SEran Ben Elisha 			goto unlock;
355*145e5637SEran Ben Elisha 	}
356*145e5637SEran Ben Elisha 
357*145e5637SEran Ben Elisha close_sqs_nest:
358de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
359de8650a8SEran Ben Elisha 	if (err)
360de8650a8SEran Ben Elisha 		goto unlock;
361de8650a8SEran Ben Elisha 
362de8650a8SEran Ben Elisha unlock:
363de8650a8SEran Ben Elisha 	mutex_unlock(&priv->state_lock);
364de8650a8SEran Ben Elisha 	return err;
365de8650a8SEran Ben Elisha }
366de8650a8SEran Ben Elisha 
3675f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
3685f29458bSAya Levin 				     void *ctx)
3695f29458bSAya Levin {
3705f29458bSAya Levin 	struct mlx5_rsc_key key = {};
3715f29458bSAya Levin 	struct mlx5e_txqsq *sq = ctx;
3725f29458bSAya Levin 	int err;
3735f29458bSAya Levin 
3745f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
3755f29458bSAya Levin 		return 0;
3765f29458bSAya Levin 
377d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
3785f29458bSAya Levin 	if (err)
3795f29458bSAya Levin 		return err;
3805f29458bSAya Levin 
3815f29458bSAya Levin 	key.size = PAGE_SIZE;
3825f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
3835f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
3845f29458bSAya Levin 	if (err)
3855f29458bSAya Levin 		return err;
3865f29458bSAya Levin 
387d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3885f29458bSAya Levin 	if (err)
3895f29458bSAya Levin 		return err;
3905f29458bSAya Levin 
391d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
3925f29458bSAya Levin 	if (err)
3935f29458bSAya Levin 		return err;
3945f29458bSAya Levin 
395d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
3965f29458bSAya Levin 	if (err)
3975f29458bSAya Levin 		return err;
3985f29458bSAya Levin 
3995f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
4005f29458bSAya Levin 	key.index1 = sq->sqn;
4015f29458bSAya Levin 	key.num_of_obj1 = 1;
4025f29458bSAya Levin 
4035f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4045f29458bSAya Levin 	if (err)
4055f29458bSAya Levin 		return err;
4065f29458bSAya Levin 
407d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4085f29458bSAya Levin 	if (err)
4095f29458bSAya Levin 		return err;
4105f29458bSAya Levin 
411d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
4125f29458bSAya Levin 	if (err)
4135f29458bSAya Levin 		return err;
4145f29458bSAya Levin 
4155f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
4165f29458bSAya Levin 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
4175f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4185f29458bSAya Levin 	if (err)
4195f29458bSAya Levin 		return err;
4205f29458bSAya Levin 
421d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4225f29458bSAya Levin 	if (err)
4235f29458bSAya Levin 		return err;
4245f29458bSAya Levin 
425d5cbedd7SAya Levin 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4265f29458bSAya Levin }
4275f29458bSAya Levin 
4285f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
4295f29458bSAya Levin 					  struct devlink_fmsg *fmsg)
4305f29458bSAya Levin {
431*145e5637SEran Ben Elisha 	struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp;
4325f29458bSAya Levin 	struct mlx5_rsc_key key = {};
4335f29458bSAya Levin 	int i, tc, err;
4345f29458bSAya Levin 
4355f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4365f29458bSAya Levin 		return 0;
4375f29458bSAya Levin 
438d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
4395f29458bSAya Levin 	if (err)
4405f29458bSAya Levin 		return err;
4415f29458bSAya Levin 
4425f29458bSAya Levin 	key.size = PAGE_SIZE;
4435f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
4445f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4455f29458bSAya Levin 	if (err)
4465f29458bSAya Levin 		return err;
4475f29458bSAya Levin 
448d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4495f29458bSAya Levin 	if (err)
4505f29458bSAya Levin 		return err;
4515f29458bSAya Levin 
4525f29458bSAya Levin 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
4535f29458bSAya Levin 	if (err)
4545f29458bSAya Levin 		return err;
4555f29458bSAya Levin 
4565f29458bSAya Levin 	for (i = 0; i < priv->channels.num; i++) {
4575f29458bSAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
4585f29458bSAya Levin 
4595f29458bSAya Levin 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
4605f29458bSAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
4615f29458bSAya Levin 
4625f29458bSAya Levin 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
4635f29458bSAya Levin 			if (err)
4645f29458bSAya Levin 				return err;
4655f29458bSAya Levin 		}
4665f29458bSAya Levin 	}
467*145e5637SEran Ben Elisha 
468*145e5637SEran Ben Elisha 	if (ptp_ch) {
469*145e5637SEran Ben Elisha 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
470*145e5637SEran Ben Elisha 			struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
471*145e5637SEran Ben Elisha 
472*145e5637SEran Ben Elisha 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
473*145e5637SEran Ben Elisha 			if (err)
474*145e5637SEran Ben Elisha 				return err;
475*145e5637SEran Ben Elisha 		}
476*145e5637SEran Ben Elisha 	}
477*145e5637SEran Ben Elisha 
4785f29458bSAya Levin 	return devlink_fmsg_arr_pair_nest_end(fmsg);
4795f29458bSAya Levin }
4805f29458bSAya Levin 
4815f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
4825f29458bSAya Levin 					   struct mlx5e_err_ctx *err_ctx,
4835f29458bSAya Levin 					   struct devlink_fmsg *fmsg)
4845f29458bSAya Levin {
4855f29458bSAya Levin 	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
4865f29458bSAya Levin }
4875f29458bSAya Levin 
/* devlink .dump callback of the TX reporter.
 *
 * With an error context, the context's own dump callback is run.
 * Without one, all SQs are dumped.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);

	return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);
}
4985f29458bSAya Levin 
4990a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
5000a56be3cSAya Levin {
5010a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
5024ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
5030a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
5040a56be3cSAya Levin 
5050a56be3cSAya Levin 	err_ctx.ctx = sq;
5060a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
5075f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
508b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
5090a56be3cSAya Levin 
5100a56be3cSAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
5110a56be3cSAya Levin }
5120a56be3cSAya Levin 
5130a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
5140a56be3cSAya Levin {
5150a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
516e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx to_ctx = {};
5174ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
5180a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
5190a56be3cSAya Levin 
520e6205564SAya Levin 	to_ctx.sq = sq;
521e6205564SAya Levin 	err_ctx.ctx = &to_ctx;
5220a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
5235f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
524b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str),
525b21aef7eSJoe Perches 		 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
5264ad40d8eSEran Ben Elisha 		 sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
5270a56be3cSAya Levin 		 jiffies_to_usecs(jiffies - sq->txq->trans_start));
5280a56be3cSAya Levin 
529e6205564SAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
530e6205564SAya Levin 	return to_ctx.status;
5310a56be3cSAya Levin }
5320a56be3cSAya Levin 
533de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
534de8650a8SEran Ben Elisha 		.name = "tx",
535de8650a8SEran Ben Elisha 		.recover = mlx5e_tx_reporter_recover,
536de8650a8SEran Ben Elisha 		.diagnose = mlx5e_tx_reporter_diagnose,
5375f29458bSAya Levin 		.dump = mlx5e_tx_reporter_dump,
538de8650a8SEran Ben Elisha };
539de8650a8SEran Ben Elisha 
540de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
541de8650a8SEran Ben Elisha 
542b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
543de8650a8SEran Ben Elisha {
544baf6dfdbSAya Levin 	struct devlink_health_reporter *reporter;
545de8650a8SEran Ben Elisha 
546b7e93bb6SVladyslav Tarasiuk 	reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_tx_reporter_ops,
547b7e93bb6SVladyslav Tarasiuk 						       MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
548baf6dfdbSAya Levin 	if (IS_ERR(reporter)) {
549de8650a8SEran Ben Elisha 		netdev_warn(priv->netdev,
550de8650a8SEran Ben Elisha 			    "Failed to create tx reporter, err = %ld\n",
551baf6dfdbSAya Levin 			    PTR_ERR(reporter));
552b3ea4c4fSEran Ben Elisha 		return;
5537f7cc235SAya Levin 	}
554baf6dfdbSAya Levin 	priv->tx_reporter = reporter;
555de8650a8SEran Ben Elisha }
556de8650a8SEran Ben Elisha 
55706293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
558de8650a8SEran Ben Elisha {
559baf6dfdbSAya Levin 	if (!priv->tx_reporter)
560de8650a8SEran Ben Elisha 		return;
561de8650a8SEran Ben Elisha 
562b7e93bb6SVladyslav Tarasiuk 	devlink_port_health_reporter_destroy(priv->tx_reporter);
563de8650a8SEran Ben Elisha }
564