/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */
3de8650a8SEran Ben Elisha 
44edc17fdSAya Levin #include "health.h"
5145e5637SEran Ben Elisha #include "en/ptp.h"
6c27971d0SRoi Dayan #include "en/devlink.h"
7de8650a8SEran Ben Elisha 
8de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
9de8650a8SEran Ben Elisha {
10e74e28aeSAya Levin 	unsigned long exp_time = jiffies +
11e74e28aeSAya Levin 				 msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
12de8650a8SEran Ben Elisha 
13de8650a8SEran Ben Elisha 	while (time_before(jiffies, exp_time)) {
14de8650a8SEran Ben Elisha 		if (sq->cc == sq->pc)
15de8650a8SEran Ben Elisha 			return 0;
16de8650a8SEran Ben Elisha 
17de8650a8SEran Ben Elisha 		msleep(20);
18de8650a8SEran Ben Elisha 	}
19de8650a8SEran Ben Elisha 
204ad40d8eSEran Ben Elisha 	netdev_err(sq->netdev,
21de8650a8SEran Ben Elisha 		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
22de8650a8SEran Ben Elisha 		   sq->sqn, sq->cc, sq->pc);
23de8650a8SEran Ben Elisha 
24de8650a8SEran Ben Elisha 	return -ETIMEDOUT;
25de8650a8SEran Ben Elisha }
26de8650a8SEran Ben Elisha 
27de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
28de8650a8SEran Ben Elisha {
29de8650a8SEran Ben Elisha 	WARN_ONCE(sq->cc != sq->pc,
30de8650a8SEran Ben Elisha 		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
31de8650a8SEran Ben Elisha 		  sq->sqn, sq->cc, sq->pc);
32de8650a8SEran Ben Elisha 	sq->cc = 0;
33de8650a8SEran Ben Elisha 	sq->dma_fifo_cc = 0;
34de8650a8SEran Ben Elisha 	sq->pc = 0;
35de8650a8SEran Ben Elisha }
36de8650a8SEran Ben Elisha 
37c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
38de8650a8SEran Ben Elisha {
39c50de4afSAya Levin 	struct mlx5_core_dev *mdev;
40c50de4afSAya Levin 	struct net_device *dev;
41c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
42de8650a8SEran Ben Elisha 	u8 state;
43de8650a8SEran Ben Elisha 	int err;
44de8650a8SEran Ben Elisha 
45c50de4afSAya Levin 	sq = ctx;
464ad40d8eSEran Ben Elisha 	mdev = sq->mdev;
474ad40d8eSEran Ben Elisha 	dev = sq->netdev;
48c50de4afSAya Levin 
49c50de4afSAya Levin 	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
50c50de4afSAya Levin 		return 0;
51c50de4afSAya Levin 
52de8650a8SEran Ben Elisha 	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
53de8650a8SEran Ben Elisha 	if (err) {
54de8650a8SEran Ben Elisha 		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
55de8650a8SEran Ben Elisha 			   sq->sqn, err);
56276d197eSAya Levin 		goto out;
57de8650a8SEran Ben Elisha 	}
58de8650a8SEran Ben Elisha 
59d9a2fcf5SAya Levin 	if (state != MLX5_SQC_STATE_ERR)
60276d197eSAya Levin 		goto out;
61de8650a8SEran Ben Elisha 
62de8650a8SEran Ben Elisha 	mlx5e_tx_disable_queue(sq->txq);
63de8650a8SEran Ben Elisha 
64de8650a8SEran Ben Elisha 	err = mlx5e_wait_for_sq_flush(sq);
65de8650a8SEran Ben Elisha 	if (err)
66276d197eSAya Levin 		goto out;
67de8650a8SEran Ben Elisha 
68de8650a8SEran Ben Elisha 	/* At this point, no new packets will arrive from the stack as TXQ is
69de8650a8SEran Ben Elisha 	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
70de8650a8SEran Ben Elisha 	 * pending WQEs. SQ can safely reset the SQ.
71de8650a8SEran Ben Elisha 	 */
72de8650a8SEran Ben Elisha 
734ad40d8eSEran Ben Elisha 	err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
74de8650a8SEran Ben Elisha 	if (err)
75276d197eSAya Levin 		goto out;
76de8650a8SEran Ben Elisha 
77de8650a8SEran Ben Elisha 	mlx5e_reset_txqsq_cc_pc(sq);
78de8650a8SEran Ben Elisha 	sq->stats->recover++;
79276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
80de8650a8SEran Ben Elisha 	mlx5e_activate_txqsq(sq);
81de8650a8SEran Ben Elisha 
82de8650a8SEran Ben Elisha 	return 0;
83276d197eSAya Levin out:
84276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
85276d197eSAya Levin 	return err;
86de8650a8SEran Ben Elisha }
87de8650a8SEran Ben Elisha 
/* Context passed to the TX timeout recover callback. */
struct mlx5e_tx_timeout_ctx {
	/* SQ on which the timeout was detected. */
	struct mlx5e_txqsq *sq;
	/* Recovery outcome: 0 - this SQ recovered, 1 - all channels were
	 * reopened, otherwise the error returned by the channel reopen.
	 */
	int status;
};
92e6205564SAya Levin 
93c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx)
947d91126bSEran Ben Elisha {
95e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx *to_ctx;
96e6205564SAya Levin 	struct mlx5e_priv *priv;
97c50de4afSAya Levin 	struct mlx5_eq_comp *eq;
98c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
99c50de4afSAya Levin 	int err;
1007d91126bSEran Ben Elisha 
101e6205564SAya Levin 	to_ctx = ctx;
102e6205564SAya Levin 	sq = to_ctx->sq;
103c50de4afSAya Levin 	eq = sq->cq.mcq.eq;
1044ad40d8eSEran Ben Elisha 	priv = sq->priv;
1054ad40d8eSEran Ben Elisha 	err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
106e6205564SAya Levin 	if (!err) {
107e6205564SAya Levin 		to_ctx->status = 0; /* this sq recovered */
108e6205564SAya Levin 		return err;
109e6205564SAya Levin 	}
110e6205564SAya Levin 
111e6205564SAya Levin 	err = mlx5e_safe_reopen_channels(priv);
112e6205564SAya Levin 	if (!err) {
113e6205564SAya Levin 		to_ctx->status = 1; /* all channels recovered */
114e6205564SAya Levin 		return err;
115e6205564SAya Levin 	}
116e6205564SAya Levin 
117e6205564SAya Levin 	to_ctx->status = err;
1187d91126bSEran Ben Elisha 	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
119e6205564SAya Levin 	netdev_err(priv->netdev,
120e6205564SAya Levin 		   "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
121e6205564SAya Levin 		   err);
1227d91126bSEran Ben Elisha 
123c50de4afSAya Levin 	return err;
1247d91126bSEran Ben Elisha }
1257d91126bSEran Ben Elisha 
126de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function.
127de8650a8SEran Ben Elisha  * It can cause a dead lock or a read-after-free.
128de8650a8SEran Ben Elisha  */
129c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
130de8650a8SEran Ben Elisha {
131c50de4afSAya Levin 	return err_ctx->recover(err_ctx->ctx);
132de8650a8SEran Ben Elisha }
133de8650a8SEran Ben Elisha 
/* devlink health .recover callback of the TX reporter.
 *
 * With an error context (@context != NULL) the context's own recover
 * callback is used; without one, mlx5e_health_recover_channels() is called.
 */
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_tx_reporter_recover_from_ctx(err_ctx);

	return mlx5e_health_recover_channels(priv);
}
144de8650a8SEran Ben Elisha 
145de8650a8SEran Ben Elisha static int
146145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
1472d708887SAya Levin 						  struct mlx5e_txqsq *sq, int tc)
148de8650a8SEran Ben Elisha {
149dd921fd2SAya Levin 	bool stopped = netif_xmit_stopped(sq->txq);
1504ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
151dd921fd2SAya Levin 	u8 state;
152de8650a8SEran Ben Elisha 	int err;
153de8650a8SEran Ben Elisha 
154dd921fd2SAya Levin 	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
155dd921fd2SAya Levin 	if (err)
156dd921fd2SAya Levin 		return err;
157dd921fd2SAya Levin 
1582d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
1592d708887SAya Levin 	if (err)
1602d708887SAya Levin 		return err;
1612d708887SAya Levin 
1622d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
1632d708887SAya Levin 	if (err)
1642d708887SAya Levin 		return err;
1652d708887SAya Levin 
166dd921fd2SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
167de8650a8SEran Ben Elisha 	if (err)
168de8650a8SEran Ben Elisha 		return err;
169de8650a8SEran Ben Elisha 
170de8650a8SEran Ben Elisha 	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
171de8650a8SEran Ben Elisha 	if (err)
172de8650a8SEran Ben Elisha 		return err;
173de8650a8SEran Ben Elisha 
174de8650a8SEran Ben Elisha 	err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
175de8650a8SEran Ben Elisha 	if (err)
176de8650a8SEran Ben Elisha 		return err;
177de8650a8SEran Ben Elisha 
1782d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
1792d708887SAya Levin 	if (err)
1802d708887SAya Levin 		return err;
1812d708887SAya Levin 
1822d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
1832d708887SAya Levin 	if (err)
1842d708887SAya Levin 		return err;
1852d708887SAya Levin 
186d5cbedd7SAya Levin 	err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
1872bf09e60SAya Levin 	if (err)
1882bf09e60SAya Levin 		return err;
1892bf09e60SAya Levin 
190145e5637SEran Ben Elisha 	return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
191145e5637SEran Ben Elisha }
192145e5637SEran Ben Elisha 
193145e5637SEran Ben Elisha static int
194145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
195145e5637SEran Ben Elisha 					struct mlx5e_txqsq *sq, int tc)
196145e5637SEran Ben Elisha {
197145e5637SEran Ben Elisha 	int err;
198145e5637SEran Ben Elisha 
199145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
200145e5637SEran Ben Elisha 	if (err)
201145e5637SEran Ben Elisha 		return err;
202145e5637SEran Ben Elisha 
203145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
204145e5637SEran Ben Elisha 	if (err)
205145e5637SEran Ben Elisha 		return err;
206145e5637SEran Ben Elisha 
207145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
20856837c2aSAya Levin 	if (err)
20956837c2aSAya Levin 		return err;
21056837c2aSAya Levin 
211de8650a8SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
212de8650a8SEran Ben Elisha 	if (err)
213de8650a8SEran Ben Elisha 		return err;
214de8650a8SEran Ben Elisha 
215de8650a8SEran Ben Elisha 	return 0;
216de8650a8SEran Ben Elisha }
217de8650a8SEran Ben Elisha 
218145e5637SEran Ben Elisha static int
219145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
220145e5637SEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq, int tc)
221145e5637SEran Ben Elisha {
222145e5637SEran Ben Elisha 	int err;
223145e5637SEran Ben Elisha 
224145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
225145e5637SEran Ben Elisha 	if (err)
226145e5637SEran Ben Elisha 		return err;
227145e5637SEran Ben Elisha 
228145e5637SEran Ben Elisha 	err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
229145e5637SEran Ben Elisha 	if (err)
230145e5637SEran Ben Elisha 		return err;
231145e5637SEran Ben Elisha 
2321880bc4eSEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc);
2331880bc4eSEran Ben Elisha 	if (err)
2341880bc4eSEran Ben Elisha 		return err;
2351880bc4eSEran Ben Elisha 
2361880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
2371880bc4eSEran Ben Elisha 	if (err)
2381880bc4eSEran Ben Elisha 		return err;
2391880bc4eSEran Ben Elisha 
2401880bc4eSEran Ben Elisha 	err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg);
2411880bc4eSEran Ben Elisha 	if (err)
2421880bc4eSEran Ben Elisha 		return err;
2431880bc4eSEran Ben Elisha 
2441880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
245145e5637SEran Ben Elisha 	if (err)
246145e5637SEran Ben Elisha 		return err;
247145e5637SEran Ben Elisha 
248145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
249145e5637SEran Ben Elisha 	if (err)
250145e5637SEran Ben Elisha 		return err;
251145e5637SEran Ben Elisha 
252145e5637SEran Ben Elisha 	return 0;
253145e5637SEran Ben Elisha }
254145e5637SEran Ben Elisha 
255145e5637SEran Ben Elisha static int
256145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
257145e5637SEran Ben Elisha 					 struct mlx5e_txqsq *txqsq)
258145e5637SEran Ben Elisha {
259145e5637SEran Ben Elisha 	u32 sq_stride, sq_sz;
260*95742c1cSAya Levin 	bool real_time;
261145e5637SEran Ben Elisha 	int err;
262145e5637SEran Ben Elisha 
263145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
264145e5637SEran Ben Elisha 	if (err)
265145e5637SEran Ben Elisha 		return err;
266145e5637SEran Ben Elisha 
267*95742c1cSAya Levin 	real_time =  mlx5_is_real_time_sq(txqsq->mdev);
268145e5637SEran Ben Elisha 	sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
269145e5637SEran Ben Elisha 	sq_stride = MLX5_SEND_WQE_BB;
270145e5637SEran Ben Elisha 
271145e5637SEran Ben Elisha 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
272145e5637SEran Ben Elisha 	if (err)
273145e5637SEran Ben Elisha 		return err;
274145e5637SEran Ben Elisha 
275145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
276145e5637SEran Ben Elisha 	if (err)
277145e5637SEran Ben Elisha 		return err;
278145e5637SEran Ben Elisha 
279*95742c1cSAya Levin 	err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
280*95742c1cSAya Levin 	if (err)
281*95742c1cSAya Levin 		return err;
282*95742c1cSAya Levin 
283145e5637SEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
284145e5637SEran Ben Elisha 	if (err)
285145e5637SEran Ben Elisha 		return err;
286145e5637SEran Ben Elisha 
287145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
288145e5637SEran Ben Elisha }
289145e5637SEran Ben Elisha 
290145e5637SEran Ben Elisha static int
2911880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg,
2921880bc4eSEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq)
2931880bc4eSEran Ben Elisha {
2941880bc4eSEran Ben Elisha 	int err;
2951880bc4eSEran Ben Elisha 
2961880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
2971880bc4eSEran Ben Elisha 	if (err)
2981880bc4eSEran Ben Elisha 		return err;
2991880bc4eSEran Ben Elisha 
3001880bc4eSEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg);
3011880bc4eSEran Ben Elisha 	if (err)
3021880bc4eSEran Ben Elisha 		return err;
3031880bc4eSEran Ben Elisha 
3041880bc4eSEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3051880bc4eSEran Ben Elisha }
3061880bc4eSEran Ben Elisha 
3071880bc4eSEran Ben Elisha static int
308145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
309145e5637SEran Ben Elisha 					 struct devlink_fmsg *fmsg)
310145e5637SEran Ben Elisha {
311145e5637SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
312145e5637SEran Ben Elisha 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
31324c22dd0SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
314145e5637SEran Ben Elisha 	struct mlx5e_ptpsq *generic_ptpsq;
315145e5637SEran Ben Elisha 	int err;
316145e5637SEran Ben Elisha 
317145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
318145e5637SEran Ben Elisha 	if (err)
319145e5637SEran Ben Elisha 		return err;
320145e5637SEran Ben Elisha 
321145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
322145e5637SEran Ben Elisha 	if (err)
323145e5637SEran Ben Elisha 		return err;
324145e5637SEran Ben Elisha 
32524c22dd0SAya Levin 	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
326145e5637SEran Ben Elisha 		goto out;
327145e5637SEran Ben Elisha 
32824c22dd0SAya Levin 	generic_ptpsq = &ptp_ch->ptpsq[0];
32924c22dd0SAya Levin 
330145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
331145e5637SEran Ben Elisha 	if (err)
332145e5637SEran Ben Elisha 		return err;
333145e5637SEran Ben Elisha 
334145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
335145e5637SEran Ben Elisha 	if (err)
336145e5637SEran Ben Elisha 		return err;
337145e5637SEran Ben Elisha 
3381880bc4eSEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq);
3391880bc4eSEran Ben Elisha 	if (err)
3401880bc4eSEran Ben Elisha 		return err;
3411880bc4eSEran Ben Elisha 
342145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
343145e5637SEran Ben Elisha 	if (err)
344145e5637SEran Ben Elisha 		return err;
345145e5637SEran Ben Elisha 
346145e5637SEran Ben Elisha out:
347145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
348145e5637SEran Ben Elisha }
349145e5637SEran Ben Elisha 
350de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
351e7a98105SJiri Pirko 				      struct devlink_fmsg *fmsg,
352e7a98105SJiri Pirko 				      struct netlink_ext_ack *extack)
353de8650a8SEran Ben Elisha {
354de8650a8SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
355b0d35de4SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
3562d708887SAya Levin 
3572d708887SAya Levin 	int i, tc, err = 0;
358de8650a8SEran Ben Elisha 
359de8650a8SEran Ben Elisha 	mutex_lock(&priv->state_lock);
360de8650a8SEran Ben Elisha 
361de8650a8SEran Ben Elisha 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
362de8650a8SEran Ben Elisha 		goto unlock;
363de8650a8SEran Ben Elisha 
364145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
3652d708887SAya Levin 	if (err)
3662d708887SAya Levin 		goto unlock;
3672d708887SAya Levin 
368de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
369de8650a8SEran Ben Elisha 	if (err)
370de8650a8SEran Ben Elisha 		goto unlock;
371de8650a8SEran Ben Elisha 
3722d708887SAya Levin 	for (i = 0; i < priv->channels.num; i++) {
3732d708887SAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
374de8650a8SEran Ben Elisha 
3752d708887SAya Levin 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
3762d708887SAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
3772d708887SAya Levin 
3782d708887SAya Levin 			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
379de8650a8SEran Ben Elisha 			if (err)
38099d31cbdSAya Levin 				goto unlock;
381de8650a8SEran Ben Elisha 		}
3822d708887SAya Levin 	}
383145e5637SEran Ben Elisha 
38424c22dd0SAya Levin 	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
385145e5637SEran Ben Elisha 		goto close_sqs_nest;
386145e5637SEran Ben Elisha 
387145e5637SEran Ben Elisha 	for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
388145e5637SEran Ben Elisha 		err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
389145e5637SEran Ben Elisha 								    &ptp_ch->ptpsq[tc],
390145e5637SEran Ben Elisha 								    tc);
391145e5637SEran Ben Elisha 		if (err)
392145e5637SEran Ben Elisha 			goto unlock;
393145e5637SEran Ben Elisha 	}
394145e5637SEran Ben Elisha 
395145e5637SEran Ben Elisha close_sqs_nest:
396de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
397de8650a8SEran Ben Elisha 	if (err)
398de8650a8SEran Ben Elisha 		goto unlock;
399de8650a8SEran Ben Elisha 
400de8650a8SEran Ben Elisha unlock:
401de8650a8SEran Ben Elisha 	mutex_unlock(&priv->state_lock);
402de8650a8SEran Ben Elisha 	return err;
403de8650a8SEran Ben Elisha }
404de8650a8SEran Ben Elisha 
4055f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
4065f29458bSAya Levin 				     void *ctx)
4075f29458bSAya Levin {
4085f29458bSAya Levin 	struct mlx5_rsc_key key = {};
4095f29458bSAya Levin 	struct mlx5e_txqsq *sq = ctx;
4105f29458bSAya Levin 	int err;
4115f29458bSAya Levin 
4125f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4135f29458bSAya Levin 		return 0;
4145f29458bSAya Levin 
415d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
4165f29458bSAya Levin 	if (err)
4175f29458bSAya Levin 		return err;
4185f29458bSAya Levin 
4195f29458bSAya Levin 	key.size = PAGE_SIZE;
4205f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
4215f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4225f29458bSAya Levin 	if (err)
4235f29458bSAya Levin 		return err;
4245f29458bSAya Levin 
425d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4265f29458bSAya Levin 	if (err)
4275f29458bSAya Levin 		return err;
4285f29458bSAya Levin 
429d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
4305f29458bSAya Levin 	if (err)
4315f29458bSAya Levin 		return err;
4325f29458bSAya Levin 
433d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
4345f29458bSAya Levin 	if (err)
4355f29458bSAya Levin 		return err;
4365f29458bSAya Levin 
4375f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
4385f29458bSAya Levin 	key.index1 = sq->sqn;
4395f29458bSAya Levin 	key.num_of_obj1 = 1;
4405f29458bSAya Levin 
4415f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4425f29458bSAya Levin 	if (err)
4435f29458bSAya Levin 		return err;
4445f29458bSAya Levin 
445d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4465f29458bSAya Levin 	if (err)
4475f29458bSAya Levin 		return err;
4485f29458bSAya Levin 
449d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
4505f29458bSAya Levin 	if (err)
4515f29458bSAya Levin 		return err;
4525f29458bSAya Levin 
4535f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
4545f29458bSAya Levin 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
4555f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4565f29458bSAya Levin 	if (err)
4575f29458bSAya Levin 		return err;
4585f29458bSAya Levin 
459d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4605f29458bSAya Levin 	if (err)
4615f29458bSAya Levin 		return err;
4625f29458bSAya Levin 
463d5cbedd7SAya Levin 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4645f29458bSAya Levin }
4655f29458bSAya Levin 
4665f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
4675f29458bSAya Levin 					  struct devlink_fmsg *fmsg)
4685f29458bSAya Levin {
469b0d35de4SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
4705f29458bSAya Levin 	struct mlx5_rsc_key key = {};
4715f29458bSAya Levin 	int i, tc, err;
4725f29458bSAya Levin 
4735f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4745f29458bSAya Levin 		return 0;
4755f29458bSAya Levin 
476d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
4775f29458bSAya Levin 	if (err)
4785f29458bSAya Levin 		return err;
4795f29458bSAya Levin 
4805f29458bSAya Levin 	key.size = PAGE_SIZE;
4815f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
4825f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4835f29458bSAya Levin 	if (err)
4845f29458bSAya Levin 		return err;
4855f29458bSAya Levin 
486d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4875f29458bSAya Levin 	if (err)
4885f29458bSAya Levin 		return err;
4895f29458bSAya Levin 
4905f29458bSAya Levin 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
4915f29458bSAya Levin 	if (err)
4925f29458bSAya Levin 		return err;
4935f29458bSAya Levin 
4945f29458bSAya Levin 	for (i = 0; i < priv->channels.num; i++) {
4955f29458bSAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
4965f29458bSAya Levin 
4975f29458bSAya Levin 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
4985f29458bSAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
4995f29458bSAya Levin 
5005f29458bSAya Levin 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
5015f29458bSAya Levin 			if (err)
5025f29458bSAya Levin 				return err;
5035f29458bSAya Levin 		}
5045f29458bSAya Levin 	}
505145e5637SEran Ben Elisha 
50624c22dd0SAya Levin 	if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
507145e5637SEran Ben Elisha 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
508145e5637SEran Ben Elisha 			struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
509145e5637SEran Ben Elisha 
510145e5637SEran Ben Elisha 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
511145e5637SEran Ben Elisha 			if (err)
512145e5637SEran Ben Elisha 				return err;
513145e5637SEran Ben Elisha 		}
514145e5637SEran Ben Elisha 	}
515145e5637SEran Ben Elisha 
5165f29458bSAya Levin 	return devlink_fmsg_arr_pair_nest_end(fmsg);
5175f29458bSAya Levin }
5185f29458bSAya Levin 
5195f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
5205f29458bSAya Levin 					   struct mlx5e_err_ctx *err_ctx,
5215f29458bSAya Levin 					   struct devlink_fmsg *fmsg)
5225f29458bSAya Levin {
5235f29458bSAya Levin 	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
5245f29458bSAya Levin }
5255f29458bSAya Levin 
/* devlink health .dump callback of the TX reporter.
 *
 * With an error context (@context != NULL) the context's own dump callback
 * is used; without one, all SQs are dumped.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (err_ctx)
		return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);

	return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
}
5365f29458bSAya Levin 
5370a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
5380a56be3cSAya Levin {
5390a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
5404ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
5410a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
5420a56be3cSAya Levin 
5430a56be3cSAya Levin 	err_ctx.ctx = sq;
5440a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
5455f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
546b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
5470a56be3cSAya Levin 
5480a56be3cSAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
5490a56be3cSAya Levin }
5500a56be3cSAya Levin 
5510a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
5520a56be3cSAya Levin {
5530a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
554e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx to_ctx = {};
5554ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
5560a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
5570a56be3cSAya Levin 
558e6205564SAya Levin 	to_ctx.sq = sq;
559e6205564SAya Levin 	err_ctx.ctx = &to_ctx;
5600a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
5615f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
562b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str),
563b21aef7eSJoe Perches 		 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
5644ad40d8eSEran Ben Elisha 		 sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
5650a56be3cSAya Levin 		 jiffies_to_usecs(jiffies - sq->txq->trans_start));
5660a56be3cSAya Levin 
567e6205564SAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
568e6205564SAya Levin 	return to_ctx.status;
5690a56be3cSAya Levin }
5700a56be3cSAya Levin 
571de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
572de8650a8SEran Ben Elisha 		.name = "tx",
573de8650a8SEran Ben Elisha 		.recover = mlx5e_tx_reporter_recover,
574de8650a8SEran Ben Elisha 		.diagnose = mlx5e_tx_reporter_diagnose,
5755f29458bSAya Levin 		.dump = mlx5e_tx_reporter_dump,
576de8650a8SEran Ben Elisha };
577de8650a8SEran Ben Elisha 
578de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
579de8650a8SEran Ben Elisha 
580b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
581de8650a8SEran Ben Elisha {
582c27971d0SRoi Dayan 	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
583baf6dfdbSAya Levin 	struct devlink_health_reporter *reporter;
584de8650a8SEran Ben Elisha 
585c27971d0SRoi Dayan 	reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops,
586b7e93bb6SVladyslav Tarasiuk 						       MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
587baf6dfdbSAya Levin 	if (IS_ERR(reporter)) {
588de8650a8SEran Ben Elisha 		netdev_warn(priv->netdev,
589de8650a8SEran Ben Elisha 			    "Failed to create tx reporter, err = %ld\n",
590baf6dfdbSAya Levin 			    PTR_ERR(reporter));
591b3ea4c4fSEran Ben Elisha 		return;
5927f7cc235SAya Levin 	}
593baf6dfdbSAya Levin 	priv->tx_reporter = reporter;
594de8650a8SEran Ben Elisha }
595de8650a8SEran Ben Elisha 
59606293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
597de8650a8SEran Ben Elisha {
598baf6dfdbSAya Levin 	if (!priv->tx_reporter)
599de8650a8SEran Ben Elisha 		return;
600de8650a8SEran Ben Elisha 
601b7e93bb6SVladyslav Tarasiuk 	devlink_port_health_reporter_destroy(priv->tx_reporter);
6027a9fb35eSRoi Dayan 	priv->tx_reporter = NULL;
603de8650a8SEran Ben Elisha }
604