1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */
2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */
3de8650a8SEran Ben Elisha 
44edc17fdSAya Levin #include "health.h"
5145e5637SEran Ben Elisha #include "en/ptp.h"
6c27971d0SRoi Dayan #include "en/devlink.h"
732def412SAmir Tzin #include "lib/tout.h"
8de8650a8SEran Ben Elisha 
9fc9d982aSAdham Faris /* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */
10fc9d982aSAdham Faris static const char * const sq_sw_state_type_name[] = {
11fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_ENABLED] = "enabled",
12fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_MPWQE] = "mpwqe",
13fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_RECOVERING] = "recovering",
14fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_IPSEC] = "ipsec",
15fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_DIM] = "dim",
16fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline",
17fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx",
18fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync",
19fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf",
20fc9d982aSAdham Faris };
21fc9d982aSAdham Faris 
22de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
23de8650a8SEran Ben Elisha {
2432def412SAmir Tzin 	struct mlx5_core_dev *dev = sq->mdev;
2532def412SAmir Tzin 	unsigned long exp_time;
2632def412SAmir Tzin 
2732def412SAmir Tzin 	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
28de8650a8SEran Ben Elisha 
29de8650a8SEran Ben Elisha 	while (time_before(jiffies, exp_time)) {
30de8650a8SEran Ben Elisha 		if (sq->cc == sq->pc)
31de8650a8SEran Ben Elisha 			return 0;
32de8650a8SEran Ben Elisha 
33de8650a8SEran Ben Elisha 		msleep(20);
34de8650a8SEran Ben Elisha 	}
35de8650a8SEran Ben Elisha 
364ad40d8eSEran Ben Elisha 	netdev_err(sq->netdev,
37de8650a8SEran Ben Elisha 		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
38de8650a8SEran Ben Elisha 		   sq->sqn, sq->cc, sq->pc);
39de8650a8SEran Ben Elisha 
40de8650a8SEran Ben Elisha 	return -ETIMEDOUT;
41de8650a8SEran Ben Elisha }
42de8650a8SEran Ben Elisha 
43de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
44de8650a8SEran Ben Elisha {
45de8650a8SEran Ben Elisha 	WARN_ONCE(sq->cc != sq->pc,
46de8650a8SEran Ben Elisha 		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
47de8650a8SEran Ben Elisha 		  sq->sqn, sq->cc, sq->pc);
48de8650a8SEran Ben Elisha 	sq->cc = 0;
49de8650a8SEran Ben Elisha 	sq->dma_fifo_cc = 0;
50de8650a8SEran Ben Elisha 	sq->pc = 0;
51de8650a8SEran Ben Elisha }
52de8650a8SEran Ben Elisha 
53fc9d982aSAdham Faris static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq)
54fc9d982aSAdham Faris {
55fc9d982aSAdham Faris 	int err;
56fc9d982aSAdham Faris 	int i;
57fc9d982aSAdham Faris 
58fc9d982aSAdham Faris 	BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES,
59fc9d982aSAdham Faris 			 "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h");
60fc9d982aSAdham Faris 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
61fc9d982aSAdham Faris 	if (err)
62fc9d982aSAdham Faris 		return err;
63fc9d982aSAdham Faris 
64fc9d982aSAdham Faris 	for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) {
65fc9d982aSAdham Faris 		err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i],
66fc9d982aSAdham Faris 						test_bit(i, &sq->state));
67fc9d982aSAdham Faris 		if (err)
68fc9d982aSAdham Faris 			return err;
69fc9d982aSAdham Faris 	}
70fc9d982aSAdham Faris 
71*b0d87ed2SAdham Faris 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
72fc9d982aSAdham Faris }
73fc9d982aSAdham Faris 
74c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
75de8650a8SEran Ben Elisha {
76c50de4afSAya Levin 	struct mlx5_core_dev *mdev;
77c50de4afSAya Levin 	struct net_device *dev;
78c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
79de8650a8SEran Ben Elisha 	u8 state;
80de8650a8SEran Ben Elisha 	int err;
81de8650a8SEran Ben Elisha 
82c50de4afSAya Levin 	sq = ctx;
834ad40d8eSEran Ben Elisha 	mdev = sq->mdev;
844ad40d8eSEran Ben Elisha 	dev = sq->netdev;
85c50de4afSAya Levin 
86c50de4afSAya Levin 	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
87c50de4afSAya Levin 		return 0;
88c50de4afSAya Levin 
89de8650a8SEran Ben Elisha 	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
90de8650a8SEran Ben Elisha 	if (err) {
91de8650a8SEran Ben Elisha 		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
92de8650a8SEran Ben Elisha 			   sq->sqn, err);
93276d197eSAya Levin 		goto out;
94de8650a8SEran Ben Elisha 	}
95de8650a8SEran Ben Elisha 
96d9a2fcf5SAya Levin 	if (state != MLX5_SQC_STATE_ERR)
97276d197eSAya Levin 		goto out;
98de8650a8SEran Ben Elisha 
99de8650a8SEran Ben Elisha 	mlx5e_tx_disable_queue(sq->txq);
100de8650a8SEran Ben Elisha 
101de8650a8SEran Ben Elisha 	err = mlx5e_wait_for_sq_flush(sq);
102de8650a8SEran Ben Elisha 	if (err)
103276d197eSAya Levin 		goto out;
104de8650a8SEran Ben Elisha 
105de8650a8SEran Ben Elisha 	/* At this point, no new packets will arrive from the stack as TXQ is
106de8650a8SEran Ben Elisha 	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
107de8650a8SEran Ben Elisha 	 * pending WQEs. SQ can safely reset the SQ.
108de8650a8SEran Ben Elisha 	 */
109de8650a8SEran Ben Elisha 
1104ad40d8eSEran Ben Elisha 	err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
111de8650a8SEran Ben Elisha 	if (err)
112276d197eSAya Levin 		goto out;
113de8650a8SEran Ben Elisha 
114de8650a8SEran Ben Elisha 	mlx5e_reset_txqsq_cc_pc(sq);
115de8650a8SEran Ben Elisha 	sq->stats->recover++;
116276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
117de8650a8SEran Ben Elisha 	mlx5e_activate_txqsq(sq);
11879efecb4SMaxim Mikityanskiy 	if (sq->channel)
11979efecb4SMaxim Mikityanskiy 		mlx5e_trigger_napi_icosq(sq->channel);
12079efecb4SMaxim Mikityanskiy 	else
12179efecb4SMaxim Mikityanskiy 		mlx5e_trigger_napi_sched(sq->cq.napi);
122de8650a8SEran Ben Elisha 
123de8650a8SEran Ben Elisha 	return 0;
124276d197eSAya Levin out:
125276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
126276d197eSAya Levin 	return err;
127de8650a8SEran Ben Elisha }
128de8650a8SEran Ben Elisha 
/* Context handed to the TX timeout recover/dump callbacks. */
struct mlx5e_tx_timeout_ctx {
	/* SQ on which the timeout was detected */
	struct mlx5e_txqsq *sq;
	/* Recovery outcome written by the recover callback:
	 * 0 - this SQ recovered, 1 - all channels were reopened,
	 * otherwise the error code of the failed channel reopen.
	 */
	int status;
};
133e6205564SAya Levin 
134c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx)
1357d91126bSEran Ben Elisha {
136e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx *to_ctx;
137e6205564SAya Levin 	struct mlx5e_priv *priv;
138c50de4afSAya Levin 	struct mlx5_eq_comp *eq;
139c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
140c50de4afSAya Levin 	int err;
1417d91126bSEran Ben Elisha 
142e6205564SAya Levin 	to_ctx = ctx;
143e6205564SAya Levin 	sq = to_ctx->sq;
144c50de4afSAya Levin 	eq = sq->cq.mcq.eq;
1454ad40d8eSEran Ben Elisha 	priv = sq->priv;
1464ad40d8eSEran Ben Elisha 	err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
147e6205564SAya Levin 	if (!err) {
148e6205564SAya Levin 		to_ctx->status = 0; /* this sq recovered */
149e6205564SAya Levin 		return err;
150e6205564SAya Levin 	}
151e6205564SAya Levin 
152e6205564SAya Levin 	err = mlx5e_safe_reopen_channels(priv);
153e6205564SAya Levin 	if (!err) {
154e6205564SAya Levin 		to_ctx->status = 1; /* all channels recovered */
155e6205564SAya Levin 		return err;
156e6205564SAya Levin 	}
157e6205564SAya Levin 
158e6205564SAya Levin 	to_ctx->status = err;
1597d91126bSEran Ben Elisha 	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
160e6205564SAya Levin 	netdev_err(priv->netdev,
161e6205564SAya Levin 		   "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
162e6205564SAya Levin 		   err);
1637d91126bSEran Ben Elisha 
164c50de4afSAya Levin 	return err;
1657d91126bSEran Ben Elisha }
1667d91126bSEran Ben Elisha 
167de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function.
168de8650a8SEran Ben Elisha  * It can cause a dead lock or a read-after-free.
169de8650a8SEran Ben Elisha  */
170c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
171de8650a8SEran Ben Elisha {
172c50de4afSAya Levin 	return err_ctx->recover(err_ctx->ctx);
173de8650a8SEran Ben Elisha }
174de8650a8SEran Ben Elisha 
/* devlink health .recover callback of the TX reporter.
 *
 * With an error context (@context != NULL) the context's own recover
 * callback is run; without one, channel recovery is requested through
 * mlx5e_health_recover_channels(). @extack is not used.
 *
 * Return: the result of whichever recovery routine was invoked.
 */
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_health_recover_channels(priv);

	return mlx5e_tx_reporter_recover_from_ctx(err_ctx);
}
185de8650a8SEran Ben Elisha 
186de8650a8SEran Ben Elisha static int
187145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
1882d708887SAya Levin 						  struct mlx5e_txqsq *sq, int tc)
189de8650a8SEran Ben Elisha {
190dd921fd2SAya Levin 	bool stopped = netif_xmit_stopped(sq->txq);
1914ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
192dd921fd2SAya Levin 	u8 state;
193de8650a8SEran Ben Elisha 	int err;
194de8650a8SEran Ben Elisha 
195dd921fd2SAya Levin 	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
196dd921fd2SAya Levin 	if (err)
197dd921fd2SAya Levin 		return err;
198dd921fd2SAya Levin 
1992d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
2002d708887SAya Levin 	if (err)
2012d708887SAya Levin 		return err;
2022d708887SAya Levin 
2032d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
2042d708887SAya Levin 	if (err)
2052d708887SAya Levin 		return err;
2062d708887SAya Levin 
207dd921fd2SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
208de8650a8SEran Ben Elisha 	if (err)
209de8650a8SEran Ben Elisha 		return err;
210de8650a8SEran Ben Elisha 
211de8650a8SEran Ben Elisha 	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
212de8650a8SEran Ben Elisha 	if (err)
213de8650a8SEran Ben Elisha 		return err;
214de8650a8SEran Ben Elisha 
215de8650a8SEran Ben Elisha 	err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
216de8650a8SEran Ben Elisha 	if (err)
217de8650a8SEran Ben Elisha 		return err;
218de8650a8SEran Ben Elisha 
2192d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
2202d708887SAya Levin 	if (err)
2212d708887SAya Levin 		return err;
2222d708887SAya Levin 
2232d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
2242d708887SAya Levin 	if (err)
2252d708887SAya Levin 		return err;
2262d708887SAya Levin 
227fc9d982aSAdham Faris 	err = mlx5e_health_sq_put_sw_state(fmsg, sq);
228fc9d982aSAdham Faris 	if (err)
229fc9d982aSAdham Faris 		return err;
230fc9d982aSAdham Faris 
231d5cbedd7SAya Levin 	err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
2322bf09e60SAya Levin 	if (err)
2332bf09e60SAya Levin 		return err;
2342bf09e60SAya Levin 
235145e5637SEran Ben Elisha 	return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
236145e5637SEran Ben Elisha }
237145e5637SEran Ben Elisha 
238145e5637SEran Ben Elisha static int
239145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
240145e5637SEran Ben Elisha 					struct mlx5e_txqsq *sq, int tc)
241145e5637SEran Ben Elisha {
242145e5637SEran Ben Elisha 	int err;
243145e5637SEran Ben Elisha 
244145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
245145e5637SEran Ben Elisha 	if (err)
246145e5637SEran Ben Elisha 		return err;
247145e5637SEran Ben Elisha 
248145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
249145e5637SEran Ben Elisha 	if (err)
250145e5637SEran Ben Elisha 		return err;
251145e5637SEran Ben Elisha 
252145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
25356837c2aSAya Levin 	if (err)
25456837c2aSAya Levin 		return err;
25556837c2aSAya Levin 
256de8650a8SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
257de8650a8SEran Ben Elisha 	if (err)
258de8650a8SEran Ben Elisha 		return err;
259de8650a8SEran Ben Elisha 
260de8650a8SEran Ben Elisha 	return 0;
261de8650a8SEran Ben Elisha }
262de8650a8SEran Ben Elisha 
263145e5637SEran Ben Elisha static int
264145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
265145e5637SEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq, int tc)
266145e5637SEran Ben Elisha {
267145e5637SEran Ben Elisha 	int err;
268145e5637SEran Ben Elisha 
269145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
270145e5637SEran Ben Elisha 	if (err)
271145e5637SEran Ben Elisha 		return err;
272145e5637SEran Ben Elisha 
273145e5637SEran Ben Elisha 	err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
274145e5637SEran Ben Elisha 	if (err)
275145e5637SEran Ben Elisha 		return err;
276145e5637SEran Ben Elisha 
2771880bc4eSEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc);
2781880bc4eSEran Ben Elisha 	if (err)
2791880bc4eSEran Ben Elisha 		return err;
2801880bc4eSEran Ben Elisha 
2811880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
2821880bc4eSEran Ben Elisha 	if (err)
2831880bc4eSEran Ben Elisha 		return err;
2841880bc4eSEran Ben Elisha 
2851880bc4eSEran Ben Elisha 	err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg);
2861880bc4eSEran Ben Elisha 	if (err)
2871880bc4eSEran Ben Elisha 		return err;
2881880bc4eSEran Ben Elisha 
2891880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
290145e5637SEran Ben Elisha 	if (err)
291145e5637SEran Ben Elisha 		return err;
292145e5637SEran Ben Elisha 
293145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
294145e5637SEran Ben Elisha 	if (err)
295145e5637SEran Ben Elisha 		return err;
296145e5637SEran Ben Elisha 
297145e5637SEran Ben Elisha 	return 0;
298145e5637SEran Ben Elisha }
299145e5637SEran Ben Elisha 
300145e5637SEran Ben Elisha static int
301145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
302145e5637SEran Ben Elisha 					 struct mlx5e_txqsq *txqsq)
303145e5637SEran Ben Elisha {
304145e5637SEran Ben Elisha 	u32 sq_stride, sq_sz;
30595742c1cSAya Levin 	bool real_time;
306145e5637SEran Ben Elisha 	int err;
307145e5637SEran Ben Elisha 
308145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
309145e5637SEran Ben Elisha 	if (err)
310145e5637SEran Ben Elisha 		return err;
311145e5637SEran Ben Elisha 
31295742c1cSAya Levin 	real_time =  mlx5_is_real_time_sq(txqsq->mdev);
313145e5637SEran Ben Elisha 	sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
314145e5637SEran Ben Elisha 	sq_stride = MLX5_SEND_WQE_BB;
315145e5637SEran Ben Elisha 
316145e5637SEran Ben Elisha 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
317145e5637SEran Ben Elisha 	if (err)
318145e5637SEran Ben Elisha 		return err;
319145e5637SEran Ben Elisha 
320145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
321145e5637SEran Ben Elisha 	if (err)
322145e5637SEran Ben Elisha 		return err;
323145e5637SEran Ben Elisha 
32495742c1cSAya Levin 	err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
32595742c1cSAya Levin 	if (err)
32695742c1cSAya Levin 		return err;
32795742c1cSAya Levin 
328145e5637SEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
329145e5637SEran Ben Elisha 	if (err)
330145e5637SEran Ben Elisha 		return err;
331145e5637SEran Ben Elisha 
332145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
333145e5637SEran Ben Elisha }
334145e5637SEran Ben Elisha 
335145e5637SEran Ben Elisha static int
3361880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg,
3371880bc4eSEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq)
3381880bc4eSEran Ben Elisha {
3391880bc4eSEran Ben Elisha 	int err;
3401880bc4eSEran Ben Elisha 
3411880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
3421880bc4eSEran Ben Elisha 	if (err)
3431880bc4eSEran Ben Elisha 		return err;
3441880bc4eSEran Ben Elisha 
3451880bc4eSEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg);
3461880bc4eSEran Ben Elisha 	if (err)
3471880bc4eSEran Ben Elisha 		return err;
3481880bc4eSEran Ben Elisha 
3491880bc4eSEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3501880bc4eSEran Ben Elisha }
3511880bc4eSEran Ben Elisha 
3521880bc4eSEran Ben Elisha static int
353145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
354145e5637SEran Ben Elisha 					 struct devlink_fmsg *fmsg)
355145e5637SEran Ben Elisha {
356145e5637SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
357145e5637SEran Ben Elisha 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
35824c22dd0SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
359145e5637SEran Ben Elisha 	struct mlx5e_ptpsq *generic_ptpsq;
360145e5637SEran Ben Elisha 	int err;
361145e5637SEran Ben Elisha 
362145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
363145e5637SEran Ben Elisha 	if (err)
364145e5637SEran Ben Elisha 		return err;
365145e5637SEran Ben Elisha 
366145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
367145e5637SEran Ben Elisha 	if (err)
368145e5637SEran Ben Elisha 		return err;
369145e5637SEran Ben Elisha 
37024c22dd0SAya Levin 	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
371145e5637SEran Ben Elisha 		goto out;
372145e5637SEran Ben Elisha 
37324c22dd0SAya Levin 	generic_ptpsq = &ptp_ch->ptpsq[0];
37424c22dd0SAya Levin 
375145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
376145e5637SEran Ben Elisha 	if (err)
377145e5637SEran Ben Elisha 		return err;
378145e5637SEran Ben Elisha 
379145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
380145e5637SEran Ben Elisha 	if (err)
381145e5637SEran Ben Elisha 		return err;
382145e5637SEran Ben Elisha 
3831880bc4eSEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq);
3841880bc4eSEran Ben Elisha 	if (err)
3851880bc4eSEran Ben Elisha 		return err;
3861880bc4eSEran Ben Elisha 
387145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
388145e5637SEran Ben Elisha 	if (err)
389145e5637SEran Ben Elisha 		return err;
390145e5637SEran Ben Elisha 
391145e5637SEran Ben Elisha out:
392145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
393145e5637SEran Ben Elisha }
394145e5637SEran Ben Elisha 
395de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
396e7a98105SJiri Pirko 				      struct devlink_fmsg *fmsg,
397e7a98105SJiri Pirko 				      struct netlink_ext_ack *extack)
398de8650a8SEran Ben Elisha {
399de8650a8SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
400b0d35de4SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
4012d708887SAya Levin 
4022d708887SAya Levin 	int i, tc, err = 0;
403de8650a8SEran Ben Elisha 
404de8650a8SEran Ben Elisha 	mutex_lock(&priv->state_lock);
405de8650a8SEran Ben Elisha 
406de8650a8SEran Ben Elisha 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
407de8650a8SEran Ben Elisha 		goto unlock;
408de8650a8SEran Ben Elisha 
409145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
4102d708887SAya Levin 	if (err)
4112d708887SAya Levin 		goto unlock;
4122d708887SAya Levin 
413de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
414de8650a8SEran Ben Elisha 	if (err)
415de8650a8SEran Ben Elisha 		goto unlock;
416de8650a8SEran Ben Elisha 
4172d708887SAya Levin 	for (i = 0; i < priv->channels.num; i++) {
4182d708887SAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
419de8650a8SEran Ben Elisha 
42086d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
4212d708887SAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
4222d708887SAya Levin 
4232d708887SAya Levin 			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
424de8650a8SEran Ben Elisha 			if (err)
42599d31cbdSAya Levin 				goto unlock;
426de8650a8SEran Ben Elisha 		}
4272d708887SAya Levin 	}
428145e5637SEran Ben Elisha 
42924c22dd0SAya Levin 	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
430145e5637SEran Ben Elisha 		goto close_sqs_nest;
431145e5637SEran Ben Elisha 
43286d747a3STariq Toukan 	for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
433145e5637SEran Ben Elisha 		err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
434145e5637SEran Ben Elisha 								    &ptp_ch->ptpsq[tc],
435145e5637SEran Ben Elisha 								    tc);
436145e5637SEran Ben Elisha 		if (err)
437145e5637SEran Ben Elisha 			goto unlock;
438145e5637SEran Ben Elisha 	}
439145e5637SEran Ben Elisha 
440145e5637SEran Ben Elisha close_sqs_nest:
441de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
442de8650a8SEran Ben Elisha 	if (err)
443de8650a8SEran Ben Elisha 		goto unlock;
444de8650a8SEran Ben Elisha 
445de8650a8SEran Ben Elisha unlock:
446de8650a8SEran Ben Elisha 	mutex_unlock(&priv->state_lock);
447de8650a8SEran Ben Elisha 	return err;
448de8650a8SEran Ben Elisha }
449de8650a8SEran Ben Elisha 
4505f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
4515f29458bSAya Levin 				     void *ctx)
4525f29458bSAya Levin {
4535f29458bSAya Levin 	struct mlx5_rsc_key key = {};
4545f29458bSAya Levin 	struct mlx5e_txqsq *sq = ctx;
4555f29458bSAya Levin 	int err;
4565f29458bSAya Levin 
4575f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4585f29458bSAya Levin 		return 0;
4595f29458bSAya Levin 
460d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
4615f29458bSAya Levin 	if (err)
4625f29458bSAya Levin 		return err;
4635f29458bSAya Levin 
4645f29458bSAya Levin 	key.size = PAGE_SIZE;
4655f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
4665f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4675f29458bSAya Levin 	if (err)
4685f29458bSAya Levin 		return err;
4695f29458bSAya Levin 
470d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4715f29458bSAya Levin 	if (err)
4725f29458bSAya Levin 		return err;
4735f29458bSAya Levin 
474d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
4755f29458bSAya Levin 	if (err)
4765f29458bSAya Levin 		return err;
4775f29458bSAya Levin 
478d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
4795f29458bSAya Levin 	if (err)
4805f29458bSAya Levin 		return err;
4815f29458bSAya Levin 
4825f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
4835f29458bSAya Levin 	key.index1 = sq->sqn;
4845f29458bSAya Levin 	key.num_of_obj1 = 1;
4855f29458bSAya Levin 
4865f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4875f29458bSAya Levin 	if (err)
4885f29458bSAya Levin 		return err;
4895f29458bSAya Levin 
490d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4915f29458bSAya Levin 	if (err)
4925f29458bSAya Levin 		return err;
4935f29458bSAya Levin 
494d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
4955f29458bSAya Levin 	if (err)
4965f29458bSAya Levin 		return err;
4975f29458bSAya Levin 
4985f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
4995f29458bSAya Levin 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
5005f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
5015f29458bSAya Levin 	if (err)
5025f29458bSAya Levin 		return err;
5035f29458bSAya Levin 
504d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
5055f29458bSAya Levin 	if (err)
5065f29458bSAya Levin 		return err;
5075f29458bSAya Levin 
508d5cbedd7SAya Levin 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
5095f29458bSAya Levin }
5105f29458bSAya Levin 
511918fc385SAmir Tzin static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
512918fc385SAmir Tzin 					  void *ctx)
513918fc385SAmir Tzin {
514918fc385SAmir Tzin 	struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
515918fc385SAmir Tzin 
516918fc385SAmir Tzin 	return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
517918fc385SAmir Tzin }
518918fc385SAmir Tzin 
5195f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
5205f29458bSAya Levin 					  struct devlink_fmsg *fmsg)
5215f29458bSAya Levin {
522b0d35de4SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
5235f29458bSAya Levin 	struct mlx5_rsc_key key = {};
5245f29458bSAya Levin 	int i, tc, err;
5255f29458bSAya Levin 
5265f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
5275f29458bSAya Levin 		return 0;
5285f29458bSAya Levin 
529d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
5305f29458bSAya Levin 	if (err)
5315f29458bSAya Levin 		return err;
5325f29458bSAya Levin 
5335f29458bSAya Levin 	key.size = PAGE_SIZE;
5345f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
5355f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
5365f29458bSAya Levin 	if (err)
5375f29458bSAya Levin 		return err;
5385f29458bSAya Levin 
539d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
5405f29458bSAya Levin 	if (err)
5415f29458bSAya Levin 		return err;
5425f29458bSAya Levin 
5435f29458bSAya Levin 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
5445f29458bSAya Levin 	if (err)
5455f29458bSAya Levin 		return err;
5465f29458bSAya Levin 
5475f29458bSAya Levin 	for (i = 0; i < priv->channels.num; i++) {
5485f29458bSAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
5495f29458bSAya Levin 
55086d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
5515f29458bSAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
5525f29458bSAya Levin 
5535f29458bSAya Levin 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
5545f29458bSAya Levin 			if (err)
5555f29458bSAya Levin 				return err;
5565f29458bSAya Levin 		}
5575f29458bSAya Levin 	}
558145e5637SEran Ben Elisha 
55924c22dd0SAya Levin 	if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
56086d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
561145e5637SEran Ben Elisha 			struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
562145e5637SEran Ben Elisha 
563145e5637SEran Ben Elisha 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
564145e5637SEran Ben Elisha 			if (err)
565145e5637SEran Ben Elisha 				return err;
566145e5637SEran Ben Elisha 		}
567145e5637SEran Ben Elisha 	}
568145e5637SEran Ben Elisha 
5695f29458bSAya Levin 	return devlink_fmsg_arr_pair_nest_end(fmsg);
5705f29458bSAya Levin }
5715f29458bSAya Levin 
5725f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
5735f29458bSAya Levin 					   struct mlx5e_err_ctx *err_ctx,
5745f29458bSAya Levin 					   struct devlink_fmsg *fmsg)
5755f29458bSAya Levin {
5765f29458bSAya Levin 	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
5775f29458bSAya Levin }
5785f29458bSAya Levin 
/* devlink health .dump callback of the TX reporter.
 *
 * With an error context (@context != NULL) the context's own dump callback
 * is run; without one, all SQs are dumped. @extack is not used.
 *
 * Return: the result of whichever dump routine was invoked.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);

	return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);
}
5895f29458bSAya Levin 
5900a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
5910a56be3cSAya Levin {
5920a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
5934ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
5940a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
5950a56be3cSAya Levin 
5960a56be3cSAya Levin 	err_ctx.ctx = sq;
5970a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
5985f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
599b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
6000a56be3cSAya Levin 
6010a56be3cSAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
6020a56be3cSAya Levin }
6030a56be3cSAya Levin 
6040a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
6050a56be3cSAya Levin {
6060a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
607e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx to_ctx = {};
6084ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
6090a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
6100a56be3cSAya Levin 
611e6205564SAya Levin 	to_ctx.sq = sq;
612e6205564SAya Levin 	err_ctx.ctx = &to_ctx;
6130a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
614918fc385SAmir Tzin 	err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
615b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str),
616b21aef7eSJoe Perches 		 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
6174ad40d8eSEran Ben Elisha 		 sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
6185337824fSEric Dumazet 		 jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
6190a56be3cSAya Levin 
620e6205564SAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
621e6205564SAya Levin 	return to_ctx.status;
6220a56be3cSAya Levin }
6230a56be3cSAya Levin 
624de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
625de8650a8SEran Ben Elisha 		.name = "tx",
626de8650a8SEran Ben Elisha 		.recover = mlx5e_tx_reporter_recover,
627de8650a8SEran Ben Elisha 		.diagnose = mlx5e_tx_reporter_diagnose,
6285f29458bSAya Levin 		.dump = mlx5e_tx_reporter_dump,
629de8650a8SEran Ben Elisha };
630de8650a8SEran Ben Elisha 
631de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
632de8650a8SEran Ben Elisha 
633b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
634de8650a8SEran Ben Elisha {
635baf6dfdbSAya Levin 	struct devlink_health_reporter *reporter;
636de8650a8SEran Ben Elisha 
637bc1536f3SJiri Pirko 	reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port,
638bc1536f3SJiri Pirko 						       &mlx5_tx_reporter_ops,
639b7e93bb6SVladyslav Tarasiuk 						       MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
640baf6dfdbSAya Levin 	if (IS_ERR(reporter)) {
641de8650a8SEran Ben Elisha 		netdev_warn(priv->netdev,
642de8650a8SEran Ben Elisha 			    "Failed to create tx reporter, err = %ld\n",
643baf6dfdbSAya Levin 			    PTR_ERR(reporter));
644b3ea4c4fSEran Ben Elisha 		return;
6457f7cc235SAya Levin 	}
646baf6dfdbSAya Levin 	priv->tx_reporter = reporter;
647de8650a8SEran Ben Elisha }
648de8650a8SEran Ben Elisha 
64906293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
650de8650a8SEran Ben Elisha {
651baf6dfdbSAya Levin 	if (!priv->tx_reporter)
652de8650a8SEran Ben Elisha 		return;
653de8650a8SEran Ben Elisha 
6549f167327SJiri Pirko 	devlink_health_reporter_destroy(priv->tx_reporter);
6557a9fb35eSRoi Dayan 	priv->tx_reporter = NULL;
656de8650a8SEran Ben Elisha }
657