/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */
3de8650a8SEran Ben Elisha 
44edc17fdSAya Levin #include "health.h"
5145e5637SEran Ben Elisha #include "en/ptp.h"
6c27971d0SRoi Dayan #include "en/devlink.h"
732def412SAmir Tzin #include "lib/tout.h"
8de8650a8SEran Ben Elisha 
9*fc9d982aSAdham Faris /* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */
10*fc9d982aSAdham Faris static const char * const sq_sw_state_type_name[] = {
11*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_ENABLED] = "enabled",
12*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_MPWQE] = "mpwqe",
13*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_RECOVERING] = "recovering",
14*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_IPSEC] = "ipsec",
15*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_DIM] = "dim",
16*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline",
17*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx",
18*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync",
19*fc9d982aSAdham Faris 	[MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf",
20*fc9d982aSAdham Faris };
21*fc9d982aSAdham Faris 
22de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
23de8650a8SEran Ben Elisha {
2432def412SAmir Tzin 	struct mlx5_core_dev *dev = sq->mdev;
2532def412SAmir Tzin 	unsigned long exp_time;
2632def412SAmir Tzin 
2732def412SAmir Tzin 	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));
28de8650a8SEran Ben Elisha 
29de8650a8SEran Ben Elisha 	while (time_before(jiffies, exp_time)) {
30de8650a8SEran Ben Elisha 		if (sq->cc == sq->pc)
31de8650a8SEran Ben Elisha 			return 0;
32de8650a8SEran Ben Elisha 
33de8650a8SEran Ben Elisha 		msleep(20);
34de8650a8SEran Ben Elisha 	}
35de8650a8SEran Ben Elisha 
364ad40d8eSEran Ben Elisha 	netdev_err(sq->netdev,
37de8650a8SEran Ben Elisha 		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
38de8650a8SEran Ben Elisha 		   sq->sqn, sq->cc, sq->pc);
39de8650a8SEran Ben Elisha 
40de8650a8SEran Ben Elisha 	return -ETIMEDOUT;
41de8650a8SEran Ben Elisha }
42de8650a8SEran Ben Elisha 
43de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
44de8650a8SEran Ben Elisha {
45de8650a8SEran Ben Elisha 	WARN_ONCE(sq->cc != sq->pc,
46de8650a8SEran Ben Elisha 		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
47de8650a8SEran Ben Elisha 		  sq->sqn, sq->cc, sq->pc);
48de8650a8SEran Ben Elisha 	sq->cc = 0;
49de8650a8SEran Ben Elisha 	sq->dma_fifo_cc = 0;
50de8650a8SEran Ben Elisha 	sq->pc = 0;
51de8650a8SEran Ben Elisha }
52de8650a8SEran Ben Elisha 
53*fc9d982aSAdham Faris static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq)
54*fc9d982aSAdham Faris {
55*fc9d982aSAdham Faris 	int err;
56*fc9d982aSAdham Faris 	int i;
57*fc9d982aSAdham Faris 
58*fc9d982aSAdham Faris 	BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES,
59*fc9d982aSAdham Faris 			 "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h");
60*fc9d982aSAdham Faris 	err = devlink_fmsg_obj_nest_start(fmsg);
61*fc9d982aSAdham Faris 	if (err)
62*fc9d982aSAdham Faris 		return err;
63*fc9d982aSAdham Faris 
64*fc9d982aSAdham Faris 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
65*fc9d982aSAdham Faris 	if (err)
66*fc9d982aSAdham Faris 		return err;
67*fc9d982aSAdham Faris 
68*fc9d982aSAdham Faris 	for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) {
69*fc9d982aSAdham Faris 		err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i],
70*fc9d982aSAdham Faris 						test_bit(i, &sq->state));
71*fc9d982aSAdham Faris 		if (err)
72*fc9d982aSAdham Faris 			return err;
73*fc9d982aSAdham Faris 	}
74*fc9d982aSAdham Faris 
75*fc9d982aSAdham Faris 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
76*fc9d982aSAdham Faris 	if (err)
77*fc9d982aSAdham Faris 		return err;
78*fc9d982aSAdham Faris 
79*fc9d982aSAdham Faris 	return devlink_fmsg_obj_nest_end(fmsg);
80*fc9d982aSAdham Faris }
81*fc9d982aSAdham Faris 
82c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
83de8650a8SEran Ben Elisha {
84c50de4afSAya Levin 	struct mlx5_core_dev *mdev;
85c50de4afSAya Levin 	struct net_device *dev;
86c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
87de8650a8SEran Ben Elisha 	u8 state;
88de8650a8SEran Ben Elisha 	int err;
89de8650a8SEran Ben Elisha 
90c50de4afSAya Levin 	sq = ctx;
914ad40d8eSEran Ben Elisha 	mdev = sq->mdev;
924ad40d8eSEran Ben Elisha 	dev = sq->netdev;
93c50de4afSAya Levin 
94c50de4afSAya Levin 	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
95c50de4afSAya Levin 		return 0;
96c50de4afSAya Levin 
97de8650a8SEran Ben Elisha 	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
98de8650a8SEran Ben Elisha 	if (err) {
99de8650a8SEran Ben Elisha 		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
100de8650a8SEran Ben Elisha 			   sq->sqn, err);
101276d197eSAya Levin 		goto out;
102de8650a8SEran Ben Elisha 	}
103de8650a8SEran Ben Elisha 
104d9a2fcf5SAya Levin 	if (state != MLX5_SQC_STATE_ERR)
105276d197eSAya Levin 		goto out;
106de8650a8SEran Ben Elisha 
107de8650a8SEran Ben Elisha 	mlx5e_tx_disable_queue(sq->txq);
108de8650a8SEran Ben Elisha 
109de8650a8SEran Ben Elisha 	err = mlx5e_wait_for_sq_flush(sq);
110de8650a8SEran Ben Elisha 	if (err)
111276d197eSAya Levin 		goto out;
112de8650a8SEran Ben Elisha 
113de8650a8SEran Ben Elisha 	/* At this point, no new packets will arrive from the stack as TXQ is
114de8650a8SEran Ben Elisha 	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
115de8650a8SEran Ben Elisha 	 * pending WQEs. SQ can safely reset the SQ.
116de8650a8SEran Ben Elisha 	 */
117de8650a8SEran Ben Elisha 
1184ad40d8eSEran Ben Elisha 	err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
119de8650a8SEran Ben Elisha 	if (err)
120276d197eSAya Levin 		goto out;
121de8650a8SEran Ben Elisha 
122de8650a8SEran Ben Elisha 	mlx5e_reset_txqsq_cc_pc(sq);
123de8650a8SEran Ben Elisha 	sq->stats->recover++;
124276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
125de8650a8SEran Ben Elisha 	mlx5e_activate_txqsq(sq);
12679efecb4SMaxim Mikityanskiy 	if (sq->channel)
12779efecb4SMaxim Mikityanskiy 		mlx5e_trigger_napi_icosq(sq->channel);
12879efecb4SMaxim Mikityanskiy 	else
12979efecb4SMaxim Mikityanskiy 		mlx5e_trigger_napi_sched(sq->cq.napi);
130de8650a8SEran Ben Elisha 
131de8650a8SEran Ben Elisha 	return 0;
132276d197eSAya Levin out:
133276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
134276d197eSAya Levin 	return err;
135de8650a8SEran Ben Elisha }
136de8650a8SEran Ben Elisha 
/* Context passed to the TX timeout recover and dump callbacks. */
struct mlx5e_tx_timeout_ctx {
	/* SQ on which the timeout was reported */
	struct mlx5e_txqsq *sq;
	/* Recovery outcome: 0 - this SQ recovered, 1 - all channels were
	 * reopened, otherwise the error returned by the channel reopen.
	 */
	int status;
};
141e6205564SAya Levin 
142c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx)
1437d91126bSEran Ben Elisha {
144e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx *to_ctx;
145e6205564SAya Levin 	struct mlx5e_priv *priv;
146c50de4afSAya Levin 	struct mlx5_eq_comp *eq;
147c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
148c50de4afSAya Levin 	int err;
1497d91126bSEran Ben Elisha 
150e6205564SAya Levin 	to_ctx = ctx;
151e6205564SAya Levin 	sq = to_ctx->sq;
152c50de4afSAya Levin 	eq = sq->cq.mcq.eq;
1534ad40d8eSEran Ben Elisha 	priv = sq->priv;
1544ad40d8eSEran Ben Elisha 	err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
155e6205564SAya Levin 	if (!err) {
156e6205564SAya Levin 		to_ctx->status = 0; /* this sq recovered */
157e6205564SAya Levin 		return err;
158e6205564SAya Levin 	}
159e6205564SAya Levin 
160e6205564SAya Levin 	err = mlx5e_safe_reopen_channels(priv);
161e6205564SAya Levin 	if (!err) {
162e6205564SAya Levin 		to_ctx->status = 1; /* all channels recovered */
163e6205564SAya Levin 		return err;
164e6205564SAya Levin 	}
165e6205564SAya Levin 
166e6205564SAya Levin 	to_ctx->status = err;
1677d91126bSEran Ben Elisha 	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
168e6205564SAya Levin 	netdev_err(priv->netdev,
169e6205564SAya Levin 		   "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
170e6205564SAya Levin 		   err);
1717d91126bSEran Ben Elisha 
172c50de4afSAya Levin 	return err;
1737d91126bSEran Ben Elisha }
1747d91126bSEran Ben Elisha 
175de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function.
176de8650a8SEran Ben Elisha  * It can cause a dead lock or a read-after-free.
177de8650a8SEran Ben Elisha  */
178c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
179de8650a8SEran Ben Elisha {
180c50de4afSAya Levin 	return err_ctx->recover(err_ctx->ctx);
181de8650a8SEran Ben Elisha }
182de8650a8SEran Ben Elisha 
/* devlink .recover callback of the TX reporter.
 *
 * With an error context (@context non-NULL) the context's own recover
 * callback is run; without one, all channels are recovered.
 */
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_health_recover_channels(priv);

	return mlx5e_tx_reporter_recover_from_ctx(err_ctx);
}
193de8650a8SEran Ben Elisha 
194de8650a8SEran Ben Elisha static int
195145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
1962d708887SAya Levin 						  struct mlx5e_txqsq *sq, int tc)
197de8650a8SEran Ben Elisha {
198dd921fd2SAya Levin 	bool stopped = netif_xmit_stopped(sq->txq);
1994ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
200dd921fd2SAya Levin 	u8 state;
201de8650a8SEran Ben Elisha 	int err;
202de8650a8SEran Ben Elisha 
203dd921fd2SAya Levin 	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
204dd921fd2SAya Levin 	if (err)
205dd921fd2SAya Levin 		return err;
206dd921fd2SAya Levin 
2072d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
2082d708887SAya Levin 	if (err)
2092d708887SAya Levin 		return err;
2102d708887SAya Levin 
2112d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
2122d708887SAya Levin 	if (err)
2132d708887SAya Levin 		return err;
2142d708887SAya Levin 
215dd921fd2SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
216de8650a8SEran Ben Elisha 	if (err)
217de8650a8SEran Ben Elisha 		return err;
218de8650a8SEran Ben Elisha 
219de8650a8SEran Ben Elisha 	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
220de8650a8SEran Ben Elisha 	if (err)
221de8650a8SEran Ben Elisha 		return err;
222de8650a8SEran Ben Elisha 
223de8650a8SEran Ben Elisha 	err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
224de8650a8SEran Ben Elisha 	if (err)
225de8650a8SEran Ben Elisha 		return err;
226de8650a8SEran Ben Elisha 
2272d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
2282d708887SAya Levin 	if (err)
2292d708887SAya Levin 		return err;
2302d708887SAya Levin 
2312d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
2322d708887SAya Levin 	if (err)
2332d708887SAya Levin 		return err;
2342d708887SAya Levin 
235*fc9d982aSAdham Faris 	err = mlx5e_health_sq_put_sw_state(fmsg, sq);
236*fc9d982aSAdham Faris 	if (err)
237*fc9d982aSAdham Faris 		return err;
238*fc9d982aSAdham Faris 
239d5cbedd7SAya Levin 	err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
2402bf09e60SAya Levin 	if (err)
2412bf09e60SAya Levin 		return err;
2422bf09e60SAya Levin 
243145e5637SEran Ben Elisha 	return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
244145e5637SEran Ben Elisha }
245145e5637SEran Ben Elisha 
246145e5637SEran Ben Elisha static int
247145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
248145e5637SEran Ben Elisha 					struct mlx5e_txqsq *sq, int tc)
249145e5637SEran Ben Elisha {
250145e5637SEran Ben Elisha 	int err;
251145e5637SEran Ben Elisha 
252145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
253145e5637SEran Ben Elisha 	if (err)
254145e5637SEran Ben Elisha 		return err;
255145e5637SEran Ben Elisha 
256145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
257145e5637SEran Ben Elisha 	if (err)
258145e5637SEran Ben Elisha 		return err;
259145e5637SEran Ben Elisha 
260145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
26156837c2aSAya Levin 	if (err)
26256837c2aSAya Levin 		return err;
26356837c2aSAya Levin 
264de8650a8SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
265de8650a8SEran Ben Elisha 	if (err)
266de8650a8SEran Ben Elisha 		return err;
267de8650a8SEran Ben Elisha 
268de8650a8SEran Ben Elisha 	return 0;
269de8650a8SEran Ben Elisha }
270de8650a8SEran Ben Elisha 
271145e5637SEran Ben Elisha static int
272145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
273145e5637SEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq, int tc)
274145e5637SEran Ben Elisha {
275145e5637SEran Ben Elisha 	int err;
276145e5637SEran Ben Elisha 
277145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
278145e5637SEran Ben Elisha 	if (err)
279145e5637SEran Ben Elisha 		return err;
280145e5637SEran Ben Elisha 
281145e5637SEran Ben Elisha 	err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
282145e5637SEran Ben Elisha 	if (err)
283145e5637SEran Ben Elisha 		return err;
284145e5637SEran Ben Elisha 
2851880bc4eSEran Ben Elisha 	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc);
2861880bc4eSEran Ben Elisha 	if (err)
2871880bc4eSEran Ben Elisha 		return err;
2881880bc4eSEran Ben Elisha 
2891880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
2901880bc4eSEran Ben Elisha 	if (err)
2911880bc4eSEran Ben Elisha 		return err;
2921880bc4eSEran Ben Elisha 
2931880bc4eSEran Ben Elisha 	err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg);
2941880bc4eSEran Ben Elisha 	if (err)
2951880bc4eSEran Ben Elisha 		return err;
2961880bc4eSEran Ben Elisha 
2971880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
298145e5637SEran Ben Elisha 	if (err)
299145e5637SEran Ben Elisha 		return err;
300145e5637SEran Ben Elisha 
301145e5637SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
302145e5637SEran Ben Elisha 	if (err)
303145e5637SEran Ben Elisha 		return err;
304145e5637SEran Ben Elisha 
305145e5637SEran Ben Elisha 	return 0;
306145e5637SEran Ben Elisha }
307145e5637SEran Ben Elisha 
308145e5637SEran Ben Elisha static int
309145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
310145e5637SEran Ben Elisha 					 struct mlx5e_txqsq *txqsq)
311145e5637SEran Ben Elisha {
312145e5637SEran Ben Elisha 	u32 sq_stride, sq_sz;
31395742c1cSAya Levin 	bool real_time;
314145e5637SEran Ben Elisha 	int err;
315145e5637SEran Ben Elisha 
316145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
317145e5637SEran Ben Elisha 	if (err)
318145e5637SEran Ben Elisha 		return err;
319145e5637SEran Ben Elisha 
32095742c1cSAya Levin 	real_time =  mlx5_is_real_time_sq(txqsq->mdev);
321145e5637SEran Ben Elisha 	sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
322145e5637SEran Ben Elisha 	sq_stride = MLX5_SEND_WQE_BB;
323145e5637SEran Ben Elisha 
324145e5637SEran Ben Elisha 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
325145e5637SEran Ben Elisha 	if (err)
326145e5637SEran Ben Elisha 		return err;
327145e5637SEran Ben Elisha 
328145e5637SEran Ben Elisha 	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
329145e5637SEran Ben Elisha 	if (err)
330145e5637SEran Ben Elisha 		return err;
331145e5637SEran Ben Elisha 
33295742c1cSAya Levin 	err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
33395742c1cSAya Levin 	if (err)
33495742c1cSAya Levin 		return err;
33595742c1cSAya Levin 
336145e5637SEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
337145e5637SEran Ben Elisha 	if (err)
338145e5637SEran Ben Elisha 		return err;
339145e5637SEran Ben Elisha 
340145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
341145e5637SEran Ben Elisha }
342145e5637SEran Ben Elisha 
343145e5637SEran Ben Elisha static int
3441880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg,
3451880bc4eSEran Ben Elisha 					      struct mlx5e_ptpsq *ptpsq)
3461880bc4eSEran Ben Elisha {
3471880bc4eSEran Ben Elisha 	int err;
3481880bc4eSEran Ben Elisha 
3491880bc4eSEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
3501880bc4eSEran Ben Elisha 	if (err)
3511880bc4eSEran Ben Elisha 		return err;
3521880bc4eSEran Ben Elisha 
3531880bc4eSEran Ben Elisha 	err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg);
3541880bc4eSEran Ben Elisha 	if (err)
3551880bc4eSEran Ben Elisha 		return err;
3561880bc4eSEran Ben Elisha 
3571880bc4eSEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3581880bc4eSEran Ben Elisha }
3591880bc4eSEran Ben Elisha 
3601880bc4eSEran Ben Elisha static int
361145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
362145e5637SEran Ben Elisha 					 struct devlink_fmsg *fmsg)
363145e5637SEran Ben Elisha {
364145e5637SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
365145e5637SEran Ben Elisha 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
36624c22dd0SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
367145e5637SEran Ben Elisha 	struct mlx5e_ptpsq *generic_ptpsq;
368145e5637SEran Ben Elisha 	int err;
369145e5637SEran Ben Elisha 
370145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
371145e5637SEran Ben Elisha 	if (err)
372145e5637SEran Ben Elisha 		return err;
373145e5637SEran Ben Elisha 
374145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
375145e5637SEran Ben Elisha 	if (err)
376145e5637SEran Ben Elisha 		return err;
377145e5637SEran Ben Elisha 
37824c22dd0SAya Levin 	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
379145e5637SEran Ben Elisha 		goto out;
380145e5637SEran Ben Elisha 
38124c22dd0SAya Levin 	generic_ptpsq = &ptp_ch->ptpsq[0];
38224c22dd0SAya Levin 
383145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
384145e5637SEran Ben Elisha 	if (err)
385145e5637SEran Ben Elisha 		return err;
386145e5637SEran Ben Elisha 
387145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
388145e5637SEran Ben Elisha 	if (err)
389145e5637SEran Ben Elisha 		return err;
390145e5637SEran Ben Elisha 
3911880bc4eSEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq);
3921880bc4eSEran Ben Elisha 	if (err)
3931880bc4eSEran Ben Elisha 		return err;
3941880bc4eSEran Ben Elisha 
395145e5637SEran Ben Elisha 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
396145e5637SEran Ben Elisha 	if (err)
397145e5637SEran Ben Elisha 		return err;
398145e5637SEran Ben Elisha 
399145e5637SEran Ben Elisha out:
400145e5637SEran Ben Elisha 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
401145e5637SEran Ben Elisha }
402145e5637SEran Ben Elisha 
403de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
404e7a98105SJiri Pirko 				      struct devlink_fmsg *fmsg,
405e7a98105SJiri Pirko 				      struct netlink_ext_ack *extack)
406de8650a8SEran Ben Elisha {
407de8650a8SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
408b0d35de4SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
4092d708887SAya Levin 
4102d708887SAya Levin 	int i, tc, err = 0;
411de8650a8SEran Ben Elisha 
412de8650a8SEran Ben Elisha 	mutex_lock(&priv->state_lock);
413de8650a8SEran Ben Elisha 
414de8650a8SEran Ben Elisha 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
415de8650a8SEran Ben Elisha 		goto unlock;
416de8650a8SEran Ben Elisha 
417145e5637SEran Ben Elisha 	err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
4182d708887SAya Levin 	if (err)
4192d708887SAya Levin 		goto unlock;
4202d708887SAya Levin 
421de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
422de8650a8SEran Ben Elisha 	if (err)
423de8650a8SEran Ben Elisha 		goto unlock;
424de8650a8SEran Ben Elisha 
4252d708887SAya Levin 	for (i = 0; i < priv->channels.num; i++) {
4262d708887SAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
427de8650a8SEran Ben Elisha 
42886d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
4292d708887SAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
4302d708887SAya Levin 
4312d708887SAya Levin 			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
432de8650a8SEran Ben Elisha 			if (err)
43399d31cbdSAya Levin 				goto unlock;
434de8650a8SEran Ben Elisha 		}
4352d708887SAya Levin 	}
436145e5637SEran Ben Elisha 
43724c22dd0SAya Levin 	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
438145e5637SEran Ben Elisha 		goto close_sqs_nest;
439145e5637SEran Ben Elisha 
44086d747a3STariq Toukan 	for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
441145e5637SEran Ben Elisha 		err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
442145e5637SEran Ben Elisha 								    &ptp_ch->ptpsq[tc],
443145e5637SEran Ben Elisha 								    tc);
444145e5637SEran Ben Elisha 		if (err)
445145e5637SEran Ben Elisha 			goto unlock;
446145e5637SEran Ben Elisha 	}
447145e5637SEran Ben Elisha 
448145e5637SEran Ben Elisha close_sqs_nest:
449de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
450de8650a8SEran Ben Elisha 	if (err)
451de8650a8SEran Ben Elisha 		goto unlock;
452de8650a8SEran Ben Elisha 
453de8650a8SEran Ben Elisha unlock:
454de8650a8SEran Ben Elisha 	mutex_unlock(&priv->state_lock);
455de8650a8SEran Ben Elisha 	return err;
456de8650a8SEran Ben Elisha }
457de8650a8SEran Ben Elisha 
4585f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
4595f29458bSAya Levin 				     void *ctx)
4605f29458bSAya Levin {
4615f29458bSAya Levin 	struct mlx5_rsc_key key = {};
4625f29458bSAya Levin 	struct mlx5e_txqsq *sq = ctx;
4635f29458bSAya Levin 	int err;
4645f29458bSAya Levin 
4655f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4665f29458bSAya Levin 		return 0;
4675f29458bSAya Levin 
468d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
4695f29458bSAya Levin 	if (err)
4705f29458bSAya Levin 		return err;
4715f29458bSAya Levin 
4725f29458bSAya Levin 	key.size = PAGE_SIZE;
4735f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
4745f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4755f29458bSAya Levin 	if (err)
4765f29458bSAya Levin 		return err;
4775f29458bSAya Levin 
478d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4795f29458bSAya Levin 	if (err)
4805f29458bSAya Levin 		return err;
4815f29458bSAya Levin 
482d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
4835f29458bSAya Levin 	if (err)
4845f29458bSAya Levin 		return err;
4855f29458bSAya Levin 
486d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
4875f29458bSAya Levin 	if (err)
4885f29458bSAya Levin 		return err;
4895f29458bSAya Levin 
4905f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
4915f29458bSAya Levin 	key.index1 = sq->sqn;
4925f29458bSAya Levin 	key.num_of_obj1 = 1;
4935f29458bSAya Levin 
4945f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
4955f29458bSAya Levin 	if (err)
4965f29458bSAya Levin 		return err;
4975f29458bSAya Levin 
498d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
4995f29458bSAya Levin 	if (err)
5005f29458bSAya Levin 		return err;
5015f29458bSAya Levin 
502d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
5035f29458bSAya Levin 	if (err)
5045f29458bSAya Levin 		return err;
5055f29458bSAya Levin 
5065f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
5075f29458bSAya Levin 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
5085f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
5095f29458bSAya Levin 	if (err)
5105f29458bSAya Levin 		return err;
5115f29458bSAya Levin 
512d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
5135f29458bSAya Levin 	if (err)
5145f29458bSAya Levin 		return err;
5155f29458bSAya Levin 
516d5cbedd7SAya Levin 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
5175f29458bSAya Levin }
5185f29458bSAya Levin 
519918fc385SAmir Tzin static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
520918fc385SAmir Tzin 					  void *ctx)
521918fc385SAmir Tzin {
522918fc385SAmir Tzin 	struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
523918fc385SAmir Tzin 
524918fc385SAmir Tzin 	return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
525918fc385SAmir Tzin }
526918fc385SAmir Tzin 
5275f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
5285f29458bSAya Levin 					  struct devlink_fmsg *fmsg)
5295f29458bSAya Levin {
530b0d35de4SAya Levin 	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
5315f29458bSAya Levin 	struct mlx5_rsc_key key = {};
5325f29458bSAya Levin 	int i, tc, err;
5335f29458bSAya Levin 
5345f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
5355f29458bSAya Levin 		return 0;
5365f29458bSAya Levin 
537d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
5385f29458bSAya Levin 	if (err)
5395f29458bSAya Levin 		return err;
5405f29458bSAya Levin 
5415f29458bSAya Levin 	key.size = PAGE_SIZE;
5425f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
5435f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
5445f29458bSAya Levin 	if (err)
5455f29458bSAya Levin 		return err;
5465f29458bSAya Levin 
547d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
5485f29458bSAya Levin 	if (err)
5495f29458bSAya Levin 		return err;
5505f29458bSAya Levin 
5515f29458bSAya Levin 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
5525f29458bSAya Levin 	if (err)
5535f29458bSAya Levin 		return err;
5545f29458bSAya Levin 
5555f29458bSAya Levin 	for (i = 0; i < priv->channels.num; i++) {
5565f29458bSAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
5575f29458bSAya Levin 
55886d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
5595f29458bSAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
5605f29458bSAya Levin 
5615f29458bSAya Levin 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
5625f29458bSAya Levin 			if (err)
5635f29458bSAya Levin 				return err;
5645f29458bSAya Levin 		}
5655f29458bSAya Levin 	}
566145e5637SEran Ben Elisha 
56724c22dd0SAya Levin 	if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
56886d747a3STariq Toukan 		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
569145e5637SEran Ben Elisha 			struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
570145e5637SEran Ben Elisha 
571145e5637SEran Ben Elisha 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
572145e5637SEran Ben Elisha 			if (err)
573145e5637SEran Ben Elisha 				return err;
574145e5637SEran Ben Elisha 		}
575145e5637SEran Ben Elisha 	}
576145e5637SEran Ben Elisha 
5775f29458bSAya Levin 	return devlink_fmsg_arr_pair_nest_end(fmsg);
5785f29458bSAya Levin }
5795f29458bSAya Levin 
5805f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
5815f29458bSAya Levin 					   struct mlx5e_err_ctx *err_ctx,
5825f29458bSAya Levin 					   struct devlink_fmsg *fmsg)
5835f29458bSAya Levin {
5845f29458bSAya Levin 	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
5855f29458bSAya Levin }
5865f29458bSAya Levin 
/* devlink .dump callback of the TX reporter.
 *
 * With an error context (@context non-NULL) the context's own dump callback
 * is run; without one, all SQs are dumped.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);

	return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);
}
5975f29458bSAya Levin 
5980a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
5990a56be3cSAya Levin {
6000a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
6014ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
6020a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
6030a56be3cSAya Levin 
6040a56be3cSAya Levin 	err_ctx.ctx = sq;
6050a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
6065f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
607b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
6080a56be3cSAya Levin 
6090a56be3cSAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
6100a56be3cSAya Levin }
6110a56be3cSAya Levin 
6120a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
6130a56be3cSAya Levin {
6140a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
615e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx to_ctx = {};
6164ad40d8eSEran Ben Elisha 	struct mlx5e_priv *priv = sq->priv;
6170a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
6180a56be3cSAya Levin 
619e6205564SAya Levin 	to_ctx.sq = sq;
620e6205564SAya Levin 	err_ctx.ctx = &to_ctx;
6210a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
622918fc385SAmir Tzin 	err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
623b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str),
624b21aef7eSJoe Perches 		 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
6254ad40d8eSEran Ben Elisha 		 sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
6265337824fSEric Dumazet 		 jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
6270a56be3cSAya Levin 
628e6205564SAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
629e6205564SAya Levin 	return to_ctx.status;
6300a56be3cSAya Levin }
6310a56be3cSAya Levin 
632de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
633de8650a8SEran Ben Elisha 		.name = "tx",
634de8650a8SEran Ben Elisha 		.recover = mlx5e_tx_reporter_recover,
635de8650a8SEran Ben Elisha 		.diagnose = mlx5e_tx_reporter_diagnose,
6365f29458bSAya Levin 		.dump = mlx5e_tx_reporter_dump,
637de8650a8SEran Ben Elisha };
638de8650a8SEran Ben Elisha 
639de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
640de8650a8SEran Ben Elisha 
641b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
642de8650a8SEran Ben Elisha {
643baf6dfdbSAya Levin 	struct devlink_health_reporter *reporter;
644de8650a8SEran Ben Elisha 
645bc1536f3SJiri Pirko 	reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port,
646bc1536f3SJiri Pirko 						       &mlx5_tx_reporter_ops,
647b7e93bb6SVladyslav Tarasiuk 						       MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
648baf6dfdbSAya Levin 	if (IS_ERR(reporter)) {
649de8650a8SEran Ben Elisha 		netdev_warn(priv->netdev,
650de8650a8SEran Ben Elisha 			    "Failed to create tx reporter, err = %ld\n",
651baf6dfdbSAya Levin 			    PTR_ERR(reporter));
652b3ea4c4fSEran Ben Elisha 		return;
6537f7cc235SAya Levin 	}
654baf6dfdbSAya Levin 	priv->tx_reporter = reporter;
655de8650a8SEran Ben Elisha }
656de8650a8SEran Ben Elisha 
65706293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
658de8650a8SEran Ben Elisha {
659baf6dfdbSAya Levin 	if (!priv->tx_reporter)
660de8650a8SEran Ben Elisha 		return;
661de8650a8SEran Ben Elisha 
6629f167327SJiri Pirko 	devlink_health_reporter_destroy(priv->tx_reporter);
6637a9fb35eSRoi Dayan 	priv->tx_reporter = NULL;
664de8650a8SEran Ben Elisha }
665