/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */
3de8650a8SEran Ben Elisha 
44edc17fdSAya Levin #include "health.h"
5de8650a8SEran Ben Elisha 
6de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
7de8650a8SEran Ben Elisha {
8e74e28aeSAya Levin 	unsigned long exp_time = jiffies +
9e74e28aeSAya Levin 				 msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC);
10de8650a8SEran Ben Elisha 
11de8650a8SEran Ben Elisha 	while (time_before(jiffies, exp_time)) {
12de8650a8SEran Ben Elisha 		if (sq->cc == sq->pc)
13de8650a8SEran Ben Elisha 			return 0;
14de8650a8SEran Ben Elisha 
15de8650a8SEran Ben Elisha 		msleep(20);
16de8650a8SEran Ben Elisha 	}
17de8650a8SEran Ben Elisha 
18de8650a8SEran Ben Elisha 	netdev_err(sq->channel->netdev,
19de8650a8SEran Ben Elisha 		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
20de8650a8SEran Ben Elisha 		   sq->sqn, sq->cc, sq->pc);
21de8650a8SEran Ben Elisha 
22de8650a8SEran Ben Elisha 	return -ETIMEDOUT;
23de8650a8SEran Ben Elisha }
24de8650a8SEran Ben Elisha 
25de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
26de8650a8SEran Ben Elisha {
27de8650a8SEran Ben Elisha 	WARN_ONCE(sq->cc != sq->pc,
28de8650a8SEran Ben Elisha 		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
29de8650a8SEran Ben Elisha 		  sq->sqn, sq->cc, sq->pc);
30de8650a8SEran Ben Elisha 	sq->cc = 0;
31de8650a8SEran Ben Elisha 	sq->dma_fifo_cc = 0;
32de8650a8SEran Ben Elisha 	sq->pc = 0;
33de8650a8SEran Ben Elisha }
34de8650a8SEran Ben Elisha 
35c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
36de8650a8SEran Ben Elisha {
37c50de4afSAya Levin 	struct mlx5_core_dev *mdev;
38c50de4afSAya Levin 	struct net_device *dev;
39c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
40de8650a8SEran Ben Elisha 	u8 state;
41de8650a8SEran Ben Elisha 	int err;
42de8650a8SEran Ben Elisha 
43c50de4afSAya Levin 	sq = ctx;
44c50de4afSAya Levin 	mdev = sq->channel->mdev;
45c50de4afSAya Levin 	dev = sq->channel->netdev;
46c50de4afSAya Levin 
47c50de4afSAya Levin 	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
48c50de4afSAya Levin 		return 0;
49c50de4afSAya Levin 
50de8650a8SEran Ben Elisha 	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
51de8650a8SEran Ben Elisha 	if (err) {
52de8650a8SEran Ben Elisha 		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
53de8650a8SEran Ben Elisha 			   sq->sqn, err);
54276d197eSAya Levin 		goto out;
55de8650a8SEran Ben Elisha 	}
56de8650a8SEran Ben Elisha 
57d9a2fcf5SAya Levin 	if (state != MLX5_SQC_STATE_ERR)
58276d197eSAya Levin 		goto out;
59de8650a8SEran Ben Elisha 
60de8650a8SEran Ben Elisha 	mlx5e_tx_disable_queue(sq->txq);
61de8650a8SEran Ben Elisha 
62de8650a8SEran Ben Elisha 	err = mlx5e_wait_for_sq_flush(sq);
63de8650a8SEran Ben Elisha 	if (err)
64276d197eSAya Levin 		goto out;
65de8650a8SEran Ben Elisha 
66de8650a8SEran Ben Elisha 	/* At this point, no new packets will arrive from the stack as TXQ is
67de8650a8SEran Ben Elisha 	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
68de8650a8SEran Ben Elisha 	 * pending WQEs. SQ can safely reset the SQ.
69de8650a8SEran Ben Elisha 	 */
70de8650a8SEran Ben Elisha 
71c50de4afSAya Levin 	err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn);
72de8650a8SEran Ben Elisha 	if (err)
73276d197eSAya Levin 		goto out;
74de8650a8SEran Ben Elisha 
75de8650a8SEran Ben Elisha 	mlx5e_reset_txqsq_cc_pc(sq);
76de8650a8SEran Ben Elisha 	sq->stats->recover++;
77276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
78de8650a8SEran Ben Elisha 	mlx5e_activate_txqsq(sq);
79de8650a8SEran Ben Elisha 
80de8650a8SEran Ben Elisha 	return 0;
81276d197eSAya Levin out:
82276d197eSAya Levin 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
83276d197eSAya Levin 	return err;
84de8650a8SEran Ben Elisha }
85de8650a8SEran Ben Elisha 
/* Context handed to the TX timeout recover callback.
 * @sq: the SQ the timeout was reported on.
 * @status: written by the recover callback: 0 when this SQ was recovered,
 *          1 when all channels were reopened, otherwise the error returned
 *          by mlx5e_safe_reopen_channels().
 */
struct mlx5e_tx_timeout_ctx {
	struct mlx5e_txqsq *sq;
	int status;
};
90e6205564SAya Levin 
91c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx)
927d91126bSEran Ben Elisha {
93e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx *to_ctx;
94e6205564SAya Levin 	struct mlx5e_priv *priv;
95c50de4afSAya Levin 	struct mlx5_eq_comp *eq;
96c50de4afSAya Levin 	struct mlx5e_txqsq *sq;
97c50de4afSAya Levin 	int err;
987d91126bSEran Ben Elisha 
99e6205564SAya Levin 	to_ctx = ctx;
100e6205564SAya Levin 	sq = to_ctx->sq;
101c50de4afSAya Levin 	eq = sq->cq.mcq.eq;
102e6205564SAya Levin 	priv = sq->channel->priv;
103c50de4afSAya Levin 	err = mlx5e_health_channel_eq_recover(eq, sq->channel);
104e6205564SAya Levin 	if (!err) {
105e6205564SAya Levin 		to_ctx->status = 0; /* this sq recovered */
106e6205564SAya Levin 		return err;
107e6205564SAya Levin 	}
108e6205564SAya Levin 
109e6205564SAya Levin 	err = mlx5e_safe_reopen_channels(priv);
110e6205564SAya Levin 	if (!err) {
111e6205564SAya Levin 		to_ctx->status = 1; /* all channels recovered */
112e6205564SAya Levin 		return err;
113e6205564SAya Levin 	}
114e6205564SAya Levin 
115e6205564SAya Levin 	to_ctx->status = err;
1167d91126bSEran Ben Elisha 	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
117e6205564SAya Levin 	netdev_err(priv->netdev,
118e6205564SAya Levin 		   "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
119e6205564SAya Levin 		   err);
1207d91126bSEran Ben Elisha 
121c50de4afSAya Levin 	return err;
1227d91126bSEran Ben Elisha }
1237d91126bSEran Ben Elisha 
124de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function.
125de8650a8SEran Ben Elisha  * It can cause a dead lock or a read-after-free.
126de8650a8SEran Ben Elisha  */
127c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
128de8650a8SEran Ben Elisha {
129c50de4afSAya Levin 	return err_ctx->recover(err_ctx->ctx);
130de8650a8SEran Ben Elisha }
131de8650a8SEran Ben Elisha 
/* devlink "recover" callback of the TX reporter.
 *
 * With an error context, the context's own recover handler is used; without
 * one, all channels are recovered via mlx5e_health_recover_channels().
 */
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_health_recover_channels(priv);

	return mlx5e_tx_reporter_recover_from_ctx(err_ctx);
}
142de8650a8SEran Ben Elisha 
143de8650a8SEran Ben Elisha static int
144de8650a8SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
1452d708887SAya Levin 					struct mlx5e_txqsq *sq, int tc)
146de8650a8SEran Ben Elisha {
147dd921fd2SAya Levin 	struct mlx5e_priv *priv = sq->channel->priv;
148dd921fd2SAya Levin 	bool stopped = netif_xmit_stopped(sq->txq);
149dd921fd2SAya Levin 	u8 state;
150de8650a8SEran Ben Elisha 	int err;
151de8650a8SEran Ben Elisha 
152dd921fd2SAya Levin 	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
153dd921fd2SAya Levin 	if (err)
154dd921fd2SAya Levin 		return err;
155dd921fd2SAya Levin 
156de8650a8SEran Ben Elisha 	err = devlink_fmsg_obj_nest_start(fmsg);
157de8650a8SEran Ben Elisha 	if (err)
158de8650a8SEran Ben Elisha 		return err;
159de8650a8SEran Ben Elisha 
1602d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
1612d708887SAya Levin 	if (err)
1622d708887SAya Levin 		return err;
1632d708887SAya Levin 
1642d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
1652d708887SAya Levin 	if (err)
1662d708887SAya Levin 		return err;
1672d708887SAya Levin 
1682d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
1692d708887SAya Levin 	if (err)
1702d708887SAya Levin 		return err;
1712d708887SAya Levin 
172dd921fd2SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
173de8650a8SEran Ben Elisha 	if (err)
174de8650a8SEran Ben Elisha 		return err;
175de8650a8SEran Ben Elisha 
176de8650a8SEran Ben Elisha 	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
177de8650a8SEran Ben Elisha 	if (err)
178de8650a8SEran Ben Elisha 		return err;
179de8650a8SEran Ben Elisha 
180de8650a8SEran Ben Elisha 	err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
181de8650a8SEran Ben Elisha 	if (err)
182de8650a8SEran Ben Elisha 		return err;
183de8650a8SEran Ben Elisha 
1842d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
1852d708887SAya Levin 	if (err)
1862d708887SAya Levin 		return err;
1872d708887SAya Levin 
1882d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
1892d708887SAya Levin 	if (err)
1902d708887SAya Levin 		return err;
1912d708887SAya Levin 
192d5cbedd7SAya Levin 	err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
1932bf09e60SAya Levin 	if (err)
1942bf09e60SAya Levin 		return err;
1952bf09e60SAya Levin 
19656837c2aSAya Levin 	err = mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
19756837c2aSAya Levin 	if (err)
19856837c2aSAya Levin 		return err;
19956837c2aSAya Levin 
200de8650a8SEran Ben Elisha 	err = devlink_fmsg_obj_nest_end(fmsg);
201de8650a8SEran Ben Elisha 	if (err)
202de8650a8SEran Ben Elisha 		return err;
203de8650a8SEran Ben Elisha 
204de8650a8SEran Ben Elisha 	return 0;
205de8650a8SEran Ben Elisha }
206de8650a8SEran Ben Elisha 
207de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
208e7a98105SJiri Pirko 				      struct devlink_fmsg *fmsg,
209e7a98105SJiri Pirko 				      struct netlink_ext_ack *extack)
210de8650a8SEran Ben Elisha {
211de8650a8SEran Ben Elisha 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
2122d708887SAya Levin 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
2132d708887SAya Levin 	u32 sq_stride, sq_sz;
2142d708887SAya Levin 
2152d708887SAya Levin 	int i, tc, err = 0;
216de8650a8SEran Ben Elisha 
217de8650a8SEran Ben Elisha 	mutex_lock(&priv->state_lock);
218de8650a8SEran Ben Elisha 
219de8650a8SEran Ben Elisha 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
220de8650a8SEran Ben Elisha 		goto unlock;
221de8650a8SEran Ben Elisha 
2222d708887SAya Levin 	sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq);
2232d708887SAya Levin 	sq_stride = MLX5_SEND_WQE_BB;
2242d708887SAya Levin 
225d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
2262d708887SAya Levin 	if (err)
2272d708887SAya Levin 		goto unlock;
2282d708887SAya Levin 
229d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
2302d708887SAya Levin 	if (err)
2312d708887SAya Levin 		goto unlock;
2322d708887SAya Levin 
2332d708887SAya Levin 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
2342d708887SAya Levin 	if (err)
2352d708887SAya Levin 		goto unlock;
2362d708887SAya Levin 
2372d708887SAya Levin 	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
2382d708887SAya Levin 	if (err)
2392d708887SAya Levin 		goto unlock;
2402d708887SAya Levin 
241d5cbedd7SAya Levin 	err = mlx5e_health_cq_common_diag_fmsg(&generic_sq->cq, fmsg);
2422bf09e60SAya Levin 	if (err)
2432bf09e60SAya Levin 		goto unlock;
2442bf09e60SAya Levin 
245d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
2462d708887SAya Levin 	if (err)
2472d708887SAya Levin 		goto unlock;
2482d708887SAya Levin 
249d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
2502d708887SAya Levin 	if (err)
2512d708887SAya Levin 		goto unlock;
2522d708887SAya Levin 
253de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
254de8650a8SEran Ben Elisha 	if (err)
255de8650a8SEran Ben Elisha 		goto unlock;
256de8650a8SEran Ben Elisha 
2572d708887SAya Levin 	for (i = 0; i < priv->channels.num; i++) {
2582d708887SAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
259de8650a8SEran Ben Elisha 
2602d708887SAya Levin 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
2612d708887SAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
2622d708887SAya Levin 
2632d708887SAya Levin 			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
264de8650a8SEran Ben Elisha 			if (err)
26599d31cbdSAya Levin 				goto unlock;
266de8650a8SEran Ben Elisha 		}
2672d708887SAya Levin 	}
268de8650a8SEran Ben Elisha 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
269de8650a8SEran Ben Elisha 	if (err)
270de8650a8SEran Ben Elisha 		goto unlock;
271de8650a8SEran Ben Elisha 
272de8650a8SEran Ben Elisha unlock:
273de8650a8SEran Ben Elisha 	mutex_unlock(&priv->state_lock);
274de8650a8SEran Ben Elisha 	return err;
275de8650a8SEran Ben Elisha }
276de8650a8SEran Ben Elisha 
2775f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
2785f29458bSAya Levin 				     void *ctx)
2795f29458bSAya Levin {
2805f29458bSAya Levin 	struct mlx5_rsc_key key = {};
2815f29458bSAya Levin 	struct mlx5e_txqsq *sq = ctx;
2825f29458bSAya Levin 	int err;
2835f29458bSAya Levin 
2845f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
2855f29458bSAya Levin 		return 0;
2865f29458bSAya Levin 
287d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
2885f29458bSAya Levin 	if (err)
2895f29458bSAya Levin 		return err;
2905f29458bSAya Levin 
2915f29458bSAya Levin 	key.size = PAGE_SIZE;
2925f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
2935f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
2945f29458bSAya Levin 	if (err)
2955f29458bSAya Levin 		return err;
2965f29458bSAya Levin 
297d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
2985f29458bSAya Levin 	if (err)
2995f29458bSAya Levin 		return err;
3005f29458bSAya Levin 
301d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
3025f29458bSAya Levin 	if (err)
3035f29458bSAya Levin 		return err;
3045f29458bSAya Levin 
305d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
3065f29458bSAya Levin 	if (err)
3075f29458bSAya Levin 		return err;
3085f29458bSAya Levin 
3095f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
3105f29458bSAya Levin 	key.index1 = sq->sqn;
3115f29458bSAya Levin 	key.num_of_obj1 = 1;
3125f29458bSAya Levin 
3135f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
3145f29458bSAya Levin 	if (err)
3155f29458bSAya Levin 		return err;
3165f29458bSAya Levin 
317d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3185f29458bSAya Levin 	if (err)
3195f29458bSAya Levin 		return err;
3205f29458bSAya Levin 
321d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
3225f29458bSAya Levin 	if (err)
3235f29458bSAya Levin 		return err;
3245f29458bSAya Levin 
3255f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
3265f29458bSAya Levin 	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
3275f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
3285f29458bSAya Levin 	if (err)
3295f29458bSAya Levin 		return err;
3305f29458bSAya Levin 
331d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3325f29458bSAya Levin 	if (err)
3335f29458bSAya Levin 		return err;
3345f29458bSAya Levin 
335d5cbedd7SAya Levin 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3365f29458bSAya Levin }
3375f29458bSAya Levin 
3385f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
3395f29458bSAya Levin 					  struct devlink_fmsg *fmsg)
3405f29458bSAya Levin {
3415f29458bSAya Levin 	struct mlx5_rsc_key key = {};
3425f29458bSAya Levin 	int i, tc, err;
3435f29458bSAya Levin 
3445f29458bSAya Levin 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
3455f29458bSAya Levin 		return 0;
3465f29458bSAya Levin 
347d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
3485f29458bSAya Levin 	if (err)
3495f29458bSAya Levin 		return err;
3505f29458bSAya Levin 
3515f29458bSAya Levin 	key.size = PAGE_SIZE;
3525f29458bSAya Levin 	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
3535f29458bSAya Levin 	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
3545f29458bSAya Levin 	if (err)
3555f29458bSAya Levin 		return err;
3565f29458bSAya Levin 
357d5cbedd7SAya Levin 	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
3585f29458bSAya Levin 	if (err)
3595f29458bSAya Levin 		return err;
3605f29458bSAya Levin 
3615f29458bSAya Levin 	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
3625f29458bSAya Levin 	if (err)
3635f29458bSAya Levin 		return err;
3645f29458bSAya Levin 
3655f29458bSAya Levin 	for (i = 0; i < priv->channels.num; i++) {
3665f29458bSAya Levin 		struct mlx5e_channel *c = priv->channels.c[i];
3675f29458bSAya Levin 
3685f29458bSAya Levin 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
3695f29458bSAya Levin 			struct mlx5e_txqsq *sq = &c->sq[tc];
3705f29458bSAya Levin 
3715f29458bSAya Levin 			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
3725f29458bSAya Levin 			if (err)
3735f29458bSAya Levin 				return err;
3745f29458bSAya Levin 		}
3755f29458bSAya Levin 	}
3765f29458bSAya Levin 	return devlink_fmsg_arr_pair_nest_end(fmsg);
3775f29458bSAya Levin }
3785f29458bSAya Levin 
3795f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
3805f29458bSAya Levin 					   struct mlx5e_err_ctx *err_ctx,
3815f29458bSAya Levin 					   struct devlink_fmsg *fmsg)
3825f29458bSAya Levin {
3835f29458bSAya Levin 	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
3845f29458bSAya Levin }
3855f29458bSAya Levin 
/* devlink "dump" callback of the TX reporter.
 *
 * With an error context, the context's own dump handler is used; without
 * one, all SQs are dumped.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);

	return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);
}
3965f29458bSAya Levin 
3970a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
3980a56be3cSAya Levin {
3990a56be3cSAya Levin 	struct mlx5e_priv *priv = sq->channel->priv;
4000a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
4010a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
4020a56be3cSAya Levin 
4030a56be3cSAya Levin 	err_ctx.ctx = sq;
4040a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
4055f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
406b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);
4070a56be3cSAya Levin 
4080a56be3cSAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
4090a56be3cSAya Levin }
4100a56be3cSAya Levin 
4110a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
4120a56be3cSAya Levin {
4130a56be3cSAya Levin 	struct mlx5e_priv *priv = sq->channel->priv;
4140a56be3cSAya Levin 	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
415e6205564SAya Levin 	struct mlx5e_tx_timeout_ctx to_ctx = {};
4160a56be3cSAya Levin 	struct mlx5e_err_ctx err_ctx = {};
4170a56be3cSAya Levin 
418e6205564SAya Levin 	to_ctx.sq = sq;
419e6205564SAya Levin 	err_ctx.ctx = &to_ctx;
4200a56be3cSAya Levin 	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
4215f29458bSAya Levin 	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
422b21aef7eSJoe Perches 	snprintf(err_str, sizeof(err_str),
423b21aef7eSJoe Perches 		 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
4240a56be3cSAya Levin 		 sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
4250a56be3cSAya Levin 		 jiffies_to_usecs(jiffies - sq->txq->trans_start));
4260a56be3cSAya Levin 
427e6205564SAya Levin 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
428e6205564SAya Levin 	return to_ctx.status;
4290a56be3cSAya Levin }
4300a56be3cSAya Levin 
431de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
432de8650a8SEran Ben Elisha 		.name = "tx",
433de8650a8SEran Ben Elisha 		.recover = mlx5e_tx_reporter_recover,
434de8650a8SEran Ben Elisha 		.diagnose = mlx5e_tx_reporter_diagnose,
4355f29458bSAya Levin 		.dump = mlx5e_tx_reporter_dump,
436de8650a8SEran Ben Elisha };
437de8650a8SEran Ben Elisha 
438de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
439de8650a8SEran Ben Elisha 
440b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
441de8650a8SEran Ben Elisha {
442baf6dfdbSAya Levin 	struct devlink_health_reporter *reporter;
443de8650a8SEran Ben Elisha 	struct mlx5_core_dev *mdev = priv->mdev;
444c50de4afSAya Levin 	struct devlink *devlink;
445de8650a8SEran Ben Elisha 
446c50de4afSAya Levin 	devlink = priv_to_devlink(mdev);
447baf6dfdbSAya Levin 	reporter =
448de8650a8SEran Ben Elisha 		devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
449de8650a8SEran Ben Elisha 					       MLX5_REPORTER_TX_GRACEFUL_PERIOD,
450ba7d16c7SEran Ben Elisha 					       priv);
451baf6dfdbSAya Levin 	if (IS_ERR(reporter)) {
452de8650a8SEran Ben Elisha 		netdev_warn(priv->netdev,
453de8650a8SEran Ben Elisha 			    "Failed to create tx reporter, err = %ld\n",
454baf6dfdbSAya Levin 			    PTR_ERR(reporter));
455b3ea4c4fSEran Ben Elisha 		return;
4567f7cc235SAya Levin 	}
457baf6dfdbSAya Levin 	priv->tx_reporter = reporter;
458de8650a8SEran Ben Elisha }
459de8650a8SEran Ben Elisha 
46006293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
461de8650a8SEran Ben Elisha {
462baf6dfdbSAya Levin 	if (!priv->tx_reporter)
463de8650a8SEran Ben Elisha 		return;
464de8650a8SEran Ben Elisha 
465de8650a8SEran Ben Elisha 	devlink_health_reporter_destroy(priv->tx_reporter);
466de8650a8SEran Ben Elisha }
467