1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */ 2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */ 3de8650a8SEran Ben Elisha 44edc17fdSAya Levin #include "health.h" 5*145e5637SEran Ben Elisha #include "en/ptp.h" 6de8650a8SEran Ben Elisha 7de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) 8de8650a8SEran Ben Elisha { 9e74e28aeSAya Levin unsigned long exp_time = jiffies + 10e74e28aeSAya Levin msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); 11de8650a8SEran Ben Elisha 12de8650a8SEran Ben Elisha while (time_before(jiffies, exp_time)) { 13de8650a8SEran Ben Elisha if (sq->cc == sq->pc) 14de8650a8SEran Ben Elisha return 0; 15de8650a8SEran Ben Elisha 16de8650a8SEran Ben Elisha msleep(20); 17de8650a8SEran Ben Elisha } 18de8650a8SEran Ben Elisha 194ad40d8eSEran Ben Elisha netdev_err(sq->netdev, 20de8650a8SEran Ben Elisha "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", 21de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 22de8650a8SEran Ben Elisha 23de8650a8SEran Ben Elisha return -ETIMEDOUT; 24de8650a8SEran Ben Elisha } 25de8650a8SEran Ben Elisha 26de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) 27de8650a8SEran Ben Elisha { 28de8650a8SEran Ben Elisha WARN_ONCE(sq->cc != sq->pc, 29de8650a8SEran Ben Elisha "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", 30de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 31de8650a8SEran Ben Elisha sq->cc = 0; 32de8650a8SEran Ben Elisha sq->dma_fifo_cc = 0; 33de8650a8SEran Ben Elisha sq->pc = 0; 34de8650a8SEran Ben Elisha } 35de8650a8SEran Ben Elisha 36c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) 37de8650a8SEran Ben Elisha { 38c50de4afSAya Levin struct mlx5_core_dev *mdev; 39c50de4afSAya Levin struct net_device *dev; 40c50de4afSAya Levin struct mlx5e_txqsq *sq; 41de8650a8SEran Ben Elisha u8 state; 42de8650a8SEran Ben Elisha int err; 43de8650a8SEran Ben Elisha 44c50de4afSAya Levin sq = ctx; 454ad40d8eSEran Ben Elisha mdev = sq->mdev; 464ad40d8eSEran Ben Elisha dev = sq->netdev; 47c50de4afSAya Levin 48c50de4afSAya Levin if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) 49c50de4afSAya Levin return 0; 50c50de4afSAya Levin 51de8650a8SEran Ben Elisha err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); 52de8650a8SEran Ben Elisha if (err) { 53de8650a8SEran Ben Elisha netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", 54de8650a8SEran Ben Elisha sq->sqn, err); 55276d197eSAya Levin goto out; 56de8650a8SEran Ben Elisha } 57de8650a8SEran Ben Elisha 58d9a2fcf5SAya Levin if (state != MLX5_SQC_STATE_ERR) 59276d197eSAya Levin goto out; 60de8650a8SEran Ben Elisha 61de8650a8SEran Ben Elisha mlx5e_tx_disable_queue(sq->txq); 62de8650a8SEran Ben Elisha 63de8650a8SEran Ben Elisha err = mlx5e_wait_for_sq_flush(sq); 64de8650a8SEran Ben Elisha if (err) 65276d197eSAya Levin goto out; 66de8650a8SEran Ben Elisha 67de8650a8SEran Ben Elisha /* At this point, no new packets will arrive from the stack as TXQ is 68de8650a8SEran Ben Elisha * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all 69de8650a8SEran Ben Elisha * pending WQEs. SQ can safely reset the SQ. 70de8650a8SEran Ben Elisha */ 71de8650a8SEran Ben Elisha 724ad40d8eSEran Ben Elisha err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); 73de8650a8SEran Ben Elisha if (err) 74276d197eSAya Levin goto out; 75de8650a8SEran Ben Elisha 76de8650a8SEran Ben Elisha mlx5e_reset_txqsq_cc_pc(sq); 77de8650a8SEran Ben Elisha sq->stats->recover++; 78276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 79de8650a8SEran Ben Elisha mlx5e_activate_txqsq(sq); 80de8650a8SEran Ben Elisha 81de8650a8SEran Ben Elisha return 0; 82276d197eSAya Levin out: 83276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 84276d197eSAya Levin return err; 85de8650a8SEran Ben Elisha } 86de8650a8SEran Ben Elisha 87e6205564SAya Levin struct mlx5e_tx_timeout_ctx { 88e6205564SAya Levin struct mlx5e_txqsq *sq; 89e6205564SAya Levin signed int status; 90e6205564SAya Levin }; 91e6205564SAya Levin 92c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx) 937d91126bSEran Ben Elisha { 94e6205564SAya Levin struct mlx5e_tx_timeout_ctx *to_ctx; 95e6205564SAya Levin struct mlx5e_priv *priv; 96c50de4afSAya Levin struct mlx5_eq_comp *eq; 97c50de4afSAya Levin struct mlx5e_txqsq *sq; 98c50de4afSAya Levin int err; 997d91126bSEran Ben Elisha 100e6205564SAya Levin to_ctx = ctx; 101e6205564SAya Levin sq = to_ctx->sq; 102c50de4afSAya Levin eq = sq->cq.mcq.eq; 1034ad40d8eSEran Ben Elisha priv = sq->priv; 1044ad40d8eSEran Ben Elisha err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); 105e6205564SAya Levin if (!err) { 106e6205564SAya Levin to_ctx->status = 0; /* this sq recovered */ 107e6205564SAya Levin return err; 108e6205564SAya Levin } 109e6205564SAya Levin 110e6205564SAya Levin err = mlx5e_safe_reopen_channels(priv); 111e6205564SAya Levin if (!err) { 112e6205564SAya Levin to_ctx->status = 1; /* all channels recovered */ 113e6205564SAya Levin return err; 114e6205564SAya Levin } 115e6205564SAya Levin 116e6205564SAya Levin to_ctx->status = err; 1177d91126bSEran Ben Elisha clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 118e6205564SAya Levin netdev_err(priv->netdev, 119e6205564SAya Levin "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", 120e6205564SAya Levin err); 1217d91126bSEran Ben Elisha 122c50de4afSAya Levin return err; 1237d91126bSEran Ben Elisha } 1247d91126bSEran Ben Elisha 125de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function. 126de8650a8SEran Ben Elisha * It can cause a dead lock or a read-after-free. 127de8650a8SEran Ben Elisha */ 128c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) 129de8650a8SEran Ben Elisha { 130c50de4afSAya Levin return err_ctx->recover(err_ctx->ctx); 131de8650a8SEran Ben Elisha } 132de8650a8SEran Ben Elisha 133de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, 134e7a98105SJiri Pirko void *context, 135e7a98105SJiri Pirko struct netlink_ext_ack *extack) 136de8650a8SEran Ben Elisha { 137de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 138c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx = context; 139de8650a8SEran Ben Elisha 140de8650a8SEran Ben Elisha return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : 141c50de4afSAya Levin mlx5e_health_recover_channels(priv); 142de8650a8SEran Ben Elisha } 143de8650a8SEran Ben Elisha 144de8650a8SEran Ben Elisha static int 145*145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, 1462d708887SAya Levin struct mlx5e_txqsq *sq, int tc) 147de8650a8SEran Ben Elisha { 148dd921fd2SAya Levin bool stopped = netif_xmit_stopped(sq->txq); 1494ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 150dd921fd2SAya Levin u8 state; 151de8650a8SEran Ben Elisha int err; 152de8650a8SEran Ben Elisha 153dd921fd2SAya Levin err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); 154dd921fd2SAya Levin if (err) 155dd921fd2SAya Levin return err; 156dd921fd2SAya Levin 1572d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); 1582d708887SAya Levin if (err) 1592d708887SAya Levin return err; 1602d708887SAya Levin 1612d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); 1622d708887SAya Levin if (err) 1632d708887SAya Levin return err; 1642d708887SAya Levin 165dd921fd2SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); 166de8650a8SEran Ben Elisha if (err) 167de8650a8SEran Ben Elisha return err; 168de8650a8SEran Ben Elisha 169de8650a8SEran Ben Elisha err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); 170de8650a8SEran Ben Elisha if (err) 171de8650a8SEran Ben Elisha return err; 172de8650a8SEran Ben Elisha 173de8650a8SEran Ben Elisha err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); 174de8650a8SEran Ben Elisha if (err) 175de8650a8SEran Ben Elisha return err; 176de8650a8SEran Ben Elisha 1772d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); 1782d708887SAya Levin if (err) 1792d708887SAya Levin return err; 1802d708887SAya Levin 1812d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); 1822d708887SAya Levin if (err) 1832d708887SAya Levin return err; 1842d708887SAya Levin 185d5cbedd7SAya Levin err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); 1862bf09e60SAya Levin if (err) 1872bf09e60SAya Levin return err; 1882bf09e60SAya Levin 189*145e5637SEran Ben Elisha return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); 190*145e5637SEran Ben Elisha } 191*145e5637SEran Ben Elisha 192*145e5637SEran Ben Elisha static int 193*145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, 194*145e5637SEran Ben Elisha struct mlx5e_txqsq *sq, int tc) 195*145e5637SEran Ben Elisha { 196*145e5637SEran Ben Elisha int err; 197*145e5637SEran Ben Elisha 198*145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 199*145e5637SEran Ben Elisha if (err) 200*145e5637SEran Ben Elisha return err; 201*145e5637SEran Ben Elisha 202*145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); 203*145e5637SEran Ben Elisha if (err) 204*145e5637SEran Ben Elisha return err; 205*145e5637SEran Ben Elisha 206*145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); 20756837c2aSAya Levin if (err) 20856837c2aSAya Levin return err; 20956837c2aSAya Levin 210de8650a8SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 211de8650a8SEran Ben Elisha if (err) 212de8650a8SEran Ben Elisha return err; 213de8650a8SEran Ben Elisha 214de8650a8SEran Ben Elisha return 0; 215de8650a8SEran Ben Elisha } 216de8650a8SEran Ben Elisha 217*145e5637SEran Ben Elisha static int 218*145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, 219*145e5637SEran Ben Elisha struct mlx5e_ptpsq *ptpsq, int tc) 220*145e5637SEran Ben Elisha { 221*145e5637SEran Ben Elisha int err; 222*145e5637SEran Ben Elisha 223*145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 224*145e5637SEran Ben Elisha if (err) 225*145e5637SEran Ben Elisha return err; 226*145e5637SEran Ben Elisha 227*145e5637SEran Ben Elisha err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); 228*145e5637SEran Ben Elisha if (err) 229*145e5637SEran Ben Elisha return err; 230*145e5637SEran Ben Elisha 231*145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, 232*145e5637SEran Ben Elisha &ptpsq->txqsq, 233*145e5637SEran Ben Elisha tc); 234*145e5637SEran Ben Elisha if (err) 235*145e5637SEran Ben Elisha return err; 236*145e5637SEran Ben Elisha 237*145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 238*145e5637SEran Ben Elisha if (err) 239*145e5637SEran Ben Elisha return err; 240*145e5637SEran Ben Elisha 241*145e5637SEran Ben Elisha return 0; 242*145e5637SEran Ben Elisha } 243*145e5637SEran Ben Elisha 244*145e5637SEran Ben Elisha static int 245*145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, 246*145e5637SEran Ben Elisha struct mlx5e_txqsq *txqsq) 247*145e5637SEran Ben Elisha { 248*145e5637SEran Ben Elisha u32 sq_stride, sq_sz; 249*145e5637SEran Ben Elisha int err; 250*145e5637SEran Ben Elisha 251*145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 252*145e5637SEran Ben Elisha if (err) 253*145e5637SEran Ben Elisha return err; 254*145e5637SEran Ben Elisha 255*145e5637SEran Ben Elisha sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); 256*145e5637SEran Ben Elisha sq_stride = MLX5_SEND_WQE_BB; 257*145e5637SEran Ben Elisha 258*145e5637SEran Ben Elisha err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); 259*145e5637SEran Ben Elisha if (err) 260*145e5637SEran Ben Elisha return err; 261*145e5637SEran Ben Elisha 262*145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); 263*145e5637SEran Ben Elisha if (err) 264*145e5637SEran Ben Elisha return err; 265*145e5637SEran Ben Elisha 266*145e5637SEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); 267*145e5637SEran Ben Elisha if (err) 268*145e5637SEran Ben Elisha return err; 269*145e5637SEran Ben Elisha 270*145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 271*145e5637SEran Ben Elisha } 272*145e5637SEran Ben Elisha 273*145e5637SEran Ben Elisha static int 274*145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, 275*145e5637SEran Ben Elisha struct devlink_fmsg *fmsg) 276*145e5637SEran Ben Elisha { 277*145e5637SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 278*145e5637SEran Ben Elisha struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; 279*145e5637SEran Ben Elisha struct mlx5e_ptpsq *generic_ptpsq; 280*145e5637SEran Ben Elisha int err; 281*145e5637SEran Ben Elisha 282*145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); 283*145e5637SEran Ben Elisha if (err) 284*145e5637SEran Ben Elisha return err; 285*145e5637SEran Ben Elisha 286*145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); 287*145e5637SEran Ben Elisha if (err) 288*145e5637SEran Ben Elisha return err; 289*145e5637SEran Ben Elisha 290*145e5637SEran Ben Elisha generic_ptpsq = priv->channels.port_ptp ? 291*145e5637SEran Ben Elisha &priv->channels.port_ptp->ptpsq[0] : 292*145e5637SEran Ben Elisha NULL; 293*145e5637SEran Ben Elisha if (!generic_ptpsq) 294*145e5637SEran Ben Elisha goto out; 295*145e5637SEran Ben Elisha 296*145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); 297*145e5637SEran Ben Elisha if (err) 298*145e5637SEran Ben Elisha return err; 299*145e5637SEran Ben Elisha 300*145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); 301*145e5637SEran Ben Elisha if (err) 302*145e5637SEran Ben Elisha return err; 303*145e5637SEran Ben Elisha 304*145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 305*145e5637SEran Ben Elisha if (err) 306*145e5637SEran Ben Elisha return err; 307*145e5637SEran Ben Elisha 308*145e5637SEran Ben Elisha out: 309*145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 310*145e5637SEran Ben Elisha } 311*145e5637SEran Ben Elisha 312de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, 313e7a98105SJiri Pirko struct devlink_fmsg *fmsg, 314e7a98105SJiri Pirko struct netlink_ext_ack *extack) 315de8650a8SEran Ben Elisha { 316de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 317*145e5637SEran Ben Elisha struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp; 3182d708887SAya Levin 3192d708887SAya Levin int i, tc, err = 0; 320de8650a8SEran Ben Elisha 321de8650a8SEran Ben Elisha mutex_lock(&priv->state_lock); 322de8650a8SEran Ben Elisha 323de8650a8SEran Ben Elisha if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 324de8650a8SEran Ben Elisha goto unlock; 325de8650a8SEran Ben Elisha 326*145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); 3272d708887SAya Levin if (err) 3282d708887SAya Levin goto unlock; 3292d708887SAya Levin 330de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 331de8650a8SEran Ben Elisha if (err) 332de8650a8SEran Ben Elisha goto unlock; 333de8650a8SEran Ben Elisha 3342d708887SAya Levin for (i = 0; i < priv->channels.num; i++) { 3352d708887SAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 336de8650a8SEran Ben Elisha 3372d708887SAya Levin for (tc = 0; tc < priv->channels.params.num_tc; tc++) { 3382d708887SAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 3392d708887SAya Levin 3402d708887SAya Levin err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); 341de8650a8SEran Ben Elisha if (err) 34299d31cbdSAya Levin goto unlock; 343de8650a8SEran Ben Elisha } 3442d708887SAya Levin } 345*145e5637SEran Ben Elisha 346*145e5637SEran Ben Elisha if (!ptp_ch) 347*145e5637SEran Ben Elisha goto close_sqs_nest; 348*145e5637SEran Ben Elisha 349*145e5637SEran Ben Elisha for (tc = 0; tc < priv->channels.params.num_tc; tc++) { 350*145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, 351*145e5637SEran Ben Elisha &ptp_ch->ptpsq[tc], 352*145e5637SEran Ben Elisha tc); 353*145e5637SEran Ben Elisha if (err) 354*145e5637SEran Ben Elisha goto unlock; 355*145e5637SEran Ben Elisha } 356*145e5637SEran Ben Elisha 357*145e5637SEran Ben Elisha close_sqs_nest: 358de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_end(fmsg); 359de8650a8SEran Ben Elisha if (err) 360de8650a8SEran Ben Elisha goto unlock; 361de8650a8SEran Ben Elisha 362de8650a8SEran Ben Elisha unlock: 363de8650a8SEran Ben Elisha mutex_unlock(&priv->state_lock); 364de8650a8SEran Ben Elisha return err; 365de8650a8SEran Ben Elisha } 366de8650a8SEran Ben Elisha 3675f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 3685f29458bSAya Levin void *ctx) 3695f29458bSAya Levin { 3705f29458bSAya Levin struct mlx5_rsc_key key = {}; 3715f29458bSAya Levin struct mlx5e_txqsq *sq = ctx; 3725f29458bSAya Levin int err; 3735f29458bSAya Levin 3745f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 3755f29458bSAya Levin return 0; 3765f29458bSAya Levin 377d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 3785f29458bSAya Levin if (err) 3795f29458bSAya Levin return err; 3805f29458bSAya Levin 3815f29458bSAya Levin key.size = PAGE_SIZE; 3825f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 3835f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 3845f29458bSAya Levin if (err) 3855f29458bSAya Levin return err; 3865f29458bSAya Levin 387d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 3885f29458bSAya Levin if (err) 3895f29458bSAya Levin return err; 3905f29458bSAya Levin 391d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 3925f29458bSAya Levin if (err) 3935f29458bSAya Levin return err; 3945f29458bSAya Levin 395d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 3965f29458bSAya Levin if (err) 3975f29458bSAya Levin return err; 3985f29458bSAya Levin 3995f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 4005f29458bSAya Levin key.index1 = sq->sqn; 4015f29458bSAya Levin key.num_of_obj1 = 1; 4025f29458bSAya Levin 4035f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4045f29458bSAya Levin if (err) 4055f29458bSAya Levin return err; 4065f29458bSAya Levin 407d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4085f29458bSAya Levin if (err) 4095f29458bSAya Levin return err; 4105f29458bSAya Levin 411d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); 4125f29458bSAya Levin if (err) 4135f29458bSAya Levin return err; 4145f29458bSAya Levin 4155f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SND_BUFF; 4165f29458bSAya Levin key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 4175f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4185f29458bSAya Levin if (err) 4195f29458bSAya Levin return err; 4205f29458bSAya Levin 421d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4225f29458bSAya Levin if (err) 4235f29458bSAya Levin return err; 4245f29458bSAya Levin 425d5cbedd7SAya Levin return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4265f29458bSAya Levin } 4275f29458bSAya Levin 4285f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, 4295f29458bSAya Levin struct devlink_fmsg *fmsg) 4305f29458bSAya Levin { 431*145e5637SEran Ben Elisha struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp; 4325f29458bSAya Levin struct mlx5_rsc_key key = {}; 4335f29458bSAya Levin int i, tc, err; 4345f29458bSAya Levin 4355f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4365f29458bSAya Levin return 0; 4375f29458bSAya Levin 438d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4395f29458bSAya Levin if (err) 4405f29458bSAya Levin return err; 4415f29458bSAya Levin 4425f29458bSAya Levin key.size = PAGE_SIZE; 4435f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4445f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4455f29458bSAya Levin if (err) 4465f29458bSAya Levin return err; 4475f29458bSAya Levin 448d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4495f29458bSAya Levin if (err) 4505f29458bSAya Levin return err; 4515f29458bSAya Levin 4525f29458bSAya Levin err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 4535f29458bSAya Levin if (err) 4545f29458bSAya Levin return err; 4555f29458bSAya Levin 4565f29458bSAya Levin for (i = 0; i < priv->channels.num; i++) { 4575f29458bSAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 4585f29458bSAya Levin 4595f29458bSAya Levin for (tc = 0; tc < priv->channels.params.num_tc; tc++) { 4605f29458bSAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 4615f29458bSAya Levin 4625f29458bSAya Levin err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); 4635f29458bSAya Levin if (err) 4645f29458bSAya Levin return err; 4655f29458bSAya Levin } 4665f29458bSAya Levin } 467*145e5637SEran Ben Elisha 468*145e5637SEran Ben Elisha if (ptp_ch) { 469*145e5637SEran Ben Elisha for (tc = 0; tc < priv->channels.params.num_tc; tc++) { 470*145e5637SEran Ben Elisha struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; 471*145e5637SEran Ben Elisha 472*145e5637SEran Ben Elisha err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); 473*145e5637SEran Ben Elisha if (err) 474*145e5637SEran Ben Elisha return err; 475*145e5637SEran Ben Elisha } 476*145e5637SEran Ben Elisha } 477*145e5637SEran Ben Elisha 4785f29458bSAya Levin return devlink_fmsg_arr_pair_nest_end(fmsg); 4795f29458bSAya Levin } 4805f29458bSAya Levin 4815f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, 4825f29458bSAya Levin struct mlx5e_err_ctx *err_ctx, 4835f29458bSAya Levin struct devlink_fmsg *fmsg) 4845f29458bSAya Levin { 4855f29458bSAya Levin return err_ctx->dump(priv, fmsg, err_ctx->ctx); 4865f29458bSAya Levin } 4875f29458bSAya Levin 4885f29458bSAya Levin static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, 4895f29458bSAya Levin struct devlink_fmsg *fmsg, void *context, 4905f29458bSAya Levin struct netlink_ext_ack *extack) 4915f29458bSAya Levin { 4925f29458bSAya Levin struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 4935f29458bSAya Levin struct mlx5e_err_ctx *err_ctx = context; 4945f29458bSAya Levin 4955f29458bSAya Levin return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : 4965f29458bSAya Levin mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); 4975f29458bSAya Levin } 4985f29458bSAya Levin 4990a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) 5000a56be3cSAya Levin { 5010a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 5024ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5030a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5040a56be3cSAya Levin 5050a56be3cSAya Levin err_ctx.ctx = sq; 5060a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; 5075f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 508b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); 5090a56be3cSAya Levin 5100a56be3cSAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 5110a56be3cSAya Levin } 5120a56be3cSAya Levin 5130a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) 5140a56be3cSAya Levin { 5150a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 516e6205564SAya Levin struct mlx5e_tx_timeout_ctx to_ctx = {}; 5174ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5180a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5190a56be3cSAya Levin 520e6205564SAya Levin to_ctx.sq = sq; 521e6205564SAya Levin err_ctx.ctx = &to_ctx; 5220a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_timeout_recover; 5235f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 524b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), 525b21aef7eSJoe Perches "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", 5264ad40d8eSEran Ben Elisha sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 5270a56be3cSAya Levin jiffies_to_usecs(jiffies - sq->txq->trans_start)); 5280a56be3cSAya Levin 529e6205564SAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 530e6205564SAya Levin return to_ctx.status; 5310a56be3cSAya Levin } 5320a56be3cSAya Levin 533de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { 534de8650a8SEran Ben Elisha .name = "tx", 535de8650a8SEran Ben Elisha .recover = mlx5e_tx_reporter_recover, 536de8650a8SEran Ben Elisha .diagnose = mlx5e_tx_reporter_diagnose, 5375f29458bSAya Levin .dump = mlx5e_tx_reporter_dump, 538de8650a8SEran Ben Elisha }; 539de8650a8SEran Ben Elisha 540de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 541de8650a8SEran Ben Elisha 542b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) 543de8650a8SEran Ben Elisha { 544baf6dfdbSAya Levin struct devlink_health_reporter *reporter; 545de8650a8SEran Ben Elisha 546b7e93bb6SVladyslav Tarasiuk reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_tx_reporter_ops, 547b7e93bb6SVladyslav Tarasiuk MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); 548baf6dfdbSAya Levin if (IS_ERR(reporter)) { 549de8650a8SEran Ben Elisha netdev_warn(priv->netdev, 550de8650a8SEran Ben Elisha "Failed to create tx reporter, err = %ld\n", 551baf6dfdbSAya Levin PTR_ERR(reporter)); 552b3ea4c4fSEran Ben Elisha return; 5537f7cc235SAya Levin } 554baf6dfdbSAya Levin priv->tx_reporter = reporter; 555de8650a8SEran Ben Elisha } 556de8650a8SEran Ben Elisha 55706293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) 558de8650a8SEran Ben Elisha { 559baf6dfdbSAya Levin if (!priv->tx_reporter) 560de8650a8SEran Ben Elisha return; 561de8650a8SEran Ben Elisha 562b7e93bb6SVladyslav Tarasiuk devlink_port_health_reporter_destroy(priv->tx_reporter); 563de8650a8SEran Ben Elisha } 564