1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */ 2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */ 3de8650a8SEran Ben Elisha 44edc17fdSAya Levin #include "health.h" 5145e5637SEran Ben Elisha #include "en/ptp.h" 6de8650a8SEran Ben Elisha 7de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) 8de8650a8SEran Ben Elisha { 9e74e28aeSAya Levin unsigned long exp_time = jiffies + 10e74e28aeSAya Levin msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); 11de8650a8SEran Ben Elisha 12de8650a8SEran Ben Elisha while (time_before(jiffies, exp_time)) { 13de8650a8SEran Ben Elisha if (sq->cc == sq->pc) 14de8650a8SEran Ben Elisha return 0; 15de8650a8SEran Ben Elisha 16de8650a8SEran Ben Elisha msleep(20); 17de8650a8SEran Ben Elisha } 18de8650a8SEran Ben Elisha 194ad40d8eSEran Ben Elisha netdev_err(sq->netdev, 20de8650a8SEran Ben Elisha "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", 21de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 22de8650a8SEran Ben Elisha 23de8650a8SEran Ben Elisha return -ETIMEDOUT; 24de8650a8SEran Ben Elisha } 25de8650a8SEran Ben Elisha 26de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) 27de8650a8SEran Ben Elisha { 28de8650a8SEran Ben Elisha WARN_ONCE(sq->cc != sq->pc, 29de8650a8SEran Ben Elisha "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", 30de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 31de8650a8SEran Ben Elisha sq->cc = 0; 32de8650a8SEran Ben Elisha sq->dma_fifo_cc = 0; 33de8650a8SEran Ben Elisha sq->pc = 0; 34de8650a8SEran Ben Elisha } 35de8650a8SEran Ben Elisha 36c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) 37de8650a8SEran Ben Elisha { 38c50de4afSAya Levin struct mlx5_core_dev *mdev; 39c50de4afSAya Levin struct net_device *dev; 40c50de4afSAya Levin struct mlx5e_txqsq *sq; 41de8650a8SEran Ben Elisha u8 state; 42de8650a8SEran Ben Elisha int err; 43de8650a8SEran Ben Elisha 44c50de4afSAya Levin sq = ctx; 454ad40d8eSEran Ben Elisha mdev = sq->mdev; 464ad40d8eSEran Ben Elisha dev = sq->netdev; 47c50de4afSAya Levin 48c50de4afSAya Levin if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) 49c50de4afSAya Levin return 0; 50c50de4afSAya Levin 51de8650a8SEran Ben Elisha err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); 52de8650a8SEran Ben Elisha if (err) { 53de8650a8SEran Ben Elisha netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", 54de8650a8SEran Ben Elisha sq->sqn, err); 55276d197eSAya Levin goto out; 56de8650a8SEran Ben Elisha } 57de8650a8SEran Ben Elisha 58d9a2fcf5SAya Levin if (state != MLX5_SQC_STATE_ERR) 59276d197eSAya Levin goto out; 60de8650a8SEran Ben Elisha 61de8650a8SEran Ben Elisha mlx5e_tx_disable_queue(sq->txq); 62de8650a8SEran Ben Elisha 63de8650a8SEran Ben Elisha err = mlx5e_wait_for_sq_flush(sq); 64de8650a8SEran Ben Elisha if (err) 65276d197eSAya Levin goto out; 66de8650a8SEran Ben Elisha 67de8650a8SEran Ben Elisha /* At this point, no new packets will arrive from the stack as TXQ is 68de8650a8SEran Ben Elisha * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all 69de8650a8SEran Ben Elisha * pending WQEs. SQ can safely reset the SQ. 70de8650a8SEran Ben Elisha */ 71de8650a8SEran Ben Elisha 724ad40d8eSEran Ben Elisha err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); 73de8650a8SEran Ben Elisha if (err) 74276d197eSAya Levin goto out; 75de8650a8SEran Ben Elisha 76de8650a8SEran Ben Elisha mlx5e_reset_txqsq_cc_pc(sq); 77de8650a8SEran Ben Elisha sq->stats->recover++; 78276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 79de8650a8SEran Ben Elisha mlx5e_activate_txqsq(sq); 80de8650a8SEran Ben Elisha 81de8650a8SEran Ben Elisha return 0; 82276d197eSAya Levin out: 83276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 84276d197eSAya Levin return err; 85de8650a8SEran Ben Elisha } 86de8650a8SEran Ben Elisha 87e6205564SAya Levin struct mlx5e_tx_timeout_ctx { 88e6205564SAya Levin struct mlx5e_txqsq *sq; 89e6205564SAya Levin signed int status; 90e6205564SAya Levin }; 91e6205564SAya Levin 92c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx) 937d91126bSEran Ben Elisha { 94e6205564SAya Levin struct mlx5e_tx_timeout_ctx *to_ctx; 95e6205564SAya Levin struct mlx5e_priv *priv; 96c50de4afSAya Levin struct mlx5_eq_comp *eq; 97c50de4afSAya Levin struct mlx5e_txqsq *sq; 98c50de4afSAya Levin int err; 997d91126bSEran Ben Elisha 100e6205564SAya Levin to_ctx = ctx; 101e6205564SAya Levin sq = to_ctx->sq; 102c50de4afSAya Levin eq = sq->cq.mcq.eq; 1034ad40d8eSEran Ben Elisha priv = sq->priv; 1044ad40d8eSEran Ben Elisha err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); 105e6205564SAya Levin if (!err) { 106e6205564SAya Levin to_ctx->status = 0; /* this sq recovered */ 107e6205564SAya Levin return err; 108e6205564SAya Levin } 109e6205564SAya Levin 110e6205564SAya Levin err = mlx5e_safe_reopen_channels(priv); 111e6205564SAya Levin if (!err) { 112e6205564SAya Levin to_ctx->status = 1; /* all channels recovered */ 113e6205564SAya Levin return err; 114e6205564SAya Levin } 115e6205564SAya Levin 116e6205564SAya Levin to_ctx->status = err; 1177d91126bSEran Ben Elisha clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 118e6205564SAya Levin netdev_err(priv->netdev, 119e6205564SAya Levin "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", 120e6205564SAya Levin err); 1217d91126bSEran Ben Elisha 122c50de4afSAya Levin return err; 1237d91126bSEran Ben Elisha } 1247d91126bSEran Ben Elisha 125de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function. 126de8650a8SEran Ben Elisha * It can cause a dead lock or a read-after-free. 127de8650a8SEran Ben Elisha */ 128c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) 129de8650a8SEran Ben Elisha { 130c50de4afSAya Levin return err_ctx->recover(err_ctx->ctx); 131de8650a8SEran Ben Elisha } 132de8650a8SEran Ben Elisha 133de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, 134e7a98105SJiri Pirko void *context, 135e7a98105SJiri Pirko struct netlink_ext_ack *extack) 136de8650a8SEran Ben Elisha { 137de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 138c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx = context; 139de8650a8SEran Ben Elisha 140de8650a8SEran Ben Elisha return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : 141c50de4afSAya Levin mlx5e_health_recover_channels(priv); 142de8650a8SEran Ben Elisha } 143de8650a8SEran Ben Elisha 144de8650a8SEran Ben Elisha static int 145145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, 1462d708887SAya Levin struct mlx5e_txqsq *sq, int tc) 147de8650a8SEran Ben Elisha { 148dd921fd2SAya Levin bool stopped = netif_xmit_stopped(sq->txq); 1494ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 150dd921fd2SAya Levin u8 state; 151de8650a8SEran Ben Elisha int err; 152de8650a8SEran Ben Elisha 153dd921fd2SAya Levin err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); 154dd921fd2SAya Levin if (err) 155dd921fd2SAya Levin return err; 156dd921fd2SAya Levin 1572d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); 1582d708887SAya Levin if (err) 1592d708887SAya Levin return err; 1602d708887SAya Levin 1612d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); 1622d708887SAya Levin if (err) 1632d708887SAya Levin return err; 1642d708887SAya Levin 165dd921fd2SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); 166de8650a8SEran Ben Elisha if (err) 167de8650a8SEran Ben Elisha return err; 168de8650a8SEran Ben Elisha 169de8650a8SEran Ben Elisha err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); 170de8650a8SEran Ben Elisha if (err) 171de8650a8SEran Ben Elisha return err; 172de8650a8SEran Ben Elisha 173de8650a8SEran Ben Elisha err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); 174de8650a8SEran Ben Elisha if (err) 175de8650a8SEran Ben Elisha return err; 176de8650a8SEran Ben Elisha 1772d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); 1782d708887SAya Levin if (err) 1792d708887SAya Levin return err; 1802d708887SAya Levin 1812d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); 1822d708887SAya Levin if (err) 1832d708887SAya Levin return err; 1842d708887SAya Levin 185d5cbedd7SAya Levin err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); 1862bf09e60SAya Levin if (err) 1872bf09e60SAya Levin return err; 1882bf09e60SAya Levin 189145e5637SEran Ben Elisha return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); 190145e5637SEran Ben Elisha } 191145e5637SEran Ben Elisha 192145e5637SEran Ben Elisha static int 193145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, 194145e5637SEran Ben Elisha struct mlx5e_txqsq *sq, int tc) 195145e5637SEran Ben Elisha { 196145e5637SEran Ben Elisha int err; 197145e5637SEran Ben Elisha 198145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 199145e5637SEran Ben Elisha if (err) 200145e5637SEran Ben Elisha return err; 201145e5637SEran Ben Elisha 202145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); 203145e5637SEran Ben Elisha if (err) 204145e5637SEran Ben Elisha return err; 205145e5637SEran Ben Elisha 206145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); 20756837c2aSAya Levin if (err) 20856837c2aSAya Levin return err; 20956837c2aSAya Levin 210de8650a8SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 211de8650a8SEran Ben Elisha if (err) 212de8650a8SEran Ben Elisha return err; 213de8650a8SEran Ben Elisha 214de8650a8SEran Ben Elisha return 0; 215de8650a8SEran Ben Elisha } 216de8650a8SEran Ben Elisha 217145e5637SEran Ben Elisha static int 218145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, 219145e5637SEran Ben Elisha struct mlx5e_ptpsq *ptpsq, int tc) 220145e5637SEran Ben Elisha { 221145e5637SEran Ben Elisha int err; 222145e5637SEran Ben Elisha 223145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 224145e5637SEran Ben Elisha if (err) 225145e5637SEran Ben Elisha return err; 226145e5637SEran Ben Elisha 227145e5637SEran Ben Elisha err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); 228145e5637SEran Ben Elisha if (err) 229145e5637SEran Ben Elisha return err; 230145e5637SEran Ben Elisha 231*1880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc); 232*1880bc4eSEran Ben Elisha if (err) 233*1880bc4eSEran Ben Elisha return err; 234*1880bc4eSEran Ben Elisha 235*1880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 236*1880bc4eSEran Ben Elisha if (err) 237*1880bc4eSEran Ben Elisha return err; 238*1880bc4eSEran Ben Elisha 239*1880bc4eSEran Ben Elisha err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg); 240*1880bc4eSEran Ben Elisha if (err) 241*1880bc4eSEran Ben Elisha return err; 242*1880bc4eSEran Ben Elisha 243*1880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 244145e5637SEran Ben Elisha if (err) 245145e5637SEran Ben Elisha return err; 246145e5637SEran Ben Elisha 247145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 248145e5637SEran Ben Elisha if (err) 249145e5637SEran Ben Elisha return err; 250145e5637SEran Ben Elisha 251145e5637SEran Ben Elisha return 0; 252145e5637SEran Ben Elisha } 253145e5637SEran Ben Elisha 254145e5637SEran Ben Elisha static int 255145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, 256145e5637SEran Ben Elisha struct mlx5e_txqsq *txqsq) 257145e5637SEran Ben Elisha { 258145e5637SEran Ben Elisha u32 sq_stride, sq_sz; 259145e5637SEran Ben Elisha int err; 260145e5637SEran Ben Elisha 261145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 262145e5637SEran Ben Elisha if (err) 263145e5637SEran Ben Elisha return err; 264145e5637SEran Ben Elisha 265145e5637SEran Ben Elisha sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); 266145e5637SEran Ben Elisha sq_stride = MLX5_SEND_WQE_BB; 267145e5637SEran Ben Elisha 268145e5637SEran Ben Elisha err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); 269145e5637SEran Ben Elisha if (err) 270145e5637SEran Ben Elisha return err; 271145e5637SEran Ben Elisha 272145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); 273145e5637SEran Ben Elisha if (err) 274145e5637SEran Ben Elisha return err; 275145e5637SEran Ben Elisha 276145e5637SEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); 277145e5637SEran Ben Elisha if (err) 278145e5637SEran Ben Elisha return err; 279145e5637SEran Ben Elisha 280145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 281145e5637SEran Ben Elisha } 282145e5637SEran Ben Elisha 283145e5637SEran Ben Elisha static int 284*1880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, 285*1880bc4eSEran Ben Elisha struct mlx5e_ptpsq *ptpsq) 286*1880bc4eSEran Ben Elisha { 287*1880bc4eSEran Ben Elisha int err; 288*1880bc4eSEran Ben Elisha 289*1880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 290*1880bc4eSEran Ben Elisha if (err) 291*1880bc4eSEran Ben Elisha return err; 292*1880bc4eSEran Ben Elisha 293*1880bc4eSEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg); 294*1880bc4eSEran Ben Elisha if (err) 295*1880bc4eSEran Ben Elisha return err; 296*1880bc4eSEran Ben Elisha 297*1880bc4eSEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 298*1880bc4eSEran Ben Elisha } 299*1880bc4eSEran Ben Elisha 300*1880bc4eSEran Ben Elisha static int 301145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, 302145e5637SEran Ben Elisha struct devlink_fmsg *fmsg) 303145e5637SEran Ben Elisha { 304145e5637SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 305145e5637SEran Ben Elisha struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; 306145e5637SEran Ben Elisha struct mlx5e_ptpsq *generic_ptpsq; 307145e5637SEran Ben Elisha int err; 308145e5637SEran Ben Elisha 309145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); 310145e5637SEran Ben Elisha if (err) 311145e5637SEran Ben Elisha return err; 312145e5637SEran Ben Elisha 313145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); 314145e5637SEran Ben Elisha if (err) 315145e5637SEran Ben Elisha return err; 316145e5637SEran Ben Elisha 317145e5637SEran Ben Elisha generic_ptpsq = priv->channels.port_ptp ? 318145e5637SEran Ben Elisha &priv->channels.port_ptp->ptpsq[0] : 319145e5637SEran Ben Elisha NULL; 320145e5637SEran Ben Elisha if (!generic_ptpsq) 321145e5637SEran Ben Elisha goto out; 322145e5637SEran Ben Elisha 323145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); 324145e5637SEran Ben Elisha if (err) 325145e5637SEran Ben Elisha return err; 326145e5637SEran Ben Elisha 327145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); 328145e5637SEran Ben Elisha if (err) 329145e5637SEran Ben Elisha return err; 330145e5637SEran Ben Elisha 331*1880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq); 332*1880bc4eSEran Ben Elisha if (err) 333*1880bc4eSEran Ben Elisha return err; 334*1880bc4eSEran Ben Elisha 335145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 336145e5637SEran Ben Elisha if (err) 337145e5637SEran Ben Elisha return err; 338145e5637SEran Ben Elisha 339145e5637SEran Ben Elisha out: 340145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 341145e5637SEran Ben Elisha } 342145e5637SEran Ben Elisha 343de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, 344e7a98105SJiri Pirko struct devlink_fmsg *fmsg, 345e7a98105SJiri Pirko struct netlink_ext_ack *extack) 346de8650a8SEran Ben Elisha { 347de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 348145e5637SEran Ben Elisha struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp; 3492d708887SAya Levin 3502d708887SAya Levin int i, tc, err = 0; 351de8650a8SEran Ben Elisha 352de8650a8SEran Ben Elisha mutex_lock(&priv->state_lock); 353de8650a8SEran Ben Elisha 354de8650a8SEran Ben Elisha if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 355de8650a8SEran Ben Elisha goto unlock; 356de8650a8SEran Ben Elisha 357145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); 3582d708887SAya Levin if (err) 3592d708887SAya Levin goto unlock; 3602d708887SAya Levin 361de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 362de8650a8SEran Ben Elisha if (err) 363de8650a8SEran Ben Elisha goto unlock; 364de8650a8SEran Ben Elisha 3652d708887SAya Levin for (i = 0; i < priv->channels.num; i++) { 3662d708887SAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 367de8650a8SEran Ben Elisha 3682d708887SAya Levin for (tc = 0; tc < priv->channels.params.num_tc; tc++) { 3692d708887SAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 3702d708887SAya Levin 3712d708887SAya Levin err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); 372de8650a8SEran Ben Elisha if (err) 37399d31cbdSAya Levin goto unlock; 374de8650a8SEran Ben Elisha } 3752d708887SAya Levin } 376145e5637SEran Ben Elisha 377145e5637SEran Ben Elisha if (!ptp_ch) 378145e5637SEran Ben Elisha goto close_sqs_nest; 379145e5637SEran Ben Elisha 380145e5637SEran Ben Elisha for (tc = 0; tc < priv->channels.params.num_tc; tc++) { 381145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, 382145e5637SEran Ben Elisha &ptp_ch->ptpsq[tc], 383145e5637SEran Ben Elisha tc); 384145e5637SEran Ben Elisha if (err) 385145e5637SEran Ben Elisha goto unlock; 386145e5637SEran Ben Elisha } 387145e5637SEran Ben Elisha 388145e5637SEran Ben Elisha close_sqs_nest: 389de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_end(fmsg); 390de8650a8SEran Ben Elisha if (err) 391de8650a8SEran Ben Elisha goto unlock; 392de8650a8SEran Ben Elisha 393de8650a8SEran Ben Elisha unlock: 394de8650a8SEran Ben Elisha mutex_unlock(&priv->state_lock); 395de8650a8SEran Ben Elisha return err; 396de8650a8SEran Ben Elisha } 397de8650a8SEran Ben Elisha 3985f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 3995f29458bSAya Levin void *ctx) 4005f29458bSAya Levin { 4015f29458bSAya Levin struct mlx5_rsc_key key = {}; 4025f29458bSAya Levin struct mlx5e_txqsq *sq = ctx; 4035f29458bSAya Levin int err; 4045f29458bSAya Levin 4055f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4065f29458bSAya Levin return 0; 4075f29458bSAya Levin 408d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4095f29458bSAya Levin if (err) 4105f29458bSAya Levin return err; 4115f29458bSAya Levin 4125f29458bSAya Levin key.size = PAGE_SIZE; 4135f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4145f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4155f29458bSAya Levin if (err) 4165f29458bSAya Levin return err; 4175f29458bSAya Levin 418d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4195f29458bSAya Levin if (err) 4205f29458bSAya Levin return err; 4215f29458bSAya Levin 422d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 4235f29458bSAya Levin if (err) 4245f29458bSAya Levin return err; 4255f29458bSAya Levin 426d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 4275f29458bSAya Levin if (err) 4285f29458bSAya Levin return err; 4295f29458bSAya Levin 4305f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 4315f29458bSAya Levin key.index1 = sq->sqn; 4325f29458bSAya Levin key.num_of_obj1 = 1; 4335f29458bSAya Levin 4345f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4355f29458bSAya Levin if (err) 4365f29458bSAya Levin return err; 4375f29458bSAya Levin 438d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4395f29458bSAya Levin if (err) 4405f29458bSAya Levin return err; 4415f29458bSAya Levin 442d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); 4435f29458bSAya Levin if (err) 4445f29458bSAya Levin return err; 4455f29458bSAya Levin 4465f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SND_BUFF; 4475f29458bSAya Levin key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 4485f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4495f29458bSAya Levin if (err) 4505f29458bSAya Levin return err; 4515f29458bSAya Levin 452d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4535f29458bSAya Levin if (err) 4545f29458bSAya Levin return err; 4555f29458bSAya Levin 456d5cbedd7SAya Levin return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4575f29458bSAya Levin } 4585f29458bSAya Levin 4595f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, 4605f29458bSAya Levin struct devlink_fmsg *fmsg) 4615f29458bSAya Levin { 462145e5637SEran Ben Elisha struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp; 4635f29458bSAya Levin struct mlx5_rsc_key key = {}; 4645f29458bSAya Levin int i, tc, err; 4655f29458bSAya Levin 4665f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4675f29458bSAya Levin return 0; 4685f29458bSAya Levin 469d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4705f29458bSAya Levin if (err) 4715f29458bSAya Levin return err; 4725f29458bSAya Levin 4735f29458bSAya Levin key.size = PAGE_SIZE; 4745f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4755f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4765f29458bSAya Levin if (err) 4775f29458bSAya Levin return err; 4785f29458bSAya Levin 479d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4805f29458bSAya Levin if (err) 4815f29458bSAya Levin return err; 4825f29458bSAya Levin 4835f29458bSAya Levin err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 4845f29458bSAya Levin if (err) 4855f29458bSAya Levin return err; 4865f29458bSAya Levin 4875f29458bSAya Levin for (i = 0; i < priv->channels.num; i++) { 4885f29458bSAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 4895f29458bSAya Levin 4905f29458bSAya Levin for (tc = 0; tc < priv->channels.params.num_tc; tc++) { 4915f29458bSAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 4925f29458bSAya Levin 4935f29458bSAya Levin err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); 4945f29458bSAya Levin if (err) 4955f29458bSAya Levin return err; 4965f29458bSAya Levin } 4975f29458bSAya Levin } 498145e5637SEran Ben Elisha 499145e5637SEran Ben Elisha if (ptp_ch) { 500145e5637SEran Ben Elisha for (tc = 0; tc < priv->channels.params.num_tc; tc++) { 501145e5637SEran Ben Elisha struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; 502145e5637SEran Ben Elisha 503145e5637SEran Ben Elisha err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); 504145e5637SEran Ben Elisha if (err) 505145e5637SEran Ben Elisha return err; 506145e5637SEran Ben Elisha } 507145e5637SEran Ben Elisha } 508145e5637SEran Ben Elisha 5095f29458bSAya Levin return devlink_fmsg_arr_pair_nest_end(fmsg); 5105f29458bSAya Levin } 5115f29458bSAya Levin 5125f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, 5135f29458bSAya Levin struct mlx5e_err_ctx *err_ctx, 5145f29458bSAya Levin struct devlink_fmsg *fmsg) 5155f29458bSAya Levin { 5165f29458bSAya Levin return err_ctx->dump(priv, fmsg, err_ctx->ctx); 5175f29458bSAya Levin } 5185f29458bSAya Levin 5195f29458bSAya Levin static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, 5205f29458bSAya Levin struct devlink_fmsg *fmsg, void *context, 5215f29458bSAya Levin struct netlink_ext_ack *extack) 5225f29458bSAya Levin { 5235f29458bSAya Levin struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 5245f29458bSAya Levin struct mlx5e_err_ctx *err_ctx = context; 5255f29458bSAya Levin 5265f29458bSAya Levin return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : 5275f29458bSAya Levin mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); 5285f29458bSAya Levin } 5295f29458bSAya Levin 5300a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) 5310a56be3cSAya Levin { 5320a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 5334ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5340a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5350a56be3cSAya Levin 5360a56be3cSAya Levin err_ctx.ctx = sq; 5370a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; 5385f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 539b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); 5400a56be3cSAya Levin 5410a56be3cSAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 5420a56be3cSAya Levin } 5430a56be3cSAya Levin 5440a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) 5450a56be3cSAya Levin { 5460a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 547e6205564SAya Levin struct mlx5e_tx_timeout_ctx to_ctx = {}; 5484ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5490a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5500a56be3cSAya Levin 551e6205564SAya Levin to_ctx.sq = sq; 552e6205564SAya Levin err_ctx.ctx = &to_ctx; 5530a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_timeout_recover; 5545f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 555b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), 556b21aef7eSJoe Perches "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", 5574ad40d8eSEran Ben Elisha sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 5580a56be3cSAya Levin jiffies_to_usecs(jiffies - sq->txq->trans_start)); 5590a56be3cSAya Levin 560e6205564SAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 561e6205564SAya Levin return to_ctx.status; 5620a56be3cSAya Levin } 5630a56be3cSAya Levin 564de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { 565de8650a8SEran Ben Elisha .name = "tx", 566de8650a8SEran Ben Elisha .recover = mlx5e_tx_reporter_recover, 567de8650a8SEran Ben Elisha .diagnose = mlx5e_tx_reporter_diagnose, 5685f29458bSAya Levin .dump = mlx5e_tx_reporter_dump, 569de8650a8SEran Ben Elisha }; 570de8650a8SEran Ben Elisha 571de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 572de8650a8SEran Ben Elisha 573b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) 574de8650a8SEran Ben Elisha { 575baf6dfdbSAya Levin struct devlink_health_reporter *reporter; 576de8650a8SEran Ben Elisha 577b7e93bb6SVladyslav Tarasiuk reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_tx_reporter_ops, 578b7e93bb6SVladyslav Tarasiuk MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); 579baf6dfdbSAya Levin if (IS_ERR(reporter)) { 580de8650a8SEran Ben Elisha netdev_warn(priv->netdev, 581de8650a8SEran Ben Elisha "Failed to create tx reporter, err = %ld\n", 582baf6dfdbSAya Levin PTR_ERR(reporter)); 583b3ea4c4fSEran Ben Elisha return; 5847f7cc235SAya Levin } 585baf6dfdbSAya Levin priv->tx_reporter = reporter; 586de8650a8SEran Ben Elisha } 587de8650a8SEran Ben Elisha 58806293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) 589de8650a8SEran Ben Elisha { 590baf6dfdbSAya Levin if (!priv->tx_reporter) 591de8650a8SEran Ben Elisha return; 592de8650a8SEran Ben Elisha 593b7e93bb6SVladyslav Tarasiuk devlink_port_health_reporter_destroy(priv->tx_reporter); 594de8650a8SEran Ben Elisha } 595