1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */ 2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */ 3de8650a8SEran Ben Elisha 44edc17fdSAya Levin #include "health.h" 5145e5637SEran Ben Elisha #include "en/ptp.h" 6c27971d0SRoi Dayan #include "en/devlink.h" 732def412SAmir Tzin #include "lib/tout.h" 8de8650a8SEran Ben Elisha 9de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) 10de8650a8SEran Ben Elisha { 1132def412SAmir Tzin struct mlx5_core_dev *dev = sq->mdev; 1232def412SAmir Tzin unsigned long exp_time; 1332def412SAmir Tzin 1432def412SAmir Tzin exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); 15de8650a8SEran Ben Elisha 16de8650a8SEran Ben Elisha while (time_before(jiffies, exp_time)) { 17de8650a8SEran Ben Elisha if (sq->cc == sq->pc) 18de8650a8SEran Ben Elisha return 0; 19de8650a8SEran Ben Elisha 20de8650a8SEran Ben Elisha msleep(20); 21de8650a8SEran Ben Elisha } 22de8650a8SEran Ben Elisha 234ad40d8eSEran Ben Elisha netdev_err(sq->netdev, 24de8650a8SEran Ben Elisha "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", 25de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 26de8650a8SEran Ben Elisha 27de8650a8SEran Ben Elisha return -ETIMEDOUT; 28de8650a8SEran Ben Elisha } 29de8650a8SEran Ben Elisha 30de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) 31de8650a8SEran Ben Elisha { 32de8650a8SEran Ben Elisha WARN_ONCE(sq->cc != sq->pc, 33de8650a8SEran Ben Elisha "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", 34de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 35de8650a8SEran Ben Elisha sq->cc = 0; 36de8650a8SEran Ben Elisha sq->dma_fifo_cc = 0; 37de8650a8SEran Ben Elisha sq->pc = 0; 38de8650a8SEran Ben Elisha } 39de8650a8SEran Ben Elisha 40c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) 41de8650a8SEran Ben Elisha { 42c50de4afSAya Levin struct mlx5_core_dev *mdev; 43c50de4afSAya Levin struct net_device *dev; 44c50de4afSAya Levin struct mlx5e_txqsq *sq; 45de8650a8SEran Ben Elisha u8 state; 46de8650a8SEran Ben Elisha int err; 47de8650a8SEran Ben Elisha 48c50de4afSAya Levin sq = ctx; 494ad40d8eSEran Ben Elisha mdev = sq->mdev; 504ad40d8eSEran Ben Elisha dev = sq->netdev; 51c50de4afSAya Levin 52c50de4afSAya Levin if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) 53c50de4afSAya Levin return 0; 54c50de4afSAya Levin 55de8650a8SEran Ben Elisha err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); 56de8650a8SEran Ben Elisha if (err) { 57de8650a8SEran Ben Elisha netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", 58de8650a8SEran Ben Elisha sq->sqn, err); 59276d197eSAya Levin goto out; 60de8650a8SEran Ben Elisha } 61de8650a8SEran Ben Elisha 62d9a2fcf5SAya Levin if (state != MLX5_SQC_STATE_ERR) 63276d197eSAya Levin goto out; 64de8650a8SEran Ben Elisha 65de8650a8SEran Ben Elisha mlx5e_tx_disable_queue(sq->txq); 66de8650a8SEran Ben Elisha 67de8650a8SEran Ben Elisha err = mlx5e_wait_for_sq_flush(sq); 68de8650a8SEran Ben Elisha if (err) 69276d197eSAya Levin goto out; 70de8650a8SEran Ben Elisha 71de8650a8SEran Ben Elisha /* At this point, no new packets will arrive from the stack as TXQ is 72de8650a8SEran Ben Elisha * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all 73de8650a8SEran Ben Elisha * pending WQEs. SQ can safely reset the SQ. 74de8650a8SEran Ben Elisha */ 75de8650a8SEran Ben Elisha 764ad40d8eSEran Ben Elisha err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); 77de8650a8SEran Ben Elisha if (err) 78276d197eSAya Levin goto out; 79de8650a8SEran Ben Elisha 80de8650a8SEran Ben Elisha mlx5e_reset_txqsq_cc_pc(sq); 81de8650a8SEran Ben Elisha sq->stats->recover++; 82276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 83de8650a8SEran Ben Elisha mlx5e_activate_txqsq(sq); 8479efecb4SMaxim Mikityanskiy if (sq->channel) 8579efecb4SMaxim Mikityanskiy mlx5e_trigger_napi_icosq(sq->channel); 8679efecb4SMaxim Mikityanskiy else 8779efecb4SMaxim Mikityanskiy mlx5e_trigger_napi_sched(sq->cq.napi); 88de8650a8SEran Ben Elisha 89de8650a8SEran Ben Elisha return 0; 90276d197eSAya Levin out: 91276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 92276d197eSAya Levin return err; 93de8650a8SEran Ben Elisha } 94de8650a8SEran Ben Elisha 95e6205564SAya Levin struct mlx5e_tx_timeout_ctx { 96e6205564SAya Levin struct mlx5e_txqsq *sq; 97e6205564SAya Levin signed int status; 98e6205564SAya Levin }; 99e6205564SAya Levin 100c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx) 1017d91126bSEran Ben Elisha { 102e6205564SAya Levin struct mlx5e_tx_timeout_ctx *to_ctx; 103e6205564SAya Levin struct mlx5e_priv *priv; 104c50de4afSAya Levin struct mlx5_eq_comp *eq; 105c50de4afSAya Levin struct mlx5e_txqsq *sq; 106c50de4afSAya Levin int err; 1077d91126bSEran Ben Elisha 108e6205564SAya Levin to_ctx = ctx; 109e6205564SAya Levin sq = to_ctx->sq; 110c50de4afSAya Levin eq = sq->cq.mcq.eq; 1114ad40d8eSEran Ben Elisha priv = sq->priv; 1124ad40d8eSEran Ben Elisha err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); 113e6205564SAya Levin if (!err) { 114e6205564SAya Levin to_ctx->status = 0; /* this sq recovered */ 115e6205564SAya Levin return err; 116e6205564SAya Levin } 117e6205564SAya Levin 118e6205564SAya Levin err = mlx5e_safe_reopen_channels(priv); 119e6205564SAya Levin if (!err) { 120e6205564SAya Levin to_ctx->status = 1; /* all channels recovered */ 121e6205564SAya Levin return err; 122e6205564SAya Levin } 123e6205564SAya Levin 124e6205564SAya Levin to_ctx->status = err; 1257d91126bSEran Ben Elisha clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 126e6205564SAya Levin netdev_err(priv->netdev, 127e6205564SAya Levin "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", 128e6205564SAya Levin err); 1297d91126bSEran Ben Elisha 130c50de4afSAya Levin return err; 1317d91126bSEran Ben Elisha } 1327d91126bSEran Ben Elisha 133de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function. 134de8650a8SEran Ben Elisha * It can cause a dead lock or a read-after-free. 135de8650a8SEran Ben Elisha */ 136c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) 137de8650a8SEran Ben Elisha { 138c50de4afSAya Levin return err_ctx->recover(err_ctx->ctx); 139de8650a8SEran Ben Elisha } 140de8650a8SEran Ben Elisha 141de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, 142e7a98105SJiri Pirko void *context, 143e7a98105SJiri Pirko struct netlink_ext_ack *extack) 144de8650a8SEran Ben Elisha { 145de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 146c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx = context; 147de8650a8SEran Ben Elisha 148de8650a8SEran Ben Elisha return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : 149c50de4afSAya Levin mlx5e_health_recover_channels(priv); 150de8650a8SEran Ben Elisha } 151de8650a8SEran Ben Elisha 152de8650a8SEran Ben Elisha static int 153145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, 1542d708887SAya Levin struct mlx5e_txqsq *sq, int tc) 155de8650a8SEran Ben Elisha { 156dd921fd2SAya Levin bool stopped = netif_xmit_stopped(sq->txq); 1574ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 158dd921fd2SAya Levin u8 state; 159de8650a8SEran Ben Elisha int err; 160de8650a8SEran Ben Elisha 161dd921fd2SAya Levin err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); 162dd921fd2SAya Levin if (err) 163dd921fd2SAya Levin return err; 164dd921fd2SAya Levin 1652d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); 1662d708887SAya Levin if (err) 1672d708887SAya Levin return err; 1682d708887SAya Levin 1692d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); 1702d708887SAya Levin if (err) 1712d708887SAya Levin return err; 1722d708887SAya Levin 173dd921fd2SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); 174de8650a8SEran Ben Elisha if (err) 175de8650a8SEran Ben Elisha return err; 176de8650a8SEran Ben Elisha 177de8650a8SEran Ben Elisha err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); 178de8650a8SEran Ben Elisha if (err) 179de8650a8SEran Ben Elisha return err; 180de8650a8SEran Ben Elisha 181de8650a8SEran Ben Elisha err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); 182de8650a8SEran Ben Elisha if (err) 183de8650a8SEran Ben Elisha return err; 184de8650a8SEran Ben Elisha 1852d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); 1862d708887SAya Levin if (err) 1872d708887SAya Levin return err; 1882d708887SAya Levin 1892d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); 1902d708887SAya Levin if (err) 1912d708887SAya Levin return err; 1922d708887SAya Levin 193d5cbedd7SAya Levin err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); 1942bf09e60SAya Levin if (err) 1952bf09e60SAya Levin return err; 1962bf09e60SAya Levin 197145e5637SEran Ben Elisha return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); 198145e5637SEran Ben Elisha } 199145e5637SEran Ben Elisha 200145e5637SEran Ben Elisha static int 201145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, 202145e5637SEran Ben Elisha struct mlx5e_txqsq *sq, int tc) 203145e5637SEran Ben Elisha { 204145e5637SEran Ben Elisha int err; 205145e5637SEran Ben Elisha 206145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 207145e5637SEran Ben Elisha if (err) 208145e5637SEran Ben Elisha return err; 209145e5637SEran Ben Elisha 210145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); 211145e5637SEran Ben Elisha if (err) 212145e5637SEran Ben Elisha return err; 213145e5637SEran Ben Elisha 214145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); 21556837c2aSAya Levin if (err) 21656837c2aSAya Levin return err; 21756837c2aSAya Levin 218de8650a8SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 219de8650a8SEran Ben Elisha if (err) 220de8650a8SEran Ben Elisha return err; 221de8650a8SEran Ben Elisha 222de8650a8SEran Ben Elisha return 0; 223de8650a8SEran Ben Elisha } 224de8650a8SEran Ben Elisha 225145e5637SEran Ben Elisha static int 226145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, 227145e5637SEran Ben Elisha struct mlx5e_ptpsq *ptpsq, int tc) 228145e5637SEran Ben Elisha { 229145e5637SEran Ben Elisha int err; 230145e5637SEran Ben Elisha 231145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 232145e5637SEran Ben Elisha if (err) 233145e5637SEran Ben Elisha return err; 234145e5637SEran Ben Elisha 235145e5637SEran Ben Elisha err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); 236145e5637SEran Ben Elisha if (err) 237145e5637SEran Ben Elisha return err; 238145e5637SEran Ben Elisha 2391880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc); 2401880bc4eSEran Ben Elisha if (err) 2411880bc4eSEran Ben Elisha return err; 2421880bc4eSEran Ben Elisha 2431880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 2441880bc4eSEran Ben Elisha if (err) 2451880bc4eSEran Ben Elisha return err; 2461880bc4eSEran Ben Elisha 2471880bc4eSEran Ben Elisha err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg); 2481880bc4eSEran Ben Elisha if (err) 2491880bc4eSEran Ben Elisha return err; 2501880bc4eSEran Ben Elisha 2511880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 252145e5637SEran Ben Elisha if (err) 253145e5637SEran Ben Elisha return err; 254145e5637SEran Ben Elisha 255145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 256145e5637SEran Ben Elisha if (err) 257145e5637SEran Ben Elisha return err; 258145e5637SEran Ben Elisha 259145e5637SEran Ben Elisha return 0; 260145e5637SEran Ben Elisha } 261145e5637SEran Ben Elisha 262145e5637SEran Ben Elisha static int 263145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, 264145e5637SEran Ben Elisha struct mlx5e_txqsq *txqsq) 265145e5637SEran Ben Elisha { 266145e5637SEran Ben Elisha u32 sq_stride, sq_sz; 26795742c1cSAya Levin bool real_time; 268145e5637SEran Ben Elisha int err; 269145e5637SEran Ben Elisha 270145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 271145e5637SEran Ben Elisha if (err) 272145e5637SEran Ben Elisha return err; 273145e5637SEran Ben Elisha 27495742c1cSAya Levin real_time = mlx5_is_real_time_sq(txqsq->mdev); 275145e5637SEran Ben Elisha sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); 276145e5637SEran Ben Elisha sq_stride = MLX5_SEND_WQE_BB; 277145e5637SEran Ben Elisha 278145e5637SEran Ben Elisha err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); 279145e5637SEran Ben Elisha if (err) 280145e5637SEran Ben Elisha return err; 281145e5637SEran Ben Elisha 282145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); 283145e5637SEran Ben Elisha if (err) 284145e5637SEran Ben Elisha return err; 285145e5637SEran Ben Elisha 28695742c1cSAya Levin err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); 28795742c1cSAya Levin if (err) 28895742c1cSAya Levin return err; 28995742c1cSAya Levin 290145e5637SEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); 291145e5637SEran Ben Elisha if (err) 292145e5637SEran Ben Elisha return err; 293145e5637SEran Ben Elisha 294145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 295145e5637SEran Ben Elisha } 296145e5637SEran Ben Elisha 297145e5637SEran Ben Elisha static int 2981880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, 2991880bc4eSEran Ben Elisha struct mlx5e_ptpsq *ptpsq) 3001880bc4eSEran Ben Elisha { 3011880bc4eSEran Ben Elisha int err; 3021880bc4eSEran Ben Elisha 3031880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 3041880bc4eSEran Ben Elisha if (err) 3051880bc4eSEran Ben Elisha return err; 3061880bc4eSEran Ben Elisha 3071880bc4eSEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg); 3081880bc4eSEran Ben Elisha if (err) 3091880bc4eSEran Ben Elisha return err; 3101880bc4eSEran Ben Elisha 3111880bc4eSEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 3121880bc4eSEran Ben Elisha } 3131880bc4eSEran Ben Elisha 3141880bc4eSEran Ben Elisha static int 315145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, 316145e5637SEran Ben Elisha struct devlink_fmsg *fmsg) 317145e5637SEran Ben Elisha { 318145e5637SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 319145e5637SEran Ben Elisha struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; 32024c22dd0SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 321145e5637SEran Ben Elisha struct mlx5e_ptpsq *generic_ptpsq; 322145e5637SEran Ben Elisha int err; 323145e5637SEran Ben Elisha 324145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); 325145e5637SEran Ben Elisha if (err) 326145e5637SEran Ben Elisha return err; 327145e5637SEran Ben Elisha 328145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); 329145e5637SEran Ben Elisha if (err) 330145e5637SEran Ben Elisha return err; 331145e5637SEran Ben Elisha 33224c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 333145e5637SEran Ben Elisha goto out; 334145e5637SEran Ben Elisha 33524c22dd0SAya Levin generic_ptpsq = &ptp_ch->ptpsq[0]; 33624c22dd0SAya Levin 337145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); 338145e5637SEran Ben Elisha if (err) 339145e5637SEran Ben Elisha return err; 340145e5637SEran Ben Elisha 341145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); 342145e5637SEran Ben Elisha if (err) 343145e5637SEran Ben Elisha return err; 344145e5637SEran Ben Elisha 3451880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq); 3461880bc4eSEran Ben Elisha if (err) 3471880bc4eSEran Ben Elisha return err; 3481880bc4eSEran Ben Elisha 349145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 350145e5637SEran Ben Elisha if (err) 351145e5637SEran Ben Elisha return err; 352145e5637SEran Ben Elisha 353145e5637SEran Ben Elisha out: 354145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 355145e5637SEran Ben Elisha } 356145e5637SEran Ben Elisha 357de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, 358e7a98105SJiri Pirko struct devlink_fmsg *fmsg, 359e7a98105SJiri Pirko struct netlink_ext_ack *extack) 360de8650a8SEran Ben Elisha { 361de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 362b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 3632d708887SAya Levin 3642d708887SAya Levin int i, tc, err = 0; 365de8650a8SEran Ben Elisha 366de8650a8SEran Ben Elisha mutex_lock(&priv->state_lock); 367de8650a8SEran Ben Elisha 368de8650a8SEran Ben Elisha if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 369de8650a8SEran Ben Elisha goto unlock; 370de8650a8SEran Ben Elisha 371145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); 3722d708887SAya Levin if (err) 3732d708887SAya Levin goto unlock; 3742d708887SAya Levin 375de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 376de8650a8SEran Ben Elisha if (err) 377de8650a8SEran Ben Elisha goto unlock; 378de8650a8SEran Ben Elisha 3792d708887SAya Levin for (i = 0; i < priv->channels.num; i++) { 3802d708887SAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 381de8650a8SEran Ben Elisha 38286d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 3832d708887SAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 3842d708887SAya Levin 3852d708887SAya Levin err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); 386de8650a8SEran Ben Elisha if (err) 38799d31cbdSAya Levin goto unlock; 388de8650a8SEran Ben Elisha } 3892d708887SAya Levin } 390145e5637SEran Ben Elisha 39124c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 392145e5637SEran Ben Elisha goto close_sqs_nest; 393145e5637SEran Ben Elisha 39486d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 395145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, 396145e5637SEran Ben Elisha &ptp_ch->ptpsq[tc], 397145e5637SEran Ben Elisha tc); 398145e5637SEran Ben Elisha if (err) 399145e5637SEran Ben Elisha goto unlock; 400145e5637SEran Ben Elisha } 401145e5637SEran Ben Elisha 402145e5637SEran Ben Elisha close_sqs_nest: 403de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_end(fmsg); 404de8650a8SEran Ben Elisha if (err) 405de8650a8SEran Ben Elisha goto unlock; 406de8650a8SEran Ben Elisha 407de8650a8SEran Ben Elisha unlock: 408de8650a8SEran Ben Elisha mutex_unlock(&priv->state_lock); 409de8650a8SEran Ben Elisha return err; 410de8650a8SEran Ben Elisha } 411de8650a8SEran Ben Elisha 4125f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 4135f29458bSAya Levin void *ctx) 4145f29458bSAya Levin { 4155f29458bSAya Levin struct mlx5_rsc_key key = {}; 4165f29458bSAya Levin struct mlx5e_txqsq *sq = ctx; 4175f29458bSAya Levin int err; 4185f29458bSAya Levin 4195f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4205f29458bSAya Levin return 0; 4215f29458bSAya Levin 422d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4235f29458bSAya Levin if (err) 4245f29458bSAya Levin return err; 4255f29458bSAya Levin 4265f29458bSAya Levin key.size = PAGE_SIZE; 4275f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4285f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4295f29458bSAya Levin if (err) 4305f29458bSAya Levin return err; 4315f29458bSAya Levin 432d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4335f29458bSAya Levin if (err) 4345f29458bSAya Levin return err; 4355f29458bSAya Levin 436d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 4375f29458bSAya Levin if (err) 4385f29458bSAya Levin return err; 4395f29458bSAya Levin 440d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 4415f29458bSAya Levin if (err) 4425f29458bSAya Levin return err; 4435f29458bSAya Levin 4445f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 4455f29458bSAya Levin key.index1 = sq->sqn; 4465f29458bSAya Levin key.num_of_obj1 = 1; 4475f29458bSAya Levin 4485f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4495f29458bSAya Levin if (err) 4505f29458bSAya Levin return err; 4515f29458bSAya Levin 452d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4535f29458bSAya Levin if (err) 4545f29458bSAya Levin return err; 4555f29458bSAya Levin 456d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); 4575f29458bSAya Levin if (err) 4585f29458bSAya Levin return err; 4595f29458bSAya Levin 4605f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SND_BUFF; 4615f29458bSAya Levin key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 4625f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4635f29458bSAya Levin if (err) 4645f29458bSAya Levin return err; 4655f29458bSAya Levin 466d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4675f29458bSAya Levin if (err) 4685f29458bSAya Levin return err; 4695f29458bSAya Levin 470d5cbedd7SAya Levin return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4715f29458bSAya Levin } 4725f29458bSAya Levin 473918fc385SAmir Tzin static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 474918fc385SAmir Tzin void *ctx) 475918fc385SAmir Tzin { 476918fc385SAmir Tzin struct mlx5e_tx_timeout_ctx *to_ctx = ctx; 477918fc385SAmir Tzin 478918fc385SAmir Tzin return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); 479918fc385SAmir Tzin } 480918fc385SAmir Tzin 4815f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, 4825f29458bSAya Levin struct devlink_fmsg *fmsg) 4835f29458bSAya Levin { 484b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 4855f29458bSAya Levin struct mlx5_rsc_key key = {}; 4865f29458bSAya Levin int i, tc, err; 4875f29458bSAya Levin 4885f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4895f29458bSAya Levin return 0; 4905f29458bSAya Levin 491d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4925f29458bSAya Levin if (err) 4935f29458bSAya Levin return err; 4945f29458bSAya Levin 4955f29458bSAya Levin key.size = PAGE_SIZE; 4965f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4975f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4985f29458bSAya Levin if (err) 4995f29458bSAya Levin return err; 5005f29458bSAya Levin 501d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 5025f29458bSAya Levin if (err) 5035f29458bSAya Levin return err; 5045f29458bSAya Levin 5055f29458bSAya Levin err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 5065f29458bSAya Levin if (err) 5075f29458bSAya Levin return err; 5085f29458bSAya Levin 5095f29458bSAya Levin for (i = 0; i < priv->channels.num; i++) { 5105f29458bSAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 5115f29458bSAya Levin 51286d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 5135f29458bSAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 5145f29458bSAya Levin 5155f29458bSAya Levin err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); 5165f29458bSAya Levin if (err) 5175f29458bSAya Levin return err; 5185f29458bSAya Levin } 5195f29458bSAya Levin } 520145e5637SEran Ben Elisha 52124c22dd0SAya Levin if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { 52286d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 523145e5637SEran Ben Elisha struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; 524145e5637SEran Ben Elisha 525145e5637SEran Ben Elisha err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); 526145e5637SEran Ben Elisha if (err) 527145e5637SEran Ben Elisha return err; 528145e5637SEran Ben Elisha } 529145e5637SEran Ben Elisha } 530145e5637SEran Ben Elisha 5315f29458bSAya Levin return devlink_fmsg_arr_pair_nest_end(fmsg); 5325f29458bSAya Levin } 5335f29458bSAya Levin 5345f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, 5355f29458bSAya Levin struct mlx5e_err_ctx *err_ctx, 5365f29458bSAya Levin struct devlink_fmsg *fmsg) 5375f29458bSAya Levin { 5385f29458bSAya Levin return err_ctx->dump(priv, fmsg, err_ctx->ctx); 5395f29458bSAya Levin } 5405f29458bSAya Levin 5415f29458bSAya Levin static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, 5425f29458bSAya Levin struct devlink_fmsg *fmsg, void *context, 5435f29458bSAya Levin struct netlink_ext_ack *extack) 5445f29458bSAya Levin { 5455f29458bSAya Levin struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 5465f29458bSAya Levin struct mlx5e_err_ctx *err_ctx = context; 5475f29458bSAya Levin 5485f29458bSAya Levin return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : 5495f29458bSAya Levin mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); 5505f29458bSAya Levin } 5515f29458bSAya Levin 5520a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) 5530a56be3cSAya Levin { 5540a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 5554ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5560a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5570a56be3cSAya Levin 5580a56be3cSAya Levin err_ctx.ctx = sq; 5590a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; 5605f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 561b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); 5620a56be3cSAya Levin 5630a56be3cSAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 5640a56be3cSAya Levin } 5650a56be3cSAya Levin 5660a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) 5670a56be3cSAya Levin { 5680a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 569e6205564SAya Levin struct mlx5e_tx_timeout_ctx to_ctx = {}; 5704ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5710a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5720a56be3cSAya Levin 573e6205564SAya Levin to_ctx.sq = sq; 574e6205564SAya Levin err_ctx.ctx = &to_ctx; 5750a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_timeout_recover; 576918fc385SAmir Tzin err_ctx.dump = mlx5e_tx_reporter_timeout_dump; 577b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), 578b21aef7eSJoe Perches "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", 5794ad40d8eSEran Ben Elisha sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 5805337824fSEric Dumazet jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start))); 5810a56be3cSAya Levin 582e6205564SAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 583e6205564SAya Levin return to_ctx.status; 5840a56be3cSAya Levin } 5850a56be3cSAya Levin 586de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { 587de8650a8SEran Ben Elisha .name = "tx", 588de8650a8SEran Ben Elisha .recover = mlx5e_tx_reporter_recover, 589de8650a8SEran Ben Elisha .diagnose = mlx5e_tx_reporter_diagnose, 5905f29458bSAya Levin .dump = mlx5e_tx_reporter_dump, 591de8650a8SEran Ben Elisha }; 592de8650a8SEran Ben Elisha 593de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 594de8650a8SEran Ben Elisha 595b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) 596de8650a8SEran Ben Elisha { 597baf6dfdbSAya Levin struct devlink_health_reporter *reporter; 598de8650a8SEran Ben Elisha 599*bc1536f3SJiri Pirko reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, 600*bc1536f3SJiri Pirko &mlx5_tx_reporter_ops, 601b7e93bb6SVladyslav Tarasiuk MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); 602baf6dfdbSAya Levin if (IS_ERR(reporter)) { 603de8650a8SEran Ben Elisha netdev_warn(priv->netdev, 604de8650a8SEran Ben Elisha "Failed to create tx reporter, err = %ld\n", 605baf6dfdbSAya Levin PTR_ERR(reporter)); 606b3ea4c4fSEran Ben Elisha return; 6077f7cc235SAya Levin } 608baf6dfdbSAya Levin priv->tx_reporter = reporter; 609de8650a8SEran Ben Elisha } 610de8650a8SEran Ben Elisha 61106293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) 612de8650a8SEran Ben Elisha { 613baf6dfdbSAya Levin if (!priv->tx_reporter) 614de8650a8SEran Ben Elisha return; 615de8650a8SEran Ben Elisha 6169f167327SJiri Pirko devlink_health_reporter_destroy(priv->tx_reporter); 6177a9fb35eSRoi Dayan priv->tx_reporter = NULL; 618de8650a8SEran Ben Elisha } 619