1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */ 2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */ 3de8650a8SEran Ben Elisha 44edc17fdSAya Levin #include "health.h" 5145e5637SEran Ben Elisha #include "en/ptp.h" 6c27971d0SRoi Dayan #include "en/devlink.h" 732def412SAmir Tzin #include "lib/tout.h" 8de8650a8SEran Ben Elisha 9de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) 10de8650a8SEran Ben Elisha { 1132def412SAmir Tzin struct mlx5_core_dev *dev = sq->mdev; 1232def412SAmir Tzin unsigned long exp_time; 1332def412SAmir Tzin 1432def412SAmir Tzin exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); 15de8650a8SEran Ben Elisha 16de8650a8SEran Ben Elisha while (time_before(jiffies, exp_time)) { 17de8650a8SEran Ben Elisha if (sq->cc == sq->pc) 18de8650a8SEran Ben Elisha return 0; 19de8650a8SEran Ben Elisha 20de8650a8SEran Ben Elisha msleep(20); 21de8650a8SEran Ben Elisha } 22de8650a8SEran Ben Elisha 234ad40d8eSEran Ben Elisha netdev_err(sq->netdev, 24de8650a8SEran Ben Elisha "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", 25de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 26de8650a8SEran Ben Elisha 27de8650a8SEran Ben Elisha return -ETIMEDOUT; 28de8650a8SEran Ben Elisha } 29de8650a8SEran Ben Elisha 30de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) 31de8650a8SEran Ben Elisha { 32de8650a8SEran Ben Elisha WARN_ONCE(sq->cc != sq->pc, 33de8650a8SEran Ben Elisha "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", 34de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 35de8650a8SEran Ben Elisha sq->cc = 0; 36de8650a8SEran Ben Elisha sq->dma_fifo_cc = 0; 37de8650a8SEran Ben Elisha sq->pc = 0; 38de8650a8SEran Ben Elisha } 39de8650a8SEran Ben Elisha 40c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) 41de8650a8SEran Ben Elisha { 42c50de4afSAya Levin struct mlx5_core_dev *mdev; 43c50de4afSAya Levin struct net_device *dev; 44c50de4afSAya Levin struct mlx5e_txqsq *sq; 45de8650a8SEran Ben Elisha u8 state; 46de8650a8SEran Ben Elisha int err; 47de8650a8SEran Ben Elisha 48c50de4afSAya Levin sq = ctx; 494ad40d8eSEran Ben Elisha mdev = sq->mdev; 504ad40d8eSEran Ben Elisha dev = sq->netdev; 51c50de4afSAya Levin 52c50de4afSAya Levin if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) 53c50de4afSAya Levin return 0; 54c50de4afSAya Levin 55de8650a8SEran Ben Elisha err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); 56de8650a8SEran Ben Elisha if (err) { 57de8650a8SEran Ben Elisha netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", 58de8650a8SEran Ben Elisha sq->sqn, err); 59276d197eSAya Levin goto out; 60de8650a8SEran Ben Elisha } 61de8650a8SEran Ben Elisha 62d9a2fcf5SAya Levin if (state != MLX5_SQC_STATE_ERR) 63276d197eSAya Levin goto out; 64de8650a8SEran Ben Elisha 65de8650a8SEran Ben Elisha mlx5e_tx_disable_queue(sq->txq); 66de8650a8SEran Ben Elisha 67de8650a8SEran Ben Elisha err = mlx5e_wait_for_sq_flush(sq); 68de8650a8SEran Ben Elisha if (err) 69276d197eSAya Levin goto out; 70de8650a8SEran Ben Elisha 71de8650a8SEran Ben Elisha /* At this point, no new packets will arrive from the stack as TXQ is 72de8650a8SEran Ben Elisha * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all 73de8650a8SEran Ben Elisha * pending WQEs. SQ can safely reset the SQ. 74de8650a8SEran Ben Elisha */ 75de8650a8SEran Ben Elisha 764ad40d8eSEran Ben Elisha err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); 77de8650a8SEran Ben Elisha if (err) 78276d197eSAya Levin goto out; 79de8650a8SEran Ben Elisha 80de8650a8SEran Ben Elisha mlx5e_reset_txqsq_cc_pc(sq); 81de8650a8SEran Ben Elisha sq->stats->recover++; 82276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 83de8650a8SEran Ben Elisha mlx5e_activate_txqsq(sq); 84de8650a8SEran Ben Elisha 85de8650a8SEran Ben Elisha return 0; 86276d197eSAya Levin out: 87276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 88276d197eSAya Levin return err; 89de8650a8SEran Ben Elisha } 90de8650a8SEran Ben Elisha 91e6205564SAya Levin struct mlx5e_tx_timeout_ctx { 92e6205564SAya Levin struct mlx5e_txqsq *sq; 93e6205564SAya Levin signed int status; 94e6205564SAya Levin }; 95e6205564SAya Levin 96c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx) 977d91126bSEran Ben Elisha { 98e6205564SAya Levin struct mlx5e_tx_timeout_ctx *to_ctx; 99e6205564SAya Levin struct mlx5e_priv *priv; 100c50de4afSAya Levin struct mlx5_eq_comp *eq; 101c50de4afSAya Levin struct mlx5e_txqsq *sq; 102c50de4afSAya Levin int err; 1037d91126bSEran Ben Elisha 104e6205564SAya Levin to_ctx = ctx; 105e6205564SAya Levin sq = to_ctx->sq; 106c50de4afSAya Levin eq = sq->cq.mcq.eq; 1074ad40d8eSEran Ben Elisha priv = sq->priv; 1084ad40d8eSEran Ben Elisha err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); 109e6205564SAya Levin if (!err) { 110e6205564SAya Levin to_ctx->status = 0; /* this sq recovered */ 111e6205564SAya Levin return err; 112e6205564SAya Levin } 113e6205564SAya Levin 114e6205564SAya Levin err = mlx5e_safe_reopen_channels(priv); 115e6205564SAya Levin if (!err) { 116e6205564SAya Levin to_ctx->status = 1; /* all channels recovered */ 117e6205564SAya Levin return err; 118e6205564SAya Levin } 119e6205564SAya Levin 120e6205564SAya Levin to_ctx->status = err; 1217d91126bSEran Ben Elisha clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 122e6205564SAya Levin netdev_err(priv->netdev, 123e6205564SAya Levin "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", 124e6205564SAya Levin err); 1257d91126bSEran Ben Elisha 126c50de4afSAya Levin return err; 1277d91126bSEran Ben Elisha } 1287d91126bSEran Ben Elisha 129de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function. 130de8650a8SEran Ben Elisha * It can cause a dead lock or a read-after-free. 131de8650a8SEran Ben Elisha */ 132c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) 133de8650a8SEran Ben Elisha { 134c50de4afSAya Levin return err_ctx->recover(err_ctx->ctx); 135de8650a8SEran Ben Elisha } 136de8650a8SEran Ben Elisha 137de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, 138e7a98105SJiri Pirko void *context, 139e7a98105SJiri Pirko struct netlink_ext_ack *extack) 140de8650a8SEran Ben Elisha { 141de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 142c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx = context; 143de8650a8SEran Ben Elisha 144de8650a8SEran Ben Elisha return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : 145c50de4afSAya Levin mlx5e_health_recover_channels(priv); 146de8650a8SEran Ben Elisha } 147de8650a8SEran Ben Elisha 148de8650a8SEran Ben Elisha static int 149145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, 1502d708887SAya Levin struct mlx5e_txqsq *sq, int tc) 151de8650a8SEran Ben Elisha { 152dd921fd2SAya Levin bool stopped = netif_xmit_stopped(sq->txq); 1534ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 154dd921fd2SAya Levin u8 state; 155de8650a8SEran Ben Elisha int err; 156de8650a8SEran Ben Elisha 157dd921fd2SAya Levin err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); 158dd921fd2SAya Levin if (err) 159dd921fd2SAya Levin return err; 160dd921fd2SAya Levin 1612d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); 1622d708887SAya Levin if (err) 1632d708887SAya Levin return err; 1642d708887SAya Levin 1652d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); 1662d708887SAya Levin if (err) 1672d708887SAya Levin return err; 1682d708887SAya Levin 169dd921fd2SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); 170de8650a8SEran Ben Elisha if (err) 171de8650a8SEran Ben Elisha return err; 172de8650a8SEran Ben Elisha 173de8650a8SEran Ben Elisha err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); 174de8650a8SEran Ben Elisha if (err) 175de8650a8SEran Ben Elisha return err; 176de8650a8SEran Ben Elisha 177de8650a8SEran Ben Elisha err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); 178de8650a8SEran Ben Elisha if (err) 179de8650a8SEran Ben Elisha return err; 180de8650a8SEran Ben Elisha 1812d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); 1822d708887SAya Levin if (err) 1832d708887SAya Levin return err; 1842d708887SAya Levin 1852d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); 1862d708887SAya Levin if (err) 1872d708887SAya Levin return err; 1882d708887SAya Levin 189d5cbedd7SAya Levin err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); 1902bf09e60SAya Levin if (err) 1912bf09e60SAya Levin return err; 1922bf09e60SAya Levin 193145e5637SEran Ben Elisha return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); 194145e5637SEran Ben Elisha } 195145e5637SEran Ben Elisha 196145e5637SEran Ben Elisha static int 197145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, 198145e5637SEran Ben Elisha struct mlx5e_txqsq *sq, int tc) 199145e5637SEran Ben Elisha { 200145e5637SEran Ben Elisha int err; 201145e5637SEran Ben Elisha 202145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 203145e5637SEran Ben Elisha if (err) 204145e5637SEran Ben Elisha return err; 205145e5637SEran Ben Elisha 206145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); 207145e5637SEran Ben Elisha if (err) 208145e5637SEran Ben Elisha return err; 209145e5637SEran Ben Elisha 210145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); 21156837c2aSAya Levin if (err) 21256837c2aSAya Levin return err; 21356837c2aSAya Levin 214de8650a8SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 215de8650a8SEran Ben Elisha if (err) 216de8650a8SEran Ben Elisha return err; 217de8650a8SEran Ben Elisha 218de8650a8SEran Ben Elisha return 0; 219de8650a8SEran Ben Elisha } 220de8650a8SEran Ben Elisha 221145e5637SEran Ben Elisha static int 222145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, 223145e5637SEran Ben Elisha struct mlx5e_ptpsq *ptpsq, int tc) 224145e5637SEran Ben Elisha { 225145e5637SEran Ben Elisha int err; 226145e5637SEran Ben Elisha 227145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 228145e5637SEran Ben Elisha if (err) 229145e5637SEran Ben Elisha return err; 230145e5637SEran Ben Elisha 231145e5637SEran Ben Elisha err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); 232145e5637SEran Ben Elisha if (err) 233145e5637SEran Ben Elisha return err; 234145e5637SEran Ben Elisha 2351880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc); 2361880bc4eSEran Ben Elisha if (err) 2371880bc4eSEran Ben Elisha return err; 2381880bc4eSEran Ben Elisha 2391880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 2401880bc4eSEran Ben Elisha if (err) 2411880bc4eSEran Ben Elisha return err; 2421880bc4eSEran Ben Elisha 2431880bc4eSEran Ben Elisha err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg); 2441880bc4eSEran Ben Elisha if (err) 2451880bc4eSEran Ben Elisha return err; 2461880bc4eSEran Ben Elisha 2471880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 248145e5637SEran Ben Elisha if (err) 249145e5637SEran Ben Elisha return err; 250145e5637SEran Ben Elisha 251145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 252145e5637SEran Ben Elisha if (err) 253145e5637SEran Ben Elisha return err; 254145e5637SEran Ben Elisha 255145e5637SEran Ben Elisha return 0; 256145e5637SEran Ben Elisha } 257145e5637SEran Ben Elisha 258145e5637SEran Ben Elisha static int 259145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, 260145e5637SEran Ben Elisha struct mlx5e_txqsq *txqsq) 261145e5637SEran Ben Elisha { 262145e5637SEran Ben Elisha u32 sq_stride, sq_sz; 26395742c1cSAya Levin bool real_time; 264145e5637SEran Ben Elisha int err; 265145e5637SEran Ben Elisha 266145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 267145e5637SEran Ben Elisha if (err) 268145e5637SEran Ben Elisha return err; 269145e5637SEran Ben Elisha 27095742c1cSAya Levin real_time = mlx5_is_real_time_sq(txqsq->mdev); 271145e5637SEran Ben Elisha sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); 272145e5637SEran Ben Elisha sq_stride = MLX5_SEND_WQE_BB; 273145e5637SEran Ben Elisha 274145e5637SEran Ben Elisha err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); 275145e5637SEran Ben Elisha if (err) 276145e5637SEran Ben Elisha return err; 277145e5637SEran Ben Elisha 278145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); 279145e5637SEran Ben Elisha if (err) 280145e5637SEran Ben Elisha return err; 281145e5637SEran Ben Elisha 28295742c1cSAya Levin err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); 28395742c1cSAya Levin if (err) 28495742c1cSAya Levin return err; 28595742c1cSAya Levin 286145e5637SEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); 287145e5637SEran Ben Elisha if (err) 288145e5637SEran Ben Elisha return err; 289145e5637SEran Ben Elisha 290145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 291145e5637SEran Ben Elisha } 292145e5637SEran Ben Elisha 293145e5637SEran Ben Elisha static int 2941880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, 2951880bc4eSEran Ben Elisha struct mlx5e_ptpsq *ptpsq) 2961880bc4eSEran Ben Elisha { 2971880bc4eSEran Ben Elisha int err; 2981880bc4eSEran Ben Elisha 2991880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 3001880bc4eSEran Ben Elisha if (err) 3011880bc4eSEran Ben Elisha return err; 3021880bc4eSEran Ben Elisha 3031880bc4eSEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg); 3041880bc4eSEran Ben Elisha if (err) 3051880bc4eSEran Ben Elisha return err; 3061880bc4eSEran Ben Elisha 3071880bc4eSEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 3081880bc4eSEran Ben Elisha } 3091880bc4eSEran Ben Elisha 3101880bc4eSEran Ben Elisha static int 311145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, 312145e5637SEran Ben Elisha struct devlink_fmsg *fmsg) 313145e5637SEran Ben Elisha { 314145e5637SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 315145e5637SEran Ben Elisha struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; 31624c22dd0SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 317145e5637SEran Ben Elisha struct mlx5e_ptpsq *generic_ptpsq; 318145e5637SEran Ben Elisha int err; 319145e5637SEran Ben Elisha 320145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); 321145e5637SEran Ben Elisha if (err) 322145e5637SEran Ben Elisha return err; 323145e5637SEran Ben Elisha 324145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); 325145e5637SEran Ben Elisha if (err) 326145e5637SEran Ben Elisha return err; 327145e5637SEran Ben Elisha 32824c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 329145e5637SEran Ben Elisha goto out; 330145e5637SEran Ben Elisha 33124c22dd0SAya Levin generic_ptpsq = &ptp_ch->ptpsq[0]; 33224c22dd0SAya Levin 333145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); 334145e5637SEran Ben Elisha if (err) 335145e5637SEran Ben Elisha return err; 336145e5637SEran Ben Elisha 337145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); 338145e5637SEran Ben Elisha if (err) 339145e5637SEran Ben Elisha return err; 340145e5637SEran Ben Elisha 3411880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq); 3421880bc4eSEran Ben Elisha if (err) 3431880bc4eSEran Ben Elisha return err; 3441880bc4eSEran Ben Elisha 345145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 346145e5637SEran Ben Elisha if (err) 347145e5637SEran Ben Elisha return err; 348145e5637SEran Ben Elisha 349145e5637SEran Ben Elisha out: 350145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 351145e5637SEran Ben Elisha } 352145e5637SEran Ben Elisha 353de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, 354e7a98105SJiri Pirko struct devlink_fmsg *fmsg, 355e7a98105SJiri Pirko struct netlink_ext_ack *extack) 356de8650a8SEran Ben Elisha { 357de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 358b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 3592d708887SAya Levin 3602d708887SAya Levin int i, tc, err = 0; 361de8650a8SEran Ben Elisha 362de8650a8SEran Ben Elisha mutex_lock(&priv->state_lock); 363de8650a8SEran Ben Elisha 364de8650a8SEran Ben Elisha if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 365de8650a8SEran Ben Elisha goto unlock; 366de8650a8SEran Ben Elisha 367145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); 3682d708887SAya Levin if (err) 3692d708887SAya Levin goto unlock; 3702d708887SAya Levin 371de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 372de8650a8SEran Ben Elisha if (err) 373de8650a8SEran Ben Elisha goto unlock; 374de8650a8SEran Ben Elisha 3752d708887SAya Levin for (i = 0; i < priv->channels.num; i++) { 3762d708887SAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 377de8650a8SEran Ben Elisha 37886d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 3792d708887SAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 3802d708887SAya Levin 3812d708887SAya Levin err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); 382de8650a8SEran Ben Elisha if (err) 38399d31cbdSAya Levin goto unlock; 384de8650a8SEran Ben Elisha } 3852d708887SAya Levin } 386145e5637SEran Ben Elisha 38724c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 388145e5637SEran Ben Elisha goto close_sqs_nest; 389145e5637SEran Ben Elisha 39086d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 391145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, 392145e5637SEran Ben Elisha &ptp_ch->ptpsq[tc], 393145e5637SEran Ben Elisha tc); 394145e5637SEran Ben Elisha if (err) 395145e5637SEran Ben Elisha goto unlock; 396145e5637SEran Ben Elisha } 397145e5637SEran Ben Elisha 398145e5637SEran Ben Elisha close_sqs_nest: 399de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_end(fmsg); 400de8650a8SEran Ben Elisha if (err) 401de8650a8SEran Ben Elisha goto unlock; 402de8650a8SEran Ben Elisha 403de8650a8SEran Ben Elisha unlock: 404de8650a8SEran Ben Elisha mutex_unlock(&priv->state_lock); 405de8650a8SEran Ben Elisha return err; 406de8650a8SEran Ben Elisha } 407de8650a8SEran Ben Elisha 4085f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 4095f29458bSAya Levin void *ctx) 4105f29458bSAya Levin { 4115f29458bSAya Levin struct mlx5_rsc_key key = {}; 4125f29458bSAya Levin struct mlx5e_txqsq *sq = ctx; 4135f29458bSAya Levin int err; 4145f29458bSAya Levin 4155f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4165f29458bSAya Levin return 0; 4175f29458bSAya Levin 418d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4195f29458bSAya Levin if (err) 4205f29458bSAya Levin return err; 4215f29458bSAya Levin 4225f29458bSAya Levin key.size = PAGE_SIZE; 4235f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4245f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4255f29458bSAya Levin if (err) 4265f29458bSAya Levin return err; 4275f29458bSAya Levin 428d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4295f29458bSAya Levin if (err) 4305f29458bSAya Levin return err; 4315f29458bSAya Levin 432d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 4335f29458bSAya Levin if (err) 4345f29458bSAya Levin return err; 4355f29458bSAya Levin 436d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 4375f29458bSAya Levin if (err) 4385f29458bSAya Levin return err; 4395f29458bSAya Levin 4405f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 4415f29458bSAya Levin key.index1 = sq->sqn; 4425f29458bSAya Levin key.num_of_obj1 = 1; 4435f29458bSAya Levin 4445f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4455f29458bSAya Levin if (err) 4465f29458bSAya Levin return err; 4475f29458bSAya Levin 448d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4495f29458bSAya Levin if (err) 4505f29458bSAya Levin return err; 4515f29458bSAya Levin 452d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); 4535f29458bSAya Levin if (err) 4545f29458bSAya Levin return err; 4555f29458bSAya Levin 4565f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SND_BUFF; 4575f29458bSAya Levin key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 4585f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4595f29458bSAya Levin if (err) 4605f29458bSAya Levin return err; 4615f29458bSAya Levin 462d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4635f29458bSAya Levin if (err) 4645f29458bSAya Levin return err; 4655f29458bSAya Levin 466d5cbedd7SAya Levin return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4675f29458bSAya Levin } 4685f29458bSAya Levin 469*918fc385SAmir Tzin static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 470*918fc385SAmir Tzin void *ctx) 471*918fc385SAmir Tzin { 472*918fc385SAmir Tzin struct mlx5e_tx_timeout_ctx *to_ctx = ctx; 473*918fc385SAmir Tzin 474*918fc385SAmir Tzin return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); 475*918fc385SAmir Tzin } 476*918fc385SAmir Tzin 4775f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, 4785f29458bSAya Levin struct devlink_fmsg *fmsg) 4795f29458bSAya Levin { 480b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 4815f29458bSAya Levin struct mlx5_rsc_key key = {}; 4825f29458bSAya Levin int i, tc, err; 4835f29458bSAya Levin 4845f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4855f29458bSAya Levin return 0; 4865f29458bSAya Levin 487d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4885f29458bSAya Levin if (err) 4895f29458bSAya Levin return err; 4905f29458bSAya Levin 4915f29458bSAya Levin key.size = PAGE_SIZE; 4925f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4935f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4945f29458bSAya Levin if (err) 4955f29458bSAya Levin return err; 4965f29458bSAya Levin 497d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4985f29458bSAya Levin if (err) 4995f29458bSAya Levin return err; 5005f29458bSAya Levin 5015f29458bSAya Levin err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 5025f29458bSAya Levin if (err) 5035f29458bSAya Levin return err; 5045f29458bSAya Levin 5055f29458bSAya Levin for (i = 0; i < priv->channels.num; i++) { 5065f29458bSAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 5075f29458bSAya Levin 50886d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 5095f29458bSAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 5105f29458bSAya Levin 5115f29458bSAya Levin err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); 5125f29458bSAya Levin if (err) 5135f29458bSAya Levin return err; 5145f29458bSAya Levin } 5155f29458bSAya Levin } 516145e5637SEran Ben Elisha 51724c22dd0SAya Levin if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { 51886d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 519145e5637SEran Ben Elisha struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; 520145e5637SEran Ben Elisha 521145e5637SEran Ben Elisha err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); 522145e5637SEran Ben Elisha if (err) 523145e5637SEran Ben Elisha return err; 524145e5637SEran Ben Elisha } 525145e5637SEran Ben Elisha } 526145e5637SEran Ben Elisha 5275f29458bSAya Levin return devlink_fmsg_arr_pair_nest_end(fmsg); 5285f29458bSAya Levin } 5295f29458bSAya Levin 5305f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, 5315f29458bSAya Levin struct mlx5e_err_ctx *err_ctx, 5325f29458bSAya Levin struct devlink_fmsg *fmsg) 5335f29458bSAya Levin { 5345f29458bSAya Levin return err_ctx->dump(priv, fmsg, err_ctx->ctx); 5355f29458bSAya Levin } 5365f29458bSAya Levin 5375f29458bSAya Levin static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, 5385f29458bSAya Levin struct devlink_fmsg *fmsg, void *context, 5395f29458bSAya Levin struct netlink_ext_ack *extack) 5405f29458bSAya Levin { 5415f29458bSAya Levin struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 5425f29458bSAya Levin struct mlx5e_err_ctx *err_ctx = context; 5435f29458bSAya Levin 5445f29458bSAya Levin return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : 5455f29458bSAya Levin mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); 5465f29458bSAya Levin } 5475f29458bSAya Levin 5480a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) 5490a56be3cSAya Levin { 5500a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 5514ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5520a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5530a56be3cSAya Levin 5540a56be3cSAya Levin err_ctx.ctx = sq; 5550a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; 5565f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 557b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); 5580a56be3cSAya Levin 5590a56be3cSAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 5600a56be3cSAya Levin } 5610a56be3cSAya Levin 5620a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) 5630a56be3cSAya Levin { 5640a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 565e6205564SAya Levin struct mlx5e_tx_timeout_ctx to_ctx = {}; 5664ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5670a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5680a56be3cSAya Levin 569e6205564SAya Levin to_ctx.sq = sq; 570e6205564SAya Levin err_ctx.ctx = &to_ctx; 5710a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_timeout_recover; 572*918fc385SAmir Tzin err_ctx.dump = mlx5e_tx_reporter_timeout_dump; 573b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), 574b21aef7eSJoe Perches "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", 5754ad40d8eSEran Ben Elisha sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 5760a56be3cSAya Levin jiffies_to_usecs(jiffies - sq->txq->trans_start)); 5770a56be3cSAya Levin 578e6205564SAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 579e6205564SAya Levin return to_ctx.status; 5800a56be3cSAya Levin } 5810a56be3cSAya Levin 582de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { 583de8650a8SEran Ben Elisha .name = "tx", 584de8650a8SEran Ben Elisha .recover = mlx5e_tx_reporter_recover, 585de8650a8SEran Ben Elisha .diagnose = mlx5e_tx_reporter_diagnose, 5865f29458bSAya Levin .dump = mlx5e_tx_reporter_dump, 587de8650a8SEran Ben Elisha }; 588de8650a8SEran Ben Elisha 589de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 590de8650a8SEran Ben Elisha 591b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) 592de8650a8SEran Ben Elisha { 593c27971d0SRoi Dayan struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); 594baf6dfdbSAya Levin struct devlink_health_reporter *reporter; 595de8650a8SEran Ben Elisha 596c27971d0SRoi Dayan reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops, 597b7e93bb6SVladyslav Tarasiuk MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); 598baf6dfdbSAya Levin if (IS_ERR(reporter)) { 599de8650a8SEran Ben Elisha netdev_warn(priv->netdev, 600de8650a8SEran Ben Elisha "Failed to create tx reporter, err = %ld\n", 601baf6dfdbSAya Levin PTR_ERR(reporter)); 602b3ea4c4fSEran Ben Elisha return; 6037f7cc235SAya Levin } 604baf6dfdbSAya Levin priv->tx_reporter = reporter; 605de8650a8SEran Ben Elisha } 606de8650a8SEran Ben Elisha 60706293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) 608de8650a8SEran Ben Elisha { 609baf6dfdbSAya Levin if (!priv->tx_reporter) 610de8650a8SEran Ben Elisha return; 611de8650a8SEran Ben Elisha 612b7e93bb6SVladyslav Tarasiuk devlink_port_health_reporter_destroy(priv->tx_reporter); 6137a9fb35eSRoi Dayan priv->tx_reporter = NULL; 614de8650a8SEran Ben Elisha } 615