1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */ 2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */ 3de8650a8SEran Ben Elisha 44edc17fdSAya Levin #include "health.h" 5145e5637SEran Ben Elisha #include "en/ptp.h" 6c27971d0SRoi Dayan #include "en/devlink.h" 732def412SAmir Tzin #include "lib/tout.h" 8de8650a8SEran Ben Elisha 9*fc9d982aSAdham Faris /* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */ 10*fc9d982aSAdham Faris static const char * const sq_sw_state_type_name[] = { 11*fc9d982aSAdham Faris [MLX5E_SQ_STATE_ENABLED] = "enabled", 12*fc9d982aSAdham Faris [MLX5E_SQ_STATE_MPWQE] = "mpwqe", 13*fc9d982aSAdham Faris [MLX5E_SQ_STATE_RECOVERING] = "recovering", 14*fc9d982aSAdham Faris [MLX5E_SQ_STATE_IPSEC] = "ipsec", 15*fc9d982aSAdham Faris [MLX5E_SQ_STATE_DIM] = "dim", 16*fc9d982aSAdham Faris [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", 17*fc9d982aSAdham Faris [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", 18*fc9d982aSAdham Faris [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", 19*fc9d982aSAdham Faris [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", 20*fc9d982aSAdham Faris }; 21*fc9d982aSAdham Faris 22de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) 23de8650a8SEran Ben Elisha { 2432def412SAmir Tzin struct mlx5_core_dev *dev = sq->mdev; 2532def412SAmir Tzin unsigned long exp_time; 2632def412SAmir Tzin 2732def412SAmir Tzin exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); 28de8650a8SEran Ben Elisha 29de8650a8SEran Ben Elisha while (time_before(jiffies, exp_time)) { 30de8650a8SEran Ben Elisha if (sq->cc == sq->pc) 31de8650a8SEran Ben Elisha return 0; 32de8650a8SEran Ben Elisha 33de8650a8SEran Ben Elisha msleep(20); 34de8650a8SEran Ben Elisha } 35de8650a8SEran Ben Elisha 364ad40d8eSEran Ben Elisha netdev_err(sq->netdev, 37de8650a8SEran Ben Elisha "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", 38de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 39de8650a8SEran Ben Elisha 40de8650a8SEran Ben Elisha return -ETIMEDOUT; 41de8650a8SEran Ben Elisha } 42de8650a8SEran Ben Elisha 43de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) 44de8650a8SEran Ben Elisha { 45de8650a8SEran Ben Elisha WARN_ONCE(sq->cc != sq->pc, 46de8650a8SEran Ben Elisha "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", 47de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 48de8650a8SEran Ben Elisha sq->cc = 0; 49de8650a8SEran Ben Elisha sq->dma_fifo_cc = 0; 50de8650a8SEran Ben Elisha sq->pc = 0; 51de8650a8SEran Ben Elisha } 52de8650a8SEran Ben Elisha 53*fc9d982aSAdham Faris static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq) 54*fc9d982aSAdham Faris { 55*fc9d982aSAdham Faris int err; 56*fc9d982aSAdham Faris int i; 57*fc9d982aSAdham Faris 58*fc9d982aSAdham Faris BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES, 59*fc9d982aSAdham Faris "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h"); 60*fc9d982aSAdham Faris err = devlink_fmsg_obj_nest_start(fmsg); 61*fc9d982aSAdham Faris if (err) 62*fc9d982aSAdham Faris return err; 63*fc9d982aSAdham Faris 64*fc9d982aSAdham Faris err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State"); 65*fc9d982aSAdham Faris if (err) 66*fc9d982aSAdham Faris return err; 67*fc9d982aSAdham Faris 68*fc9d982aSAdham Faris for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) { 69*fc9d982aSAdham Faris err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i], 70*fc9d982aSAdham Faris test_bit(i, &sq->state)); 71*fc9d982aSAdham Faris if (err) 72*fc9d982aSAdham Faris return err; 73*fc9d982aSAdham Faris } 74*fc9d982aSAdham Faris 75*fc9d982aSAdham Faris err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 76*fc9d982aSAdham Faris if (err) 77*fc9d982aSAdham Faris return err; 78*fc9d982aSAdham Faris 79*fc9d982aSAdham Faris return devlink_fmsg_obj_nest_end(fmsg); 80*fc9d982aSAdham Faris } 81*fc9d982aSAdham Faris 82c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) 83de8650a8SEran Ben Elisha { 84c50de4afSAya Levin struct mlx5_core_dev *mdev; 85c50de4afSAya Levin struct net_device *dev; 86c50de4afSAya Levin struct mlx5e_txqsq *sq; 87de8650a8SEran Ben Elisha u8 state; 88de8650a8SEran Ben Elisha int err; 89de8650a8SEran Ben Elisha 90c50de4afSAya Levin sq = ctx; 914ad40d8eSEran Ben Elisha mdev = sq->mdev; 924ad40d8eSEran Ben Elisha dev = sq->netdev; 93c50de4afSAya Levin 94c50de4afSAya Levin if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) 95c50de4afSAya Levin return 0; 96c50de4afSAya Levin 97de8650a8SEran Ben Elisha err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); 98de8650a8SEran Ben Elisha if (err) { 99de8650a8SEran Ben Elisha netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", 100de8650a8SEran Ben Elisha sq->sqn, err); 101276d197eSAya Levin goto out; 102de8650a8SEran Ben Elisha } 103de8650a8SEran Ben Elisha 104d9a2fcf5SAya Levin if (state != MLX5_SQC_STATE_ERR) 105276d197eSAya Levin goto out; 106de8650a8SEran Ben Elisha 107de8650a8SEran Ben Elisha mlx5e_tx_disable_queue(sq->txq); 108de8650a8SEran Ben Elisha 109de8650a8SEran Ben Elisha err = mlx5e_wait_for_sq_flush(sq); 110de8650a8SEran Ben Elisha if (err) 111276d197eSAya Levin goto out; 112de8650a8SEran Ben Elisha 113de8650a8SEran Ben Elisha /* At this point, no new packets will arrive from the stack as TXQ is 114de8650a8SEran Ben Elisha * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all 115de8650a8SEran Ben Elisha * pending WQEs. SQ can safely reset the SQ. 116de8650a8SEran Ben Elisha */ 117de8650a8SEran Ben Elisha 1184ad40d8eSEran Ben Elisha err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); 119de8650a8SEran Ben Elisha if (err) 120276d197eSAya Levin goto out; 121de8650a8SEran Ben Elisha 122de8650a8SEran Ben Elisha mlx5e_reset_txqsq_cc_pc(sq); 123de8650a8SEran Ben Elisha sq->stats->recover++; 124276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 125de8650a8SEran Ben Elisha mlx5e_activate_txqsq(sq); 12679efecb4SMaxim Mikityanskiy if (sq->channel) 12779efecb4SMaxim Mikityanskiy mlx5e_trigger_napi_icosq(sq->channel); 12879efecb4SMaxim Mikityanskiy else 12979efecb4SMaxim Mikityanskiy mlx5e_trigger_napi_sched(sq->cq.napi); 130de8650a8SEran Ben Elisha 131de8650a8SEran Ben Elisha return 0; 132276d197eSAya Levin out: 133276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 134276d197eSAya Levin return err; 135de8650a8SEran Ben Elisha } 136de8650a8SEran Ben Elisha 137e6205564SAya Levin struct mlx5e_tx_timeout_ctx { 138e6205564SAya Levin struct mlx5e_txqsq *sq; 139e6205564SAya Levin signed int status; 140e6205564SAya Levin }; 141e6205564SAya Levin 142c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx) 1437d91126bSEran Ben Elisha { 144e6205564SAya Levin struct mlx5e_tx_timeout_ctx *to_ctx; 145e6205564SAya Levin struct mlx5e_priv *priv; 146c50de4afSAya Levin struct mlx5_eq_comp *eq; 147c50de4afSAya Levin struct mlx5e_txqsq *sq; 148c50de4afSAya Levin int err; 1497d91126bSEran Ben Elisha 150e6205564SAya Levin to_ctx = ctx; 151e6205564SAya Levin sq = to_ctx->sq; 152c50de4afSAya Levin eq = sq->cq.mcq.eq; 1534ad40d8eSEran Ben Elisha priv = sq->priv; 1544ad40d8eSEran Ben Elisha err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); 155e6205564SAya Levin if (!err) { 156e6205564SAya Levin to_ctx->status = 0; /* this sq recovered */ 157e6205564SAya Levin return err; 158e6205564SAya Levin } 159e6205564SAya Levin 160e6205564SAya Levin err = mlx5e_safe_reopen_channels(priv); 161e6205564SAya Levin if (!err) { 162e6205564SAya Levin to_ctx->status = 1; /* all channels recovered */ 163e6205564SAya Levin return err; 164e6205564SAya Levin } 165e6205564SAya Levin 166e6205564SAya Levin to_ctx->status = err; 1677d91126bSEran Ben Elisha clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 168e6205564SAya Levin netdev_err(priv->netdev, 169e6205564SAya Levin "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", 170e6205564SAya Levin err); 1717d91126bSEran Ben Elisha 172c50de4afSAya Levin return err; 1737d91126bSEran Ben Elisha } 1747d91126bSEran Ben Elisha 175de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function. 176de8650a8SEran Ben Elisha * It can cause a dead lock or a read-after-free. 177de8650a8SEran Ben Elisha */ 178c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) 179de8650a8SEran Ben Elisha { 180c50de4afSAya Levin return err_ctx->recover(err_ctx->ctx); 181de8650a8SEran Ben Elisha } 182de8650a8SEran Ben Elisha 183de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, 184e7a98105SJiri Pirko void *context, 185e7a98105SJiri Pirko struct netlink_ext_ack *extack) 186de8650a8SEran Ben Elisha { 187de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 188c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx = context; 189de8650a8SEran Ben Elisha 190de8650a8SEran Ben Elisha return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : 191c50de4afSAya Levin mlx5e_health_recover_channels(priv); 192de8650a8SEran Ben Elisha } 193de8650a8SEran Ben Elisha 194de8650a8SEran Ben Elisha static int 195145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, 1962d708887SAya Levin struct mlx5e_txqsq *sq, int tc) 197de8650a8SEran Ben Elisha { 198dd921fd2SAya Levin bool stopped = netif_xmit_stopped(sq->txq); 1994ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 200dd921fd2SAya Levin u8 state; 201de8650a8SEran Ben Elisha int err; 202de8650a8SEran Ben Elisha 203dd921fd2SAya Levin err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); 204dd921fd2SAya Levin if (err) 205dd921fd2SAya Levin return err; 206dd921fd2SAya Levin 2072d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); 2082d708887SAya Levin if (err) 2092d708887SAya Levin return err; 2102d708887SAya Levin 2112d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); 2122d708887SAya Levin if (err) 2132d708887SAya Levin return err; 2142d708887SAya Levin 215dd921fd2SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); 216de8650a8SEran Ben Elisha if (err) 217de8650a8SEran Ben Elisha return err; 218de8650a8SEran Ben Elisha 219de8650a8SEran Ben Elisha err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); 220de8650a8SEran Ben Elisha if (err) 221de8650a8SEran Ben Elisha return err; 222de8650a8SEran Ben Elisha 223de8650a8SEran Ben Elisha err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); 224de8650a8SEran Ben Elisha if (err) 225de8650a8SEran Ben Elisha return err; 226de8650a8SEran Ben Elisha 2272d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); 2282d708887SAya Levin if (err) 2292d708887SAya Levin return err; 2302d708887SAya Levin 2312d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); 2322d708887SAya Levin if (err) 2332d708887SAya Levin return err; 2342d708887SAya Levin 235*fc9d982aSAdham Faris err = mlx5e_health_sq_put_sw_state(fmsg, sq); 236*fc9d982aSAdham Faris if (err) 237*fc9d982aSAdham Faris return err; 238*fc9d982aSAdham Faris 239d5cbedd7SAya Levin err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); 2402bf09e60SAya Levin if (err) 2412bf09e60SAya Levin return err; 2422bf09e60SAya Levin 243145e5637SEran Ben Elisha return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); 244145e5637SEran Ben Elisha } 245145e5637SEran Ben Elisha 246145e5637SEran Ben Elisha static int 247145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, 248145e5637SEran Ben Elisha struct mlx5e_txqsq *sq, int tc) 249145e5637SEran Ben Elisha { 250145e5637SEran Ben Elisha int err; 251145e5637SEran Ben Elisha 252145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 253145e5637SEran Ben Elisha if (err) 254145e5637SEran Ben Elisha return err; 255145e5637SEran Ben Elisha 256145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); 257145e5637SEran Ben Elisha if (err) 258145e5637SEran Ben Elisha return err; 259145e5637SEran Ben Elisha 260145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); 26156837c2aSAya Levin if (err) 26256837c2aSAya Levin return err; 26356837c2aSAya Levin 264de8650a8SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 265de8650a8SEran Ben Elisha if (err) 266de8650a8SEran Ben Elisha return err; 267de8650a8SEran Ben Elisha 268de8650a8SEran Ben Elisha return 0; 269de8650a8SEran Ben Elisha } 270de8650a8SEran Ben Elisha 271145e5637SEran Ben Elisha static int 272145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, 273145e5637SEran Ben Elisha struct mlx5e_ptpsq *ptpsq, int tc) 274145e5637SEran Ben Elisha { 275145e5637SEran Ben Elisha int err; 276145e5637SEran Ben Elisha 277145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 278145e5637SEran Ben Elisha if (err) 279145e5637SEran Ben Elisha return err; 280145e5637SEran Ben Elisha 281145e5637SEran Ben Elisha err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); 282145e5637SEran Ben Elisha if (err) 283145e5637SEran Ben Elisha return err; 284145e5637SEran Ben Elisha 2851880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc); 2861880bc4eSEran Ben Elisha if (err) 2871880bc4eSEran Ben Elisha return err; 2881880bc4eSEran Ben Elisha 2891880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 2901880bc4eSEran Ben Elisha if (err) 2911880bc4eSEran Ben Elisha return err; 2921880bc4eSEran Ben Elisha 2931880bc4eSEran Ben Elisha err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg); 2941880bc4eSEran Ben Elisha if (err) 2951880bc4eSEran Ben Elisha return err; 2961880bc4eSEran Ben Elisha 2971880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 298145e5637SEran Ben Elisha if (err) 299145e5637SEran Ben Elisha return err; 300145e5637SEran Ben Elisha 301145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 302145e5637SEran Ben Elisha if (err) 303145e5637SEran Ben Elisha return err; 304145e5637SEran Ben Elisha 305145e5637SEran Ben Elisha return 0; 306145e5637SEran Ben Elisha } 307145e5637SEran Ben Elisha 308145e5637SEran Ben Elisha static int 309145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, 310145e5637SEran Ben Elisha struct mlx5e_txqsq *txqsq) 311145e5637SEran Ben Elisha { 312145e5637SEran Ben Elisha u32 sq_stride, sq_sz; 31395742c1cSAya Levin bool real_time; 314145e5637SEran Ben Elisha int err; 315145e5637SEran Ben Elisha 316145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 317145e5637SEran Ben Elisha if (err) 318145e5637SEran Ben Elisha return err; 319145e5637SEran Ben Elisha 32095742c1cSAya Levin real_time = mlx5_is_real_time_sq(txqsq->mdev); 321145e5637SEran Ben Elisha sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); 322145e5637SEran Ben Elisha sq_stride = MLX5_SEND_WQE_BB; 323145e5637SEran Ben Elisha 324145e5637SEran Ben Elisha err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); 325145e5637SEran Ben Elisha if (err) 326145e5637SEran Ben Elisha return err; 327145e5637SEran Ben Elisha 328145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); 329145e5637SEran Ben Elisha if (err) 330145e5637SEran Ben Elisha return err; 331145e5637SEran Ben Elisha 33295742c1cSAya Levin err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); 33395742c1cSAya Levin if (err) 33495742c1cSAya Levin return err; 33595742c1cSAya Levin 336145e5637SEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); 337145e5637SEran Ben Elisha if (err) 338145e5637SEran Ben Elisha return err; 339145e5637SEran Ben Elisha 340145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 341145e5637SEran Ben Elisha } 342145e5637SEran Ben Elisha 343145e5637SEran Ben Elisha static int 3441880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, 3451880bc4eSEran Ben Elisha struct mlx5e_ptpsq *ptpsq) 3461880bc4eSEran Ben Elisha { 3471880bc4eSEran Ben Elisha int err; 3481880bc4eSEran Ben Elisha 3491880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 3501880bc4eSEran Ben Elisha if (err) 3511880bc4eSEran Ben Elisha return err; 3521880bc4eSEran Ben Elisha 3531880bc4eSEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg); 3541880bc4eSEran Ben Elisha if (err) 3551880bc4eSEran Ben Elisha return err; 3561880bc4eSEran Ben Elisha 3571880bc4eSEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 3581880bc4eSEran Ben Elisha } 3591880bc4eSEran Ben Elisha 3601880bc4eSEran Ben Elisha static int 361145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, 362145e5637SEran Ben Elisha struct devlink_fmsg *fmsg) 363145e5637SEran Ben Elisha { 364145e5637SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 365145e5637SEran Ben Elisha struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; 36624c22dd0SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 367145e5637SEran Ben Elisha struct mlx5e_ptpsq *generic_ptpsq; 368145e5637SEran Ben Elisha int err; 369145e5637SEran Ben Elisha 370145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); 371145e5637SEran Ben Elisha if (err) 372145e5637SEran Ben Elisha return err; 373145e5637SEran Ben Elisha 374145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); 375145e5637SEran Ben Elisha if (err) 376145e5637SEran Ben Elisha return err; 377145e5637SEran Ben Elisha 37824c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 379145e5637SEran Ben Elisha goto out; 380145e5637SEran Ben Elisha 38124c22dd0SAya Levin generic_ptpsq = &ptp_ch->ptpsq[0]; 38224c22dd0SAya Levin 383145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); 384145e5637SEran Ben Elisha if (err) 385145e5637SEran Ben Elisha return err; 386145e5637SEran Ben Elisha 387145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); 388145e5637SEran Ben Elisha if (err) 389145e5637SEran Ben Elisha return err; 390145e5637SEran Ben Elisha 3911880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq); 3921880bc4eSEran Ben Elisha if (err) 3931880bc4eSEran Ben Elisha return err; 3941880bc4eSEran Ben Elisha 395145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 396145e5637SEran Ben Elisha if (err) 397145e5637SEran Ben Elisha return err; 398145e5637SEran Ben Elisha 399145e5637SEran Ben Elisha out: 400145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 401145e5637SEran Ben Elisha } 402145e5637SEran Ben Elisha 403de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, 404e7a98105SJiri Pirko struct devlink_fmsg *fmsg, 405e7a98105SJiri Pirko struct netlink_ext_ack *extack) 406de8650a8SEran Ben Elisha { 407de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 408b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 4092d708887SAya Levin 4102d708887SAya Levin int i, tc, err = 0; 411de8650a8SEran Ben Elisha 412de8650a8SEran Ben Elisha mutex_lock(&priv->state_lock); 413de8650a8SEran Ben Elisha 414de8650a8SEran Ben Elisha if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 415de8650a8SEran Ben Elisha goto unlock; 416de8650a8SEran Ben Elisha 417145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); 4182d708887SAya Levin if (err) 4192d708887SAya Levin goto unlock; 4202d708887SAya Levin 421de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 422de8650a8SEran Ben Elisha if (err) 423de8650a8SEran Ben Elisha goto unlock; 424de8650a8SEran Ben Elisha 4252d708887SAya Levin for (i = 0; i < priv->channels.num; i++) { 4262d708887SAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 427de8650a8SEran Ben Elisha 42886d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 4292d708887SAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 4302d708887SAya Levin 4312d708887SAya Levin err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); 432de8650a8SEran Ben Elisha if (err) 43399d31cbdSAya Levin goto unlock; 434de8650a8SEran Ben Elisha } 4352d708887SAya Levin } 436145e5637SEran Ben Elisha 43724c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 438145e5637SEran Ben Elisha goto close_sqs_nest; 439145e5637SEran Ben Elisha 44086d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 441145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, 442145e5637SEran Ben Elisha &ptp_ch->ptpsq[tc], 443145e5637SEran Ben Elisha tc); 444145e5637SEran Ben Elisha if (err) 445145e5637SEran Ben Elisha goto unlock; 446145e5637SEran Ben Elisha } 447145e5637SEran Ben Elisha 448145e5637SEran Ben Elisha close_sqs_nest: 449de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_end(fmsg); 450de8650a8SEran Ben Elisha if (err) 451de8650a8SEran Ben Elisha goto unlock; 452de8650a8SEran Ben Elisha 453de8650a8SEran Ben Elisha unlock: 454de8650a8SEran Ben Elisha mutex_unlock(&priv->state_lock); 455de8650a8SEran Ben Elisha return err; 456de8650a8SEran Ben Elisha } 457de8650a8SEran Ben Elisha 4585f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 4595f29458bSAya Levin void *ctx) 4605f29458bSAya Levin { 4615f29458bSAya Levin struct mlx5_rsc_key key = {}; 4625f29458bSAya Levin struct mlx5e_txqsq *sq = ctx; 4635f29458bSAya Levin int err; 4645f29458bSAya Levin 4655f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4665f29458bSAya Levin return 0; 4675f29458bSAya Levin 468d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4695f29458bSAya Levin if (err) 4705f29458bSAya Levin return err; 4715f29458bSAya Levin 4725f29458bSAya Levin key.size = PAGE_SIZE; 4735f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4745f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4755f29458bSAya Levin if (err) 4765f29458bSAya Levin return err; 4775f29458bSAya Levin 478d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4795f29458bSAya Levin if (err) 4805f29458bSAya Levin return err; 4815f29458bSAya Levin 482d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 4835f29458bSAya Levin if (err) 4845f29458bSAya Levin return err; 4855f29458bSAya Levin 486d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 4875f29458bSAya Levin if (err) 4885f29458bSAya Levin return err; 4895f29458bSAya Levin 4905f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 4915f29458bSAya Levin key.index1 = sq->sqn; 4925f29458bSAya Levin key.num_of_obj1 = 1; 4935f29458bSAya Levin 4945f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4955f29458bSAya Levin if (err) 4965f29458bSAya Levin return err; 4975f29458bSAya Levin 498d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4995f29458bSAya Levin if (err) 5005f29458bSAya Levin return err; 5015f29458bSAya Levin 502d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); 5035f29458bSAya Levin if (err) 5045f29458bSAya Levin return err; 5055f29458bSAya Levin 5065f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SND_BUFF; 5075f29458bSAya Levin key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 5085f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 5095f29458bSAya Levin if (err) 5105f29458bSAya Levin return err; 5115f29458bSAya Levin 512d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 5135f29458bSAya Levin if (err) 5145f29458bSAya Levin return err; 5155f29458bSAya Levin 516d5cbedd7SAya Levin return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 5175f29458bSAya Levin } 5185f29458bSAya Levin 519918fc385SAmir Tzin static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 520918fc385SAmir Tzin void *ctx) 521918fc385SAmir Tzin { 522918fc385SAmir Tzin struct mlx5e_tx_timeout_ctx *to_ctx = ctx; 523918fc385SAmir Tzin 524918fc385SAmir Tzin return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); 525918fc385SAmir Tzin } 526918fc385SAmir Tzin 5275f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, 5285f29458bSAya Levin struct devlink_fmsg *fmsg) 5295f29458bSAya Levin { 530b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 5315f29458bSAya Levin struct mlx5_rsc_key key = {}; 5325f29458bSAya Levin int i, tc, err; 5335f29458bSAya Levin 5345f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 5355f29458bSAya Levin return 0; 5365f29458bSAya Levin 537d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 5385f29458bSAya Levin if (err) 5395f29458bSAya Levin return err; 5405f29458bSAya Levin 5415f29458bSAya Levin key.size = PAGE_SIZE; 5425f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 5435f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 5445f29458bSAya Levin if (err) 5455f29458bSAya Levin return err; 5465f29458bSAya Levin 547d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 5485f29458bSAya Levin if (err) 5495f29458bSAya Levin return err; 5505f29458bSAya Levin 5515f29458bSAya Levin err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 5525f29458bSAya Levin if (err) 5535f29458bSAya Levin return err; 5545f29458bSAya Levin 5555f29458bSAya Levin for (i = 0; i < priv->channels.num; i++) { 5565f29458bSAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 5575f29458bSAya Levin 55886d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 5595f29458bSAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 5605f29458bSAya Levin 5615f29458bSAya Levin err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); 5625f29458bSAya Levin if (err) 5635f29458bSAya Levin return err; 5645f29458bSAya Levin } 5655f29458bSAya Levin } 566145e5637SEran Ben Elisha 56724c22dd0SAya Levin if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { 56886d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 569145e5637SEran Ben Elisha struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; 570145e5637SEran Ben Elisha 571145e5637SEran Ben Elisha err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); 572145e5637SEran Ben Elisha if (err) 573145e5637SEran Ben Elisha return err; 574145e5637SEran Ben Elisha } 575145e5637SEran Ben Elisha } 576145e5637SEran Ben Elisha 5775f29458bSAya Levin return devlink_fmsg_arr_pair_nest_end(fmsg); 5785f29458bSAya Levin } 5795f29458bSAya Levin 5805f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, 5815f29458bSAya Levin struct mlx5e_err_ctx *err_ctx, 5825f29458bSAya Levin struct devlink_fmsg *fmsg) 5835f29458bSAya Levin { 5845f29458bSAya Levin return err_ctx->dump(priv, fmsg, err_ctx->ctx); 5855f29458bSAya Levin } 5865f29458bSAya Levin 5875f29458bSAya Levin static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, 5885f29458bSAya Levin struct devlink_fmsg *fmsg, void *context, 5895f29458bSAya Levin struct netlink_ext_ack *extack) 5905f29458bSAya Levin { 5915f29458bSAya Levin struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 5925f29458bSAya Levin struct mlx5e_err_ctx *err_ctx = context; 5935f29458bSAya Levin 5945f29458bSAya Levin return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : 5955f29458bSAya Levin mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); 5965f29458bSAya Levin } 5975f29458bSAya Levin 5980a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) 5990a56be3cSAya Levin { 6000a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 6014ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 6020a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 6030a56be3cSAya Levin 6040a56be3cSAya Levin err_ctx.ctx = sq; 6050a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; 6065f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 607b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); 6080a56be3cSAya Levin 6090a56be3cSAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 6100a56be3cSAya Levin } 6110a56be3cSAya Levin 6120a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) 6130a56be3cSAya Levin { 6140a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 615e6205564SAya Levin struct mlx5e_tx_timeout_ctx to_ctx = {}; 6164ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 6170a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 6180a56be3cSAya Levin 619e6205564SAya Levin to_ctx.sq = sq; 620e6205564SAya Levin err_ctx.ctx = &to_ctx; 6210a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_timeout_recover; 622918fc385SAmir Tzin err_ctx.dump = mlx5e_tx_reporter_timeout_dump; 623b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), 624b21aef7eSJoe Perches "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", 6254ad40d8eSEran Ben Elisha sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 6265337824fSEric Dumazet jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start))); 6270a56be3cSAya Levin 628e6205564SAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 629e6205564SAya Levin return to_ctx.status; 6300a56be3cSAya Levin } 6310a56be3cSAya Levin 632de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { 633de8650a8SEran Ben Elisha .name = "tx", 634de8650a8SEran Ben Elisha .recover = mlx5e_tx_reporter_recover, 635de8650a8SEran Ben Elisha .diagnose = mlx5e_tx_reporter_diagnose, 6365f29458bSAya Levin .dump = mlx5e_tx_reporter_dump, 637de8650a8SEran Ben Elisha }; 638de8650a8SEran Ben Elisha 639de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 640de8650a8SEran Ben Elisha 641b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) 642de8650a8SEran Ben Elisha { 643baf6dfdbSAya Levin struct devlink_health_reporter *reporter; 644de8650a8SEran Ben Elisha 645bc1536f3SJiri Pirko reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, 646bc1536f3SJiri Pirko &mlx5_tx_reporter_ops, 647b7e93bb6SVladyslav Tarasiuk MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); 648baf6dfdbSAya Levin if (IS_ERR(reporter)) { 649de8650a8SEran Ben Elisha netdev_warn(priv->netdev, 650de8650a8SEran Ben Elisha "Failed to create tx reporter, err = %ld\n", 651baf6dfdbSAya Levin PTR_ERR(reporter)); 652b3ea4c4fSEran Ben Elisha return; 6537f7cc235SAya Levin } 654baf6dfdbSAya Levin priv->tx_reporter = reporter; 655de8650a8SEran Ben Elisha } 656de8650a8SEran Ben Elisha 65706293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) 658de8650a8SEran Ben Elisha { 659baf6dfdbSAya Levin if (!priv->tx_reporter) 660de8650a8SEran Ben Elisha return; 661de8650a8SEran Ben Elisha 6629f167327SJiri Pirko devlink_health_reporter_destroy(priv->tx_reporter); 6637a9fb35eSRoi Dayan priv->tx_reporter = NULL; 664de8650a8SEran Ben Elisha } 665