1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */ 2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */ 3de8650a8SEran Ben Elisha 44edc17fdSAya Levin #include "health.h" 5145e5637SEran Ben Elisha #include "en/ptp.h" 6c27971d0SRoi Dayan #include "en/devlink.h" 732def412SAmir Tzin #include "lib/tout.h" 8de8650a8SEran Ben Elisha 9fc9d982aSAdham Faris /* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */ 10fc9d982aSAdham Faris static const char * const sq_sw_state_type_name[] = { 11fc9d982aSAdham Faris [MLX5E_SQ_STATE_ENABLED] = "enabled", 12fc9d982aSAdham Faris [MLX5E_SQ_STATE_MPWQE] = "mpwqe", 13fc9d982aSAdham Faris [MLX5E_SQ_STATE_RECOVERING] = "recovering", 14fc9d982aSAdham Faris [MLX5E_SQ_STATE_IPSEC] = "ipsec", 15fc9d982aSAdham Faris [MLX5E_SQ_STATE_DIM] = "dim", 16fc9d982aSAdham Faris [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", 17fc9d982aSAdham Faris [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", 18fc9d982aSAdham Faris [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", 19fc9d982aSAdham Faris [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", 20fc9d982aSAdham Faris }; 21fc9d982aSAdham Faris 22de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) 23de8650a8SEran Ben Elisha { 2432def412SAmir Tzin struct mlx5_core_dev *dev = sq->mdev; 2532def412SAmir Tzin unsigned long exp_time; 2632def412SAmir Tzin 2732def412SAmir Tzin exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); 28de8650a8SEran Ben Elisha 29de8650a8SEran Ben Elisha while (time_before(jiffies, exp_time)) { 30de8650a8SEran Ben Elisha if (sq->cc == sq->pc) 31de8650a8SEran Ben Elisha return 0; 32de8650a8SEran Ben Elisha 33de8650a8SEran Ben Elisha msleep(20); 34de8650a8SEran Ben Elisha } 35de8650a8SEran Ben Elisha 364ad40d8eSEran Ben Elisha netdev_err(sq->netdev, 37de8650a8SEran Ben Elisha "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", 38de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 39de8650a8SEran Ben Elisha 40de8650a8SEran Ben Elisha return -ETIMEDOUT; 41de8650a8SEran Ben Elisha } 42de8650a8SEran Ben Elisha 43de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) 44de8650a8SEran Ben Elisha { 45de8650a8SEran Ben Elisha WARN_ONCE(sq->cc != sq->pc, 46de8650a8SEran Ben Elisha "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", 47de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 48de8650a8SEran Ben Elisha sq->cc = 0; 49de8650a8SEran Ben Elisha sq->dma_fifo_cc = 0; 50de8650a8SEran Ben Elisha sq->pc = 0; 51de8650a8SEran Ben Elisha } 52de8650a8SEran Ben Elisha 53fc9d982aSAdham Faris static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq) 54fc9d982aSAdham Faris { 55fc9d982aSAdham Faris int err; 56fc9d982aSAdham Faris int i; 57fc9d982aSAdham Faris 58fc9d982aSAdham Faris BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES, 59fc9d982aSAdham Faris "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h"); 60fc9d982aSAdham Faris err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State"); 61fc9d982aSAdham Faris if (err) 62fc9d982aSAdham Faris return err; 63fc9d982aSAdham Faris 64fc9d982aSAdham Faris for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) { 65fc9d982aSAdham Faris err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i], 66fc9d982aSAdham Faris test_bit(i, &sq->state)); 67fc9d982aSAdham Faris if (err) 68fc9d982aSAdham Faris return err; 69fc9d982aSAdham Faris } 70fc9d982aSAdham Faris 71*b0d87ed2SAdham Faris return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 72fc9d982aSAdham Faris } 73fc9d982aSAdham Faris 74c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) 75de8650a8SEran Ben Elisha { 76c50de4afSAya Levin struct mlx5_core_dev *mdev; 77c50de4afSAya Levin struct net_device *dev; 78c50de4afSAya Levin struct mlx5e_txqsq *sq; 79de8650a8SEran Ben Elisha u8 state; 80de8650a8SEran Ben Elisha int err; 81de8650a8SEran Ben Elisha 82c50de4afSAya Levin sq = ctx; 834ad40d8eSEran Ben Elisha mdev = sq->mdev; 844ad40d8eSEran Ben Elisha dev = sq->netdev; 85c50de4afSAya Levin 86c50de4afSAya Levin if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) 87c50de4afSAya Levin return 0; 88c50de4afSAya Levin 89de8650a8SEran Ben Elisha err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); 90de8650a8SEran Ben Elisha if (err) { 91de8650a8SEran Ben Elisha netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", 92de8650a8SEran Ben Elisha sq->sqn, err); 93276d197eSAya Levin goto out; 94de8650a8SEran Ben Elisha } 95de8650a8SEran Ben Elisha 96d9a2fcf5SAya Levin if (state != MLX5_SQC_STATE_ERR) 97276d197eSAya Levin goto out; 98de8650a8SEran Ben Elisha 99de8650a8SEran Ben Elisha mlx5e_tx_disable_queue(sq->txq); 100de8650a8SEran Ben Elisha 101de8650a8SEran Ben Elisha err = mlx5e_wait_for_sq_flush(sq); 102de8650a8SEran Ben Elisha if (err) 103276d197eSAya Levin goto out; 104de8650a8SEran Ben Elisha 105de8650a8SEran Ben Elisha /* At this point, no new packets will arrive from the stack as TXQ is 106de8650a8SEran Ben Elisha * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all 107de8650a8SEran Ben Elisha * pending WQEs. SQ can safely reset the SQ. 108de8650a8SEran Ben Elisha */ 109de8650a8SEran Ben Elisha 1104ad40d8eSEran Ben Elisha err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); 111de8650a8SEran Ben Elisha if (err) 112276d197eSAya Levin goto out; 113de8650a8SEran Ben Elisha 114de8650a8SEran Ben Elisha mlx5e_reset_txqsq_cc_pc(sq); 115de8650a8SEran Ben Elisha sq->stats->recover++; 116276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 117de8650a8SEran Ben Elisha mlx5e_activate_txqsq(sq); 11879efecb4SMaxim Mikityanskiy if (sq->channel) 11979efecb4SMaxim Mikityanskiy mlx5e_trigger_napi_icosq(sq->channel); 12079efecb4SMaxim Mikityanskiy else 12179efecb4SMaxim Mikityanskiy mlx5e_trigger_napi_sched(sq->cq.napi); 122de8650a8SEran Ben Elisha 123de8650a8SEran Ben Elisha return 0; 124276d197eSAya Levin out: 125276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 126276d197eSAya Levin return err; 127de8650a8SEran Ben Elisha } 128de8650a8SEran Ben Elisha 129e6205564SAya Levin struct mlx5e_tx_timeout_ctx { 130e6205564SAya Levin struct mlx5e_txqsq *sq; 131e6205564SAya Levin signed int status; 132e6205564SAya Levin }; 133e6205564SAya Levin 134c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx) 1357d91126bSEran Ben Elisha { 136e6205564SAya Levin struct mlx5e_tx_timeout_ctx *to_ctx; 137e6205564SAya Levin struct mlx5e_priv *priv; 138c50de4afSAya Levin struct mlx5_eq_comp *eq; 139c50de4afSAya Levin struct mlx5e_txqsq *sq; 140c50de4afSAya Levin int err; 1417d91126bSEran Ben Elisha 142e6205564SAya Levin to_ctx = ctx; 143e6205564SAya Levin sq = to_ctx->sq; 144c50de4afSAya Levin eq = sq->cq.mcq.eq; 1454ad40d8eSEran Ben Elisha priv = sq->priv; 1464ad40d8eSEran Ben Elisha err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); 147e6205564SAya Levin if (!err) { 148e6205564SAya Levin to_ctx->status = 0; /* this sq recovered */ 149e6205564SAya Levin return err; 150e6205564SAya Levin } 151e6205564SAya Levin 152e6205564SAya Levin err = mlx5e_safe_reopen_channels(priv); 153e6205564SAya Levin if (!err) { 154e6205564SAya Levin to_ctx->status = 1; /* all channels recovered */ 155e6205564SAya Levin return err; 156e6205564SAya Levin } 157e6205564SAya Levin 158e6205564SAya Levin to_ctx->status = err; 1597d91126bSEran Ben Elisha clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 160e6205564SAya Levin netdev_err(priv->netdev, 161e6205564SAya Levin "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", 162e6205564SAya Levin err); 1637d91126bSEran Ben Elisha 164c50de4afSAya Levin return err; 1657d91126bSEran Ben Elisha } 1667d91126bSEran Ben Elisha 167de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function. 168de8650a8SEran Ben Elisha * It can cause a dead lock or a read-after-free. 169de8650a8SEran Ben Elisha */ 170c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) 171de8650a8SEran Ben Elisha { 172c50de4afSAya Levin return err_ctx->recover(err_ctx->ctx); 173de8650a8SEran Ben Elisha } 174de8650a8SEran Ben Elisha 175de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, 176e7a98105SJiri Pirko void *context, 177e7a98105SJiri Pirko struct netlink_ext_ack *extack) 178de8650a8SEran Ben Elisha { 179de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 180c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx = context; 181de8650a8SEran Ben Elisha 182de8650a8SEran Ben Elisha return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : 183c50de4afSAya Levin mlx5e_health_recover_channels(priv); 184de8650a8SEran Ben Elisha } 185de8650a8SEran Ben Elisha 186de8650a8SEran Ben Elisha static int 187145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, 1882d708887SAya Levin struct mlx5e_txqsq *sq, int tc) 189de8650a8SEran Ben Elisha { 190dd921fd2SAya Levin bool stopped = netif_xmit_stopped(sq->txq); 1914ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 192dd921fd2SAya Levin u8 state; 193de8650a8SEran Ben Elisha int err; 194de8650a8SEran Ben Elisha 195dd921fd2SAya Levin err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); 196dd921fd2SAya Levin if (err) 197dd921fd2SAya Levin return err; 198dd921fd2SAya Levin 1992d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); 2002d708887SAya Levin if (err) 2012d708887SAya Levin return err; 2022d708887SAya Levin 2032d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); 2042d708887SAya Levin if (err) 2052d708887SAya Levin return err; 2062d708887SAya Levin 207dd921fd2SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); 208de8650a8SEran Ben Elisha if (err) 209de8650a8SEran Ben Elisha return err; 210de8650a8SEran Ben Elisha 211de8650a8SEran Ben Elisha err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); 212de8650a8SEran Ben Elisha if (err) 213de8650a8SEran Ben Elisha return err; 214de8650a8SEran Ben Elisha 215de8650a8SEran Ben Elisha err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); 216de8650a8SEran Ben Elisha if (err) 217de8650a8SEran Ben Elisha return err; 218de8650a8SEran Ben Elisha 2192d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); 2202d708887SAya Levin if (err) 2212d708887SAya Levin return err; 2222d708887SAya Levin 2232d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); 2242d708887SAya Levin if (err) 2252d708887SAya Levin return err; 2262d708887SAya Levin 227fc9d982aSAdham Faris err = mlx5e_health_sq_put_sw_state(fmsg, sq); 228fc9d982aSAdham Faris if (err) 229fc9d982aSAdham Faris return err; 230fc9d982aSAdham Faris 231d5cbedd7SAya Levin err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); 2322bf09e60SAya Levin if (err) 2332bf09e60SAya Levin return err; 2342bf09e60SAya Levin 235145e5637SEran Ben Elisha return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); 236145e5637SEran Ben Elisha } 237145e5637SEran Ben Elisha 238145e5637SEran Ben Elisha static int 239145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, 240145e5637SEran Ben Elisha struct mlx5e_txqsq *sq, int tc) 241145e5637SEran Ben Elisha { 242145e5637SEran Ben Elisha int err; 243145e5637SEran Ben Elisha 244145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 245145e5637SEran Ben Elisha if (err) 246145e5637SEran Ben Elisha return err; 247145e5637SEran Ben Elisha 248145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); 249145e5637SEran Ben Elisha if (err) 250145e5637SEran Ben Elisha return err; 251145e5637SEran Ben Elisha 252145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); 25356837c2aSAya Levin if (err) 25456837c2aSAya Levin return err; 25556837c2aSAya Levin 256de8650a8SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 257de8650a8SEran Ben Elisha if (err) 258de8650a8SEran Ben Elisha return err; 259de8650a8SEran Ben Elisha 260de8650a8SEran Ben Elisha return 0; 261de8650a8SEran Ben Elisha } 262de8650a8SEran Ben Elisha 263145e5637SEran Ben Elisha static int 264145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, 265145e5637SEran Ben Elisha struct mlx5e_ptpsq *ptpsq, int tc) 266145e5637SEran Ben Elisha { 267145e5637SEran Ben Elisha int err; 268145e5637SEran Ben Elisha 269145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 270145e5637SEran Ben Elisha if (err) 271145e5637SEran Ben Elisha return err; 272145e5637SEran Ben Elisha 273145e5637SEran Ben Elisha err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); 274145e5637SEran Ben Elisha if (err) 275145e5637SEran Ben Elisha return err; 276145e5637SEran Ben Elisha 2771880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc); 2781880bc4eSEran Ben Elisha if (err) 2791880bc4eSEran Ben Elisha return err; 2801880bc4eSEran Ben Elisha 2811880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 2821880bc4eSEran Ben Elisha if (err) 2831880bc4eSEran Ben Elisha return err; 2841880bc4eSEran Ben Elisha 2851880bc4eSEran Ben Elisha err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg); 2861880bc4eSEran Ben Elisha if (err) 2871880bc4eSEran Ben Elisha return err; 2881880bc4eSEran Ben Elisha 2891880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 290145e5637SEran Ben Elisha if (err) 291145e5637SEran Ben Elisha return err; 292145e5637SEran Ben Elisha 293145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 294145e5637SEran Ben Elisha if (err) 295145e5637SEran Ben Elisha return err; 296145e5637SEran Ben Elisha 297145e5637SEran Ben Elisha return 0; 298145e5637SEran Ben Elisha } 299145e5637SEran Ben Elisha 300145e5637SEran Ben Elisha static int 301145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, 302145e5637SEran Ben Elisha struct mlx5e_txqsq *txqsq) 303145e5637SEran Ben Elisha { 304145e5637SEran Ben Elisha u32 sq_stride, sq_sz; 30595742c1cSAya Levin bool real_time; 306145e5637SEran Ben Elisha int err; 307145e5637SEran Ben Elisha 308145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 309145e5637SEran Ben Elisha if (err) 310145e5637SEran Ben Elisha return err; 311145e5637SEran Ben Elisha 31295742c1cSAya Levin real_time = mlx5_is_real_time_sq(txqsq->mdev); 313145e5637SEran Ben Elisha sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); 314145e5637SEran Ben Elisha sq_stride = MLX5_SEND_WQE_BB; 315145e5637SEran Ben Elisha 316145e5637SEran Ben Elisha err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); 317145e5637SEran Ben Elisha if (err) 318145e5637SEran Ben Elisha return err; 319145e5637SEran Ben Elisha 320145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); 321145e5637SEran Ben Elisha if (err) 322145e5637SEran Ben Elisha return err; 323145e5637SEran Ben Elisha 32495742c1cSAya Levin err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); 32595742c1cSAya Levin if (err) 32695742c1cSAya Levin return err; 32795742c1cSAya Levin 328145e5637SEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); 329145e5637SEran Ben Elisha if (err) 330145e5637SEran Ben Elisha return err; 331145e5637SEran Ben Elisha 332145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 333145e5637SEran Ben Elisha } 334145e5637SEran Ben Elisha 335145e5637SEran Ben Elisha static int 3361880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, 3371880bc4eSEran Ben Elisha struct mlx5e_ptpsq *ptpsq) 3381880bc4eSEran Ben Elisha { 3391880bc4eSEran Ben Elisha int err; 3401880bc4eSEran Ben Elisha 3411880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 3421880bc4eSEran Ben Elisha if (err) 3431880bc4eSEran Ben Elisha return err; 3441880bc4eSEran Ben Elisha 3451880bc4eSEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg); 3461880bc4eSEran Ben Elisha if (err) 3471880bc4eSEran Ben Elisha return err; 3481880bc4eSEran Ben Elisha 3491880bc4eSEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 3501880bc4eSEran Ben Elisha } 3511880bc4eSEran Ben Elisha 3521880bc4eSEran Ben Elisha static int 353145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, 354145e5637SEran Ben Elisha struct devlink_fmsg *fmsg) 355145e5637SEran Ben Elisha { 356145e5637SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 357145e5637SEran Ben Elisha struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; 35824c22dd0SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 359145e5637SEran Ben Elisha struct mlx5e_ptpsq *generic_ptpsq; 360145e5637SEran Ben Elisha int err; 361145e5637SEran Ben Elisha 362145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); 363145e5637SEran Ben Elisha if (err) 364145e5637SEran Ben Elisha return err; 365145e5637SEran Ben Elisha 366145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); 367145e5637SEran Ben Elisha if (err) 368145e5637SEran Ben Elisha return err; 369145e5637SEran Ben Elisha 37024c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 371145e5637SEran Ben Elisha goto out; 372145e5637SEran Ben Elisha 37324c22dd0SAya Levin generic_ptpsq = &ptp_ch->ptpsq[0]; 37424c22dd0SAya Levin 375145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); 376145e5637SEran Ben Elisha if (err) 377145e5637SEran Ben Elisha return err; 378145e5637SEran Ben Elisha 379145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); 380145e5637SEran Ben Elisha if (err) 381145e5637SEran Ben Elisha return err; 382145e5637SEran Ben Elisha 3831880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq); 3841880bc4eSEran Ben Elisha if (err) 3851880bc4eSEran Ben Elisha return err; 3861880bc4eSEran Ben Elisha 387145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 388145e5637SEran Ben Elisha if (err) 389145e5637SEran Ben Elisha return err; 390145e5637SEran Ben Elisha 391145e5637SEran Ben Elisha out: 392145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 393145e5637SEran Ben Elisha } 394145e5637SEran Ben Elisha 395de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, 396e7a98105SJiri Pirko struct devlink_fmsg *fmsg, 397e7a98105SJiri Pirko struct netlink_ext_ack *extack) 398de8650a8SEran Ben Elisha { 399de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 400b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 4012d708887SAya Levin 4022d708887SAya Levin int i, tc, err = 0; 403de8650a8SEran Ben Elisha 404de8650a8SEran Ben Elisha mutex_lock(&priv->state_lock); 405de8650a8SEran Ben Elisha 406de8650a8SEran Ben Elisha if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 407de8650a8SEran Ben Elisha goto unlock; 408de8650a8SEran Ben Elisha 409145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); 4102d708887SAya Levin if (err) 4112d708887SAya Levin goto unlock; 4122d708887SAya Levin 413de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 414de8650a8SEran Ben Elisha if (err) 415de8650a8SEran Ben Elisha goto unlock; 416de8650a8SEran Ben Elisha 4172d708887SAya Levin for (i = 0; i < priv->channels.num; i++) { 4182d708887SAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 419de8650a8SEran Ben Elisha 42086d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 4212d708887SAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 4222d708887SAya Levin 4232d708887SAya Levin err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); 424de8650a8SEran Ben Elisha if (err) 42599d31cbdSAya Levin goto unlock; 426de8650a8SEran Ben Elisha } 4272d708887SAya Levin } 428145e5637SEran Ben Elisha 42924c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 430145e5637SEran Ben Elisha goto close_sqs_nest; 431145e5637SEran Ben Elisha 43286d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 433145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, 434145e5637SEran Ben Elisha &ptp_ch->ptpsq[tc], 435145e5637SEran Ben Elisha tc); 436145e5637SEran Ben Elisha if (err) 437145e5637SEran Ben Elisha goto unlock; 438145e5637SEran Ben Elisha } 439145e5637SEran Ben Elisha 440145e5637SEran Ben Elisha close_sqs_nest: 441de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_end(fmsg); 442de8650a8SEran Ben Elisha if (err) 443de8650a8SEran Ben Elisha goto unlock; 444de8650a8SEran Ben Elisha 445de8650a8SEran Ben Elisha unlock: 446de8650a8SEran Ben Elisha mutex_unlock(&priv->state_lock); 447de8650a8SEran Ben Elisha return err; 448de8650a8SEran Ben Elisha } 449de8650a8SEran Ben Elisha 4505f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 4515f29458bSAya Levin void *ctx) 4525f29458bSAya Levin { 4535f29458bSAya Levin struct mlx5_rsc_key key = {}; 4545f29458bSAya Levin struct mlx5e_txqsq *sq = ctx; 4555f29458bSAya Levin int err; 4565f29458bSAya Levin 4575f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4585f29458bSAya Levin return 0; 4595f29458bSAya Levin 460d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4615f29458bSAya Levin if (err) 4625f29458bSAya Levin return err; 4635f29458bSAya Levin 4645f29458bSAya Levin key.size = PAGE_SIZE; 4655f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4665f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4675f29458bSAya Levin if (err) 4685f29458bSAya Levin return err; 4695f29458bSAya Levin 470d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4715f29458bSAya Levin if (err) 4725f29458bSAya Levin return err; 4735f29458bSAya Levin 474d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 4755f29458bSAya Levin if (err) 4765f29458bSAya Levin return err; 4775f29458bSAya Levin 478d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 4795f29458bSAya Levin if (err) 4805f29458bSAya Levin return err; 4815f29458bSAya Levin 4825f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 4835f29458bSAya Levin key.index1 = sq->sqn; 4845f29458bSAya Levin key.num_of_obj1 = 1; 4855f29458bSAya Levin 4865f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4875f29458bSAya Levin if (err) 4885f29458bSAya Levin return err; 4895f29458bSAya Levin 490d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4915f29458bSAya Levin if (err) 4925f29458bSAya Levin return err; 4935f29458bSAya Levin 494d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); 4955f29458bSAya Levin if (err) 4965f29458bSAya Levin return err; 4975f29458bSAya Levin 4985f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SND_BUFF; 4995f29458bSAya Levin key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 5005f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 5015f29458bSAya Levin if (err) 5025f29458bSAya Levin return err; 5035f29458bSAya Levin 504d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 5055f29458bSAya Levin if (err) 5065f29458bSAya Levin return err; 5075f29458bSAya Levin 508d5cbedd7SAya Levin return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 5095f29458bSAya Levin } 5105f29458bSAya Levin 511918fc385SAmir Tzin static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 512918fc385SAmir Tzin void *ctx) 513918fc385SAmir Tzin { 514918fc385SAmir Tzin struct mlx5e_tx_timeout_ctx *to_ctx = ctx; 515918fc385SAmir Tzin 516918fc385SAmir Tzin return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); 517918fc385SAmir Tzin } 518918fc385SAmir Tzin 5195f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, 5205f29458bSAya Levin struct devlink_fmsg *fmsg) 5215f29458bSAya Levin { 522b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 5235f29458bSAya Levin struct mlx5_rsc_key key = {}; 5245f29458bSAya Levin int i, tc, err; 5255f29458bSAya Levin 5265f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 5275f29458bSAya Levin return 0; 5285f29458bSAya Levin 529d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 5305f29458bSAya Levin if (err) 5315f29458bSAya Levin return err; 5325f29458bSAya Levin 5335f29458bSAya Levin key.size = PAGE_SIZE; 5345f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 5355f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 5365f29458bSAya Levin if (err) 5375f29458bSAya Levin return err; 5385f29458bSAya Levin 539d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 5405f29458bSAya Levin if (err) 5415f29458bSAya Levin return err; 5425f29458bSAya Levin 5435f29458bSAya Levin err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 5445f29458bSAya Levin if (err) 5455f29458bSAya Levin return err; 5465f29458bSAya Levin 5475f29458bSAya Levin for (i = 0; i < priv->channels.num; i++) { 5485f29458bSAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 5495f29458bSAya Levin 55086d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 5515f29458bSAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 5525f29458bSAya Levin 5535f29458bSAya Levin err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); 5545f29458bSAya Levin if (err) 5555f29458bSAya Levin return err; 5565f29458bSAya Levin } 5575f29458bSAya Levin } 558145e5637SEran Ben Elisha 55924c22dd0SAya Levin if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { 56086d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 561145e5637SEran Ben Elisha struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; 562145e5637SEran Ben Elisha 563145e5637SEran Ben Elisha err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); 564145e5637SEran Ben Elisha if (err) 565145e5637SEran Ben Elisha return err; 566145e5637SEran Ben Elisha } 567145e5637SEran Ben Elisha } 568145e5637SEran Ben Elisha 5695f29458bSAya Levin return devlink_fmsg_arr_pair_nest_end(fmsg); 5705f29458bSAya Levin } 5715f29458bSAya Levin 5725f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, 5735f29458bSAya Levin struct mlx5e_err_ctx *err_ctx, 5745f29458bSAya Levin struct devlink_fmsg *fmsg) 5755f29458bSAya Levin { 5765f29458bSAya Levin return err_ctx->dump(priv, fmsg, err_ctx->ctx); 5775f29458bSAya Levin } 5785f29458bSAya Levin 5795f29458bSAya Levin static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, 5805f29458bSAya Levin struct devlink_fmsg *fmsg, void *context, 5815f29458bSAya Levin struct netlink_ext_ack *extack) 5825f29458bSAya Levin { 5835f29458bSAya Levin struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 5845f29458bSAya Levin struct mlx5e_err_ctx *err_ctx = context; 5855f29458bSAya Levin 5865f29458bSAya Levin return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : 5875f29458bSAya Levin mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); 5885f29458bSAya Levin } 5895f29458bSAya Levin 5900a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) 5910a56be3cSAya Levin { 5920a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 5934ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5940a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5950a56be3cSAya Levin 5960a56be3cSAya Levin err_ctx.ctx = sq; 5970a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; 5985f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 599b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); 6000a56be3cSAya Levin 6010a56be3cSAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 6020a56be3cSAya Levin } 6030a56be3cSAya Levin 6040a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) 6050a56be3cSAya Levin { 6060a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 607e6205564SAya Levin struct mlx5e_tx_timeout_ctx to_ctx = {}; 6084ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 6090a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 6100a56be3cSAya Levin 611e6205564SAya Levin to_ctx.sq = sq; 612e6205564SAya Levin err_ctx.ctx = &to_ctx; 6130a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_timeout_recover; 614918fc385SAmir Tzin err_ctx.dump = mlx5e_tx_reporter_timeout_dump; 615b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), 616b21aef7eSJoe Perches "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", 6174ad40d8eSEran Ben Elisha sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 6185337824fSEric Dumazet jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start))); 6190a56be3cSAya Levin 620e6205564SAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 621e6205564SAya Levin return to_ctx.status; 6220a56be3cSAya Levin } 6230a56be3cSAya Levin 624de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { 625de8650a8SEran Ben Elisha .name = "tx", 626de8650a8SEran Ben Elisha .recover = mlx5e_tx_reporter_recover, 627de8650a8SEran Ben Elisha .diagnose = mlx5e_tx_reporter_diagnose, 6285f29458bSAya Levin .dump = mlx5e_tx_reporter_dump, 629de8650a8SEran Ben Elisha }; 630de8650a8SEran Ben Elisha 631de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 632de8650a8SEran Ben Elisha 633b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) 634de8650a8SEran Ben Elisha { 635baf6dfdbSAya Levin struct devlink_health_reporter *reporter; 636de8650a8SEran Ben Elisha 637bc1536f3SJiri Pirko reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, 638bc1536f3SJiri Pirko &mlx5_tx_reporter_ops, 639b7e93bb6SVladyslav Tarasiuk MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); 640baf6dfdbSAya Levin if (IS_ERR(reporter)) { 641de8650a8SEran Ben Elisha netdev_warn(priv->netdev, 642de8650a8SEran Ben Elisha "Failed to create tx reporter, err = %ld\n", 643baf6dfdbSAya Levin PTR_ERR(reporter)); 644b3ea4c4fSEran Ben Elisha return; 6457f7cc235SAya Levin } 646baf6dfdbSAya Levin priv->tx_reporter = reporter; 647de8650a8SEran Ben Elisha } 648de8650a8SEran Ben Elisha 64906293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) 650de8650a8SEran Ben Elisha { 651baf6dfdbSAya Levin if (!priv->tx_reporter) 652de8650a8SEran Ben Elisha return; 653de8650a8SEran Ben Elisha 6549f167327SJiri Pirko devlink_health_reporter_destroy(priv->tx_reporter); 6557a9fb35eSRoi Dayan priv->tx_reporter = NULL; 656de8650a8SEran Ben Elisha } 657