1de8650a8SEran Ben Elisha /* SPDX-License-Identifier: GPL-2.0 */ 2de8650a8SEran Ben Elisha /* Copyright (c) 2019 Mellanox Technologies. */ 3de8650a8SEran Ben Elisha 44edc17fdSAya Levin #include "health.h" 5145e5637SEran Ben Elisha #include "en/ptp.h" 6c27971d0SRoi Dayan #include "en/devlink.h" 7de8650a8SEran Ben Elisha 8de8650a8SEran Ben Elisha static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) 9de8650a8SEran Ben Elisha { 10e74e28aeSAya Levin unsigned long exp_time = jiffies + 11e74e28aeSAya Levin msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); 12de8650a8SEran Ben Elisha 13de8650a8SEran Ben Elisha while (time_before(jiffies, exp_time)) { 14de8650a8SEran Ben Elisha if (sq->cc == sq->pc) 15de8650a8SEran Ben Elisha return 0; 16de8650a8SEran Ben Elisha 17de8650a8SEran Ben Elisha msleep(20); 18de8650a8SEran Ben Elisha } 19de8650a8SEran Ben Elisha 204ad40d8eSEran Ben Elisha netdev_err(sq->netdev, 21de8650a8SEran Ben Elisha "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", 22de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 23de8650a8SEran Ben Elisha 24de8650a8SEran Ben Elisha return -ETIMEDOUT; 25de8650a8SEran Ben Elisha } 26de8650a8SEran Ben Elisha 27de8650a8SEran Ben Elisha static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) 28de8650a8SEran Ben Elisha { 29de8650a8SEran Ben Elisha WARN_ONCE(sq->cc != sq->pc, 30de8650a8SEran Ben Elisha "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", 31de8650a8SEran Ben Elisha sq->sqn, sq->cc, sq->pc); 32de8650a8SEran Ben Elisha sq->cc = 0; 33de8650a8SEran Ben Elisha sq->dma_fifo_cc = 0; 34de8650a8SEran Ben Elisha sq->pc = 0; 35de8650a8SEran Ben Elisha } 36de8650a8SEran Ben Elisha 37c50de4afSAya Levin static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) 38de8650a8SEran Ben Elisha { 39c50de4afSAya Levin struct mlx5_core_dev *mdev; 40c50de4afSAya Levin struct net_device *dev; 41c50de4afSAya Levin struct mlx5e_txqsq *sq; 42de8650a8SEran Ben Elisha u8 state; 43de8650a8SEran Ben Elisha int err; 44de8650a8SEran Ben Elisha 45c50de4afSAya Levin sq = ctx; 464ad40d8eSEran Ben Elisha mdev = sq->mdev; 474ad40d8eSEran Ben Elisha dev = sq->netdev; 48c50de4afSAya Levin 49c50de4afSAya Levin if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) 50c50de4afSAya Levin return 0; 51c50de4afSAya Levin 52de8650a8SEran Ben Elisha err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); 53de8650a8SEran Ben Elisha if (err) { 54de8650a8SEran Ben Elisha netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", 55de8650a8SEran Ben Elisha sq->sqn, err); 56276d197eSAya Levin goto out; 57de8650a8SEran Ben Elisha } 58de8650a8SEran Ben Elisha 59d9a2fcf5SAya Levin if (state != MLX5_SQC_STATE_ERR) 60276d197eSAya Levin goto out; 61de8650a8SEran Ben Elisha 62de8650a8SEran Ben Elisha mlx5e_tx_disable_queue(sq->txq); 63de8650a8SEran Ben Elisha 64de8650a8SEran Ben Elisha err = mlx5e_wait_for_sq_flush(sq); 65de8650a8SEran Ben Elisha if (err) 66276d197eSAya Levin goto out; 67de8650a8SEran Ben Elisha 68de8650a8SEran Ben Elisha /* At this point, no new packets will arrive from the stack as TXQ is 69de8650a8SEran Ben Elisha * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all 70de8650a8SEran Ben Elisha * pending WQEs. SQ can safely reset the SQ. 71de8650a8SEran Ben Elisha */ 72de8650a8SEran Ben Elisha 734ad40d8eSEran Ben Elisha err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); 74de8650a8SEran Ben Elisha if (err) 75276d197eSAya Levin goto out; 76de8650a8SEran Ben Elisha 77de8650a8SEran Ben Elisha mlx5e_reset_txqsq_cc_pc(sq); 78de8650a8SEran Ben Elisha sq->stats->recover++; 79276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 80de8650a8SEran Ben Elisha mlx5e_activate_txqsq(sq); 81de8650a8SEran Ben Elisha 82de8650a8SEran Ben Elisha return 0; 83276d197eSAya Levin out: 84276d197eSAya Levin clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); 85276d197eSAya Levin return err; 86de8650a8SEran Ben Elisha } 87de8650a8SEran Ben Elisha 88e6205564SAya Levin struct mlx5e_tx_timeout_ctx { 89e6205564SAya Levin struct mlx5e_txqsq *sq; 90e6205564SAya Levin signed int status; 91e6205564SAya Levin }; 92e6205564SAya Levin 93c50de4afSAya Levin static int mlx5e_tx_reporter_timeout_recover(void *ctx) 947d91126bSEran Ben Elisha { 95e6205564SAya Levin struct mlx5e_tx_timeout_ctx *to_ctx; 96e6205564SAya Levin struct mlx5e_priv *priv; 97c50de4afSAya Levin struct mlx5_eq_comp *eq; 98c50de4afSAya Levin struct mlx5e_txqsq *sq; 99c50de4afSAya Levin int err; 1007d91126bSEran Ben Elisha 101e6205564SAya Levin to_ctx = ctx; 102e6205564SAya Levin sq = to_ctx->sq; 103c50de4afSAya Levin eq = sq->cq.mcq.eq; 1044ad40d8eSEran Ben Elisha priv = sq->priv; 1054ad40d8eSEran Ben Elisha err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); 106e6205564SAya Levin if (!err) { 107e6205564SAya Levin to_ctx->status = 0; /* this sq recovered */ 108e6205564SAya Levin return err; 109e6205564SAya Levin } 110e6205564SAya Levin 111e6205564SAya Levin err = mlx5e_safe_reopen_channels(priv); 112e6205564SAya Levin if (!err) { 113e6205564SAya Levin to_ctx->status = 1; /* all channels recovered */ 114e6205564SAya Levin return err; 115e6205564SAya Levin } 116e6205564SAya Levin 117e6205564SAya Levin to_ctx->status = err; 1187d91126bSEran Ben Elisha clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 119e6205564SAya Levin netdev_err(priv->netdev, 120e6205564SAya Levin "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", 121e6205564SAya Levin err); 1227d91126bSEran Ben Elisha 123c50de4afSAya Levin return err; 1247d91126bSEran Ben Elisha } 1257d91126bSEran Ben Elisha 126de8650a8SEran Ben Elisha /* state lock cannot be grabbed within this function. 127de8650a8SEran Ben Elisha * It can cause a dead lock or a read-after-free. 128de8650a8SEran Ben Elisha */ 129c50de4afSAya Levin static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) 130de8650a8SEran Ben Elisha { 131c50de4afSAya Levin return err_ctx->recover(err_ctx->ctx); 132de8650a8SEran Ben Elisha } 133de8650a8SEran Ben Elisha 134de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, 135e7a98105SJiri Pirko void *context, 136e7a98105SJiri Pirko struct netlink_ext_ack *extack) 137de8650a8SEran Ben Elisha { 138de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 139c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx = context; 140de8650a8SEran Ben Elisha 141de8650a8SEran Ben Elisha return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : 142c50de4afSAya Levin mlx5e_health_recover_channels(priv); 143de8650a8SEran Ben Elisha } 144de8650a8SEran Ben Elisha 145de8650a8SEran Ben Elisha static int 146145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, 1472d708887SAya Levin struct mlx5e_txqsq *sq, int tc) 148de8650a8SEran Ben Elisha { 149dd921fd2SAya Levin bool stopped = netif_xmit_stopped(sq->txq); 1504ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 151dd921fd2SAya Levin u8 state; 152de8650a8SEran Ben Elisha int err; 153de8650a8SEran Ben Elisha 154dd921fd2SAya Levin err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); 155dd921fd2SAya Levin if (err) 156dd921fd2SAya Levin return err; 157dd921fd2SAya Levin 1582d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); 1592d708887SAya Levin if (err) 1602d708887SAya Levin return err; 1612d708887SAya Levin 1622d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); 1632d708887SAya Levin if (err) 1642d708887SAya Levin return err; 1652d708887SAya Levin 166dd921fd2SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); 167de8650a8SEran Ben Elisha if (err) 168de8650a8SEran Ben Elisha return err; 169de8650a8SEran Ben Elisha 170de8650a8SEran Ben Elisha err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); 171de8650a8SEran Ben Elisha if (err) 172de8650a8SEran Ben Elisha return err; 173de8650a8SEran Ben Elisha 174de8650a8SEran Ben Elisha err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); 175de8650a8SEran Ben Elisha if (err) 176de8650a8SEran Ben Elisha return err; 177de8650a8SEran Ben Elisha 1782d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); 1792d708887SAya Levin if (err) 1802d708887SAya Levin return err; 1812d708887SAya Levin 1822d708887SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); 1832d708887SAya Levin if (err) 1842d708887SAya Levin return err; 1852d708887SAya Levin 186d5cbedd7SAya Levin err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); 1872bf09e60SAya Levin if (err) 1882bf09e60SAya Levin return err; 1892bf09e60SAya Levin 190145e5637SEran Ben Elisha return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); 191145e5637SEran Ben Elisha } 192145e5637SEran Ben Elisha 193145e5637SEran Ben Elisha static int 194145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, 195145e5637SEran Ben Elisha struct mlx5e_txqsq *sq, int tc) 196145e5637SEran Ben Elisha { 197145e5637SEran Ben Elisha int err; 198145e5637SEran Ben Elisha 199145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 200145e5637SEran Ben Elisha if (err) 201145e5637SEran Ben Elisha return err; 202145e5637SEran Ben Elisha 203145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); 204145e5637SEran Ben Elisha if (err) 205145e5637SEran Ben Elisha return err; 206145e5637SEran Ben Elisha 207145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); 20856837c2aSAya Levin if (err) 20956837c2aSAya Levin return err; 21056837c2aSAya Levin 211de8650a8SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 212de8650a8SEran Ben Elisha if (err) 213de8650a8SEran Ben Elisha return err; 214de8650a8SEran Ben Elisha 215de8650a8SEran Ben Elisha return 0; 216de8650a8SEran Ben Elisha } 217de8650a8SEran Ben Elisha 218145e5637SEran Ben Elisha static int 219145e5637SEran Ben Elisha mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, 220145e5637SEran Ben Elisha struct mlx5e_ptpsq *ptpsq, int tc) 221145e5637SEran Ben Elisha { 222145e5637SEran Ben Elisha int err; 223145e5637SEran Ben Elisha 224145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_start(fmsg); 225145e5637SEran Ben Elisha if (err) 226145e5637SEran Ben Elisha return err; 227145e5637SEran Ben Elisha 228145e5637SEran Ben Elisha err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); 229145e5637SEran Ben Elisha if (err) 230145e5637SEran Ben Elisha return err; 231145e5637SEran Ben Elisha 2321880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc); 2331880bc4eSEran Ben Elisha if (err) 2341880bc4eSEran Ben Elisha return err; 2351880bc4eSEran Ben Elisha 2361880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 2371880bc4eSEran Ben Elisha if (err) 2381880bc4eSEran Ben Elisha return err; 2391880bc4eSEran Ben Elisha 2401880bc4eSEran Ben Elisha err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg); 2411880bc4eSEran Ben Elisha if (err) 2421880bc4eSEran Ben Elisha return err; 2431880bc4eSEran Ben Elisha 2441880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 245145e5637SEran Ben Elisha if (err) 246145e5637SEran Ben Elisha return err; 247145e5637SEran Ben Elisha 248145e5637SEran Ben Elisha err = devlink_fmsg_obj_nest_end(fmsg); 249145e5637SEran Ben Elisha if (err) 250145e5637SEran Ben Elisha return err; 251145e5637SEran Ben Elisha 252145e5637SEran Ben Elisha return 0; 253145e5637SEran Ben Elisha } 254145e5637SEran Ben Elisha 255145e5637SEran Ben Elisha static int 256145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, 257145e5637SEran Ben Elisha struct mlx5e_txqsq *txqsq) 258145e5637SEran Ben Elisha { 259145e5637SEran Ben Elisha u32 sq_stride, sq_sz; 26095742c1cSAya Levin bool real_time; 261145e5637SEran Ben Elisha int err; 262145e5637SEran Ben Elisha 263145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 264145e5637SEran Ben Elisha if (err) 265145e5637SEran Ben Elisha return err; 266145e5637SEran Ben Elisha 26795742c1cSAya Levin real_time = mlx5_is_real_time_sq(txqsq->mdev); 268145e5637SEran Ben Elisha sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); 269145e5637SEran Ben Elisha sq_stride = MLX5_SEND_WQE_BB; 270145e5637SEran Ben Elisha 271145e5637SEran Ben Elisha err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); 272145e5637SEran Ben Elisha if (err) 273145e5637SEran Ben Elisha return err; 274145e5637SEran Ben Elisha 275145e5637SEran Ben Elisha err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); 276145e5637SEran Ben Elisha if (err) 277145e5637SEran Ben Elisha return err; 278145e5637SEran Ben Elisha 27995742c1cSAya Levin err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); 28095742c1cSAya Levin if (err) 28195742c1cSAya Levin return err; 28295742c1cSAya Levin 283145e5637SEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); 284145e5637SEran Ben Elisha if (err) 285145e5637SEran Ben Elisha return err; 286145e5637SEran Ben Elisha 287145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 288145e5637SEran Ben Elisha } 289145e5637SEran Ben Elisha 290145e5637SEran Ben Elisha static int 2911880bc4eSEran Ben Elisha mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, 2921880bc4eSEran Ben Elisha struct mlx5e_ptpsq *ptpsq) 2931880bc4eSEran Ben Elisha { 2941880bc4eSEran Ben Elisha int err; 2951880bc4eSEran Ben Elisha 2961880bc4eSEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); 2971880bc4eSEran Ben Elisha if (err) 2981880bc4eSEran Ben Elisha return err; 2991880bc4eSEran Ben Elisha 3001880bc4eSEran Ben Elisha err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg); 3011880bc4eSEran Ben Elisha if (err) 3021880bc4eSEran Ben Elisha return err; 3031880bc4eSEran Ben Elisha 3041880bc4eSEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 3051880bc4eSEran Ben Elisha } 3061880bc4eSEran Ben Elisha 3071880bc4eSEran Ben Elisha static int 308145e5637SEran Ben Elisha mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, 309145e5637SEran Ben Elisha struct devlink_fmsg *fmsg) 310145e5637SEran Ben Elisha { 311145e5637SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 312145e5637SEran Ben Elisha struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; 31324c22dd0SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 314145e5637SEran Ben Elisha struct mlx5e_ptpsq *generic_ptpsq; 315145e5637SEran Ben Elisha int err; 316145e5637SEran Ben Elisha 317145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); 318145e5637SEran Ben Elisha if (err) 319145e5637SEran Ben Elisha return err; 320145e5637SEran Ben Elisha 321145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); 322145e5637SEran Ben Elisha if (err) 323145e5637SEran Ben Elisha return err; 324145e5637SEran Ben Elisha 32524c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 326145e5637SEran Ben Elisha goto out; 327145e5637SEran Ben Elisha 32824c22dd0SAya Levin generic_ptpsq = &ptp_ch->ptpsq[0]; 32924c22dd0SAya Levin 330145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); 331145e5637SEran Ben Elisha if (err) 332145e5637SEran Ben Elisha return err; 333145e5637SEran Ben Elisha 334145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); 335145e5637SEran Ben Elisha if (err) 336145e5637SEran Ben Elisha return err; 337145e5637SEran Ben Elisha 3381880bc4eSEran Ben Elisha err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq); 3391880bc4eSEran Ben Elisha if (err) 3401880bc4eSEran Ben Elisha return err; 3411880bc4eSEran Ben Elisha 342145e5637SEran Ben Elisha err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 343145e5637SEran Ben Elisha if (err) 344145e5637SEran Ben Elisha return err; 345145e5637SEran Ben Elisha 346145e5637SEran Ben Elisha out: 347145e5637SEran Ben Elisha return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 348145e5637SEran Ben Elisha } 349145e5637SEran Ben Elisha 350de8650a8SEran Ben Elisha static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, 351e7a98105SJiri Pirko struct devlink_fmsg *fmsg, 352e7a98105SJiri Pirko struct netlink_ext_ack *extack) 353de8650a8SEran Ben Elisha { 354de8650a8SEran Ben Elisha struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 355b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 3562d708887SAya Levin 3572d708887SAya Levin int i, tc, err = 0; 358de8650a8SEran Ben Elisha 359de8650a8SEran Ben Elisha mutex_lock(&priv->state_lock); 360de8650a8SEran Ben Elisha 361de8650a8SEran Ben Elisha if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 362de8650a8SEran Ben Elisha goto unlock; 363de8650a8SEran Ben Elisha 364145e5637SEran Ben Elisha err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); 3652d708887SAya Levin if (err) 3662d708887SAya Levin goto unlock; 3672d708887SAya Levin 368de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 369de8650a8SEran Ben Elisha if (err) 370de8650a8SEran Ben Elisha goto unlock; 371de8650a8SEran Ben Elisha 3722d708887SAya Levin for (i = 0; i < priv->channels.num; i++) { 3732d708887SAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 374de8650a8SEran Ben Elisha 375*86d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 3762d708887SAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 3772d708887SAya Levin 3782d708887SAya Levin err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); 379de8650a8SEran Ben Elisha if (err) 38099d31cbdSAya Levin goto unlock; 381de8650a8SEran Ben Elisha } 3822d708887SAya Levin } 383145e5637SEran Ben Elisha 38424c22dd0SAya Levin if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) 385145e5637SEran Ben Elisha goto close_sqs_nest; 386145e5637SEran Ben Elisha 387*86d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 388145e5637SEran Ben Elisha err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, 389145e5637SEran Ben Elisha &ptp_ch->ptpsq[tc], 390145e5637SEran Ben Elisha tc); 391145e5637SEran Ben Elisha if (err) 392145e5637SEran Ben Elisha goto unlock; 393145e5637SEran Ben Elisha } 394145e5637SEran Ben Elisha 395145e5637SEran Ben Elisha close_sqs_nest: 396de8650a8SEran Ben Elisha err = devlink_fmsg_arr_pair_nest_end(fmsg); 397de8650a8SEran Ben Elisha if (err) 398de8650a8SEran Ben Elisha goto unlock; 399de8650a8SEran Ben Elisha 400de8650a8SEran Ben Elisha unlock: 401de8650a8SEran Ben Elisha mutex_unlock(&priv->state_lock); 402de8650a8SEran Ben Elisha return err; 403de8650a8SEran Ben Elisha } 404de8650a8SEran Ben Elisha 4055f29458bSAya Levin static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 4065f29458bSAya Levin void *ctx) 4075f29458bSAya Levin { 4085f29458bSAya Levin struct mlx5_rsc_key key = {}; 4095f29458bSAya Levin struct mlx5e_txqsq *sq = ctx; 4105f29458bSAya Levin int err; 4115f29458bSAya Levin 4125f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4135f29458bSAya Levin return 0; 4145f29458bSAya Levin 415d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4165f29458bSAya Levin if (err) 4175f29458bSAya Levin return err; 4185f29458bSAya Levin 4195f29458bSAya Levin key.size = PAGE_SIZE; 4205f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4215f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4225f29458bSAya Levin if (err) 4235f29458bSAya Levin return err; 4245f29458bSAya Levin 425d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4265f29458bSAya Levin if (err) 4275f29458bSAya Levin return err; 4285f29458bSAya Levin 429d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); 4305f29458bSAya Levin if (err) 4315f29458bSAya Levin return err; 4325f29458bSAya Levin 433d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 4345f29458bSAya Levin if (err) 4355f29458bSAya Levin return err; 4365f29458bSAya Levin 4375f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 4385f29458bSAya Levin key.index1 = sq->sqn; 4395f29458bSAya Levin key.num_of_obj1 = 1; 4405f29458bSAya Levin 4415f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4425f29458bSAya Levin if (err) 4435f29458bSAya Levin return err; 4445f29458bSAya Levin 445d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4465f29458bSAya Levin if (err) 4475f29458bSAya Levin return err; 4485f29458bSAya Levin 449d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); 4505f29458bSAya Levin if (err) 4515f29458bSAya Levin return err; 4525f29458bSAya Levin 4535f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SND_BUFF; 4545f29458bSAya Levin key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 4555f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4565f29458bSAya Levin if (err) 4575f29458bSAya Levin return err; 4585f29458bSAya Levin 459d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4605f29458bSAya Levin if (err) 4615f29458bSAya Levin return err; 4625f29458bSAya Levin 463d5cbedd7SAya Levin return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4645f29458bSAya Levin } 4655f29458bSAya Levin 4665f29458bSAya Levin static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, 4675f29458bSAya Levin struct devlink_fmsg *fmsg) 4685f29458bSAya Levin { 469b0d35de4SAya Levin struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 4705f29458bSAya Levin struct mlx5_rsc_key key = {}; 4715f29458bSAya Levin int i, tc, err; 4725f29458bSAya Levin 4735f29458bSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 4745f29458bSAya Levin return 0; 4755f29458bSAya Levin 476d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 4775f29458bSAya Levin if (err) 4785f29458bSAya Levin return err; 4795f29458bSAya Levin 4805f29458bSAya Levin key.size = PAGE_SIZE; 4815f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 4825f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 4835f29458bSAya Levin if (err) 4845f29458bSAya Levin return err; 4855f29458bSAya Levin 486d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 4875f29458bSAya Levin if (err) 4885f29458bSAya Levin return err; 4895f29458bSAya Levin 4905f29458bSAya Levin err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); 4915f29458bSAya Levin if (err) 4925f29458bSAya Levin return err; 4935f29458bSAya Levin 4945f29458bSAya Levin for (i = 0; i < priv->channels.num; i++) { 4955f29458bSAya Levin struct mlx5e_channel *c = priv->channels.c[i]; 4965f29458bSAya Levin 497*86d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 4985f29458bSAya Levin struct mlx5e_txqsq *sq = &c->sq[tc]; 4995f29458bSAya Levin 5005f29458bSAya Levin err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); 5015f29458bSAya Levin if (err) 5025f29458bSAya Levin return err; 5035f29458bSAya Levin } 5045f29458bSAya Levin } 505145e5637SEran Ben Elisha 50624c22dd0SAya Levin if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { 507*86d747a3STariq Toukan for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { 508145e5637SEran Ben Elisha struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; 509145e5637SEran Ben Elisha 510145e5637SEran Ben Elisha err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); 511145e5637SEran Ben Elisha if (err) 512145e5637SEran Ben Elisha return err; 513145e5637SEran Ben Elisha } 514145e5637SEran Ben Elisha } 515145e5637SEran Ben Elisha 5165f29458bSAya Levin return devlink_fmsg_arr_pair_nest_end(fmsg); 5175f29458bSAya Levin } 5185f29458bSAya Levin 5195f29458bSAya Levin static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, 5205f29458bSAya Levin struct mlx5e_err_ctx *err_ctx, 5215f29458bSAya Levin struct devlink_fmsg *fmsg) 5225f29458bSAya Levin { 5235f29458bSAya Levin return err_ctx->dump(priv, fmsg, err_ctx->ctx); 5245f29458bSAya Levin } 5255f29458bSAya Levin 5265f29458bSAya Levin static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, 5275f29458bSAya Levin struct devlink_fmsg *fmsg, void *context, 5285f29458bSAya Levin struct netlink_ext_ack *extack) 5295f29458bSAya Levin { 5305f29458bSAya Levin struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 5315f29458bSAya Levin struct mlx5e_err_ctx *err_ctx = context; 5325f29458bSAya Levin 5335f29458bSAya Levin return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : 5345f29458bSAya Levin mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); 5355f29458bSAya Levin } 5365f29458bSAya Levin 5370a56be3cSAya Levin void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) 5380a56be3cSAya Levin { 5390a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 5404ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5410a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5420a56be3cSAya Levin 5430a56be3cSAya Levin err_ctx.ctx = sq; 5440a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; 5455f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 546b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); 5470a56be3cSAya Levin 5480a56be3cSAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 5490a56be3cSAya Levin } 5500a56be3cSAya Levin 5510a56be3cSAya Levin int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) 5520a56be3cSAya Levin { 5530a56be3cSAya Levin char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 554e6205564SAya Levin struct mlx5e_tx_timeout_ctx to_ctx = {}; 5554ad40d8eSEran Ben Elisha struct mlx5e_priv *priv = sq->priv; 5560a56be3cSAya Levin struct mlx5e_err_ctx err_ctx = {}; 5570a56be3cSAya Levin 558e6205564SAya Levin to_ctx.sq = sq; 559e6205564SAya Levin err_ctx.ctx = &to_ctx; 5600a56be3cSAya Levin err_ctx.recover = mlx5e_tx_reporter_timeout_recover; 5615f29458bSAya Levin err_ctx.dump = mlx5e_tx_reporter_dump_sq; 562b21aef7eSJoe Perches snprintf(err_str, sizeof(err_str), 563b21aef7eSJoe Perches "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", 5644ad40d8eSEran Ben Elisha sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 5650a56be3cSAya Levin jiffies_to_usecs(jiffies - sq->txq->trans_start)); 5660a56be3cSAya Levin 567e6205564SAya Levin mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); 568e6205564SAya Levin return to_ctx.status; 5690a56be3cSAya Levin } 5700a56be3cSAya Levin 571de8650a8SEran Ben Elisha static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { 572de8650a8SEran Ben Elisha .name = "tx", 573de8650a8SEran Ben Elisha .recover = mlx5e_tx_reporter_recover, 574de8650a8SEran Ben Elisha .diagnose = mlx5e_tx_reporter_diagnose, 5755f29458bSAya Levin .dump = mlx5e_tx_reporter_dump, 576de8650a8SEran Ben Elisha }; 577de8650a8SEran Ben Elisha 578de8650a8SEran Ben Elisha #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 579de8650a8SEran Ben Elisha 580b3ea4c4fSEran Ben Elisha void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) 581de8650a8SEran Ben Elisha { 582c27971d0SRoi Dayan struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); 583baf6dfdbSAya Levin struct devlink_health_reporter *reporter; 584de8650a8SEran Ben Elisha 585c27971d0SRoi Dayan reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops, 586b7e93bb6SVladyslav Tarasiuk MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); 587baf6dfdbSAya Levin if (IS_ERR(reporter)) { 588de8650a8SEran Ben Elisha netdev_warn(priv->netdev, 589de8650a8SEran Ben Elisha "Failed to create tx reporter, err = %ld\n", 590baf6dfdbSAya Levin PTR_ERR(reporter)); 591b3ea4c4fSEran Ben Elisha return; 5927f7cc235SAya Levin } 593baf6dfdbSAya Levin priv->tx_reporter = reporter; 594de8650a8SEran Ben Elisha } 595de8650a8SEran Ben Elisha 59606293ae4SAya Levin void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) 597de8650a8SEran Ben Elisha { 598baf6dfdbSAya Levin if (!priv->tx_reporter) 599de8650a8SEran Ben Elisha return; 600de8650a8SEran Ben Elisha 601b7e93bb6SVladyslav Tarasiuk devlink_port_health_reporter_destroy(priv->tx_reporter); 6027a9fb35eSRoi Dayan priv->tx_reporter = NULL; 603de8650a8SEran Ben Elisha } 604