1c50de4afSAya Levin // SPDX-License-Identifier: GPL-2.0 2c50de4afSAya Levin // Copyright (c) 2019 Mellanox Technologies. 3c50de4afSAya Levin 4c50de4afSAya Levin #include "health.h" 5c50de4afSAya Levin #include "lib/eq.h" 6c50de4afSAya Levin 72d708887SAya Levin int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) 82d708887SAya Levin { 92d708887SAya Levin int err; 102d708887SAya Levin 112d708887SAya Levin err = devlink_fmsg_pair_nest_start(fmsg, name); 122d708887SAya Levin if (err) 132d708887SAya Levin return err; 142d708887SAya Levin 152d708887SAya Levin err = devlink_fmsg_obj_nest_start(fmsg); 162d708887SAya Levin if (err) 172d708887SAya Levin return err; 182d708887SAya Levin 192d708887SAya Levin return 0; 202d708887SAya Levin } 212d708887SAya Levin 222d708887SAya Levin int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg) 232d708887SAya Levin { 242d708887SAya Levin int err; 252d708887SAya Levin 262d708887SAya Levin err = devlink_fmsg_obj_nest_end(fmsg); 272d708887SAya Levin if (err) 282d708887SAya Levin return err; 292d708887SAya Levin 302d708887SAya Levin err = devlink_fmsg_pair_nest_end(fmsg); 312d708887SAya Levin if (err) 322d708887SAya Levin return err; 332d708887SAya Levin 342d708887SAya Levin return 0; 352d708887SAya Levin } 362d708887SAya Levin 372bf09e60SAya Levin int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) 382bf09e60SAya Levin { 392bf09e60SAya Levin struct mlx5e_priv *priv = cq->channel->priv; 402bf09e60SAya Levin u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; 412bf09e60SAya Levin u8 hw_status; 422bf09e60SAya Levin void *cqc; 432bf09e60SAya Levin int err; 442bf09e60SAya Levin 452bf09e60SAya Levin err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out)); 462bf09e60SAya Levin if (err) 472bf09e60SAya Levin return err; 482bf09e60SAya Levin 492bf09e60SAya Levin cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); 502bf09e60SAya Levin hw_status = MLX5_GET(cqc, cqc, status); 512bf09e60SAya Levin 522bf09e60SAya Levin err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); 532bf09e60SAya Levin if (err) 542bf09e60SAya Levin return err; 552bf09e60SAya Levin 562bf09e60SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn); 572bf09e60SAya Levin if (err) 582bf09e60SAya Levin return err; 592bf09e60SAya Levin 602bf09e60SAya Levin err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status); 612bf09e60SAya Levin if (err) 622bf09e60SAya Levin return err; 632bf09e60SAya Levin 642bf09e60SAya Levin err = mlx5e_reporter_named_obj_nest_end(fmsg); 652bf09e60SAya Levin if (err) 662bf09e60SAya Levin return err; 672bf09e60SAya Levin 682bf09e60SAya Levin return 0; 692bf09e60SAya Levin } 702bf09e60SAya Levin 712bf09e60SAya Levin int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) 722bf09e60SAya Levin { 732bf09e60SAya Levin u8 cq_log_stride; 742bf09e60SAya Levin u32 cq_sz; 752bf09e60SAya Levin int err; 762bf09e60SAya Levin 772bf09e60SAya Levin cq_sz = mlx5_cqwq_get_size(&cq->wq); 782bf09e60SAya Levin cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); 792bf09e60SAya Levin 802bf09e60SAya Levin err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); 812bf09e60SAya Levin if (err) 822bf09e60SAya Levin return err; 832bf09e60SAya Levin 842bf09e60SAya Levin err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride)); 852bf09e60SAya Levin if (err) 862bf09e60SAya Levin return err; 872bf09e60SAya Levin 882bf09e60SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz); 892bf09e60SAya Levin if (err) 902bf09e60SAya Levin return err; 912bf09e60SAya Levin 922bf09e60SAya Levin err = mlx5e_reporter_named_obj_nest_end(fmsg); 932bf09e60SAya Levin if (err) 942bf09e60SAya Levin return err; 952bf09e60SAya Levin 962bf09e60SAya Levin return 0; 972bf09e60SAya Levin } 982bf09e60SAya Levin 9911af6a6dSAya Levin int mlx5e_health_create_reporters(struct mlx5e_priv *priv) 10011af6a6dSAya Levin { 1019032e719SAya Levin int err; 1029032e719SAya Levin 1039032e719SAya Levin err = mlx5e_reporter_tx_create(priv); 1049032e719SAya Levin if (err) 1059032e719SAya Levin return err; 1069032e719SAya Levin 1079032e719SAya Levin err = mlx5e_reporter_rx_create(priv); 1089032e719SAya Levin if (err) 1099032e719SAya Levin return err; 1109032e719SAya Levin 1119032e719SAya Levin return 0; 11211af6a6dSAya Levin } 11311af6a6dSAya Levin 11411af6a6dSAya Levin void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) 11511af6a6dSAya Levin { 1169032e719SAya Levin mlx5e_reporter_rx_destroy(priv); 11711af6a6dSAya Levin mlx5e_reporter_tx_destroy(priv); 11811af6a6dSAya Levin } 11911af6a6dSAya Levin 12011af6a6dSAya Levin void mlx5e_health_channels_update(struct mlx5e_priv *priv) 12111af6a6dSAya Levin { 12211af6a6dSAya Levin if (priv->tx_reporter) 12311af6a6dSAya Levin devlink_health_reporter_state_update(priv->tx_reporter, 12411af6a6dSAya Levin DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 1259032e719SAya Levin if (priv->rx_reporter) 1269032e719SAya Levin devlink_health_reporter_state_update(priv->rx_reporter, 1279032e719SAya Levin DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 12811af6a6dSAya Levin } 12911af6a6dSAya Levin 130c50de4afSAya Levin int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn) 131c50de4afSAya Levin { 132c50de4afSAya Levin struct mlx5_core_dev *mdev = channel->mdev; 133c50de4afSAya Levin struct net_device *dev = channel->netdev; 134c50de4afSAya Levin struct mlx5e_modify_sq_param msp = {}; 135c50de4afSAya Levin int err; 136c50de4afSAya Levin 137c50de4afSAya Levin msp.curr_state = MLX5_SQC_STATE_ERR; 138c50de4afSAya Levin msp.next_state = MLX5_SQC_STATE_RST; 139c50de4afSAya Levin 140c50de4afSAya Levin err = mlx5e_modify_sq(mdev, sqn, &msp); 141c50de4afSAya Levin if (err) { 142c50de4afSAya Levin netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn); 143c50de4afSAya Levin return err; 144c50de4afSAya Levin } 145c50de4afSAya Levin 146c50de4afSAya Levin memset(&msp, 0, sizeof(msp)); 147c50de4afSAya Levin msp.curr_state = MLX5_SQC_STATE_RST; 148c50de4afSAya Levin msp.next_state = MLX5_SQC_STATE_RDY; 149c50de4afSAya Levin 150c50de4afSAya Levin err = mlx5e_modify_sq(mdev, sqn, &msp); 151c50de4afSAya Levin if (err) { 152c50de4afSAya Levin netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn); 153c50de4afSAya Levin return err; 154c50de4afSAya Levin } 155c50de4afSAya Levin 156c50de4afSAya Levin return 0; 157c50de4afSAya Levin } 158c50de4afSAya Levin 159c50de4afSAya Levin int mlx5e_health_recover_channels(struct mlx5e_priv *priv) 160c50de4afSAya Levin { 161c50de4afSAya Levin int err = 0; 162c50de4afSAya Levin 163c50de4afSAya Levin rtnl_lock(); 164c50de4afSAya Levin mutex_lock(&priv->state_lock); 165c50de4afSAya Levin 166c50de4afSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 167c50de4afSAya Levin goto out; 168c50de4afSAya Levin 169c50de4afSAya Levin err = mlx5e_safe_reopen_channels(priv); 170c50de4afSAya Levin 171c50de4afSAya Levin out: 172c50de4afSAya Levin mutex_unlock(&priv->state_lock); 173c50de4afSAya Levin rtnl_unlock(); 174c50de4afSAya Levin 175c50de4afSAya Levin return err; 176c50de4afSAya Levin } 177c50de4afSAya Levin 178c50de4afSAya Levin int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel) 179c50de4afSAya Levin { 180c50de4afSAya Levin u32 eqe_count; 181c50de4afSAya Levin 182c50de4afSAya Levin netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", 183c50de4afSAya Levin eq->core.eqn, eq->core.cons_index, eq->core.irqn); 184c50de4afSAya Levin 185c50de4afSAya Levin eqe_count = mlx5_eq_poll_irq_disabled(eq); 186c50de4afSAya Levin if (!eqe_count) 187c50de4afSAya Levin return -EIO; 188c50de4afSAya Levin 189c50de4afSAya Levin netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n", 190c50de4afSAya Levin eqe_count, eq->core.eqn); 191c50de4afSAya Levin 192c50de4afSAya Levin channel->stats->eq_rearm++; 193c50de4afSAya Levin return 0; 194c50de4afSAya Levin } 195c50de4afSAya Levin 196c50de4afSAya Levin int mlx5e_health_report(struct mlx5e_priv *priv, 197c50de4afSAya Levin struct devlink_health_reporter *reporter, char *err_str, 198c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx) 199c50de4afSAya Levin { 200c50de4afSAya Levin netdev_err(priv->netdev, err_str); 20199cda454SEran Ben Elisha 20299cda454SEran Ben Elisha if (!reporter) 2031ad6c43cSAya Levin return err_ctx->recover(err_ctx->ctx); 20499cda454SEran Ben Elisha 205c50de4afSAya Levin return devlink_health_report(reporter, err_str, err_ctx); 206c50de4afSAya Levin } 207