1c50de4afSAya Levin // SPDX-License-Identifier: GPL-2.0 2c50de4afSAya Levin // Copyright (c) 2019 Mellanox Technologies. 3c50de4afSAya Levin 4c50de4afSAya Levin #include "health.h" 5c50de4afSAya Levin #include "lib/eq.h" 65f29458bSAya Levin #include "lib/mlx5.h" 7c50de4afSAya Levin 8d5cbedd7SAya Levin int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) 92d708887SAya Levin { 102d708887SAya Levin int err; 112d708887SAya Levin 122d708887SAya Levin err = devlink_fmsg_pair_nest_start(fmsg, name); 132d708887SAya Levin if (err) 142d708887SAya Levin return err; 152d708887SAya Levin 162d708887SAya Levin err = devlink_fmsg_obj_nest_start(fmsg); 172d708887SAya Levin if (err) 182d708887SAya Levin return err; 192d708887SAya Levin 202d708887SAya Levin return 0; 212d708887SAya Levin } 222d708887SAya Levin 23d5cbedd7SAya Levin int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg) 242d708887SAya Levin { 252d708887SAya Levin int err; 262d708887SAya Levin 272d708887SAya Levin err = devlink_fmsg_obj_nest_end(fmsg); 282d708887SAya Levin if (err) 292d708887SAya Levin return err; 302d708887SAya Levin 312d708887SAya Levin err = devlink_fmsg_pair_nest_end(fmsg); 322d708887SAya Levin if (err) 332d708887SAya Levin return err; 342d708887SAya Levin 352d708887SAya Levin return 0; 362d708887SAya Levin } 372d708887SAya Levin 38d5cbedd7SAya Levin int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) 392bf09e60SAya Levin { 402bf09e60SAya Levin struct mlx5e_priv *priv = cq->channel->priv; 412bf09e60SAya Levin u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; 422bf09e60SAya Levin u8 hw_status; 432bf09e60SAya Levin void *cqc; 442bf09e60SAya Levin int err; 452bf09e60SAya Levin 46d1f62050SLeon Romanovsky err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out); 472bf09e60SAya Levin if (err) 482bf09e60SAya Levin return err; 492bf09e60SAya Levin 502bf09e60SAya Levin cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); 512bf09e60SAya Levin hw_status = MLX5_GET(cqc, cqc, status); 522bf09e60SAya Levin 53d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); 542bf09e60SAya Levin if (err) 552bf09e60SAya Levin return err; 562bf09e60SAya Levin 572bf09e60SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn); 582bf09e60SAya Levin if (err) 592bf09e60SAya Levin return err; 602bf09e60SAya Levin 612bf09e60SAya Levin err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status); 622bf09e60SAya Levin if (err) 632bf09e60SAya Levin return err; 642bf09e60SAya Levin 653c9d1699SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq)); 663c9d1699SAya Levin if (err) 673c9d1699SAya Levin return err; 683c9d1699SAya Levin 693c9d1699SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq)); 703c9d1699SAya Levin if (err) 713c9d1699SAya Levin return err; 723c9d1699SAya Levin 73d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 742bf09e60SAya Levin if (err) 752bf09e60SAya Levin return err; 762bf09e60SAya Levin 772bf09e60SAya Levin return 0; 782bf09e60SAya Levin } 792bf09e60SAya Levin 80d5cbedd7SAya Levin int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) 812bf09e60SAya Levin { 822bf09e60SAya Levin u8 cq_log_stride; 832bf09e60SAya Levin u32 cq_sz; 842bf09e60SAya Levin int err; 852bf09e60SAya Levin 862bf09e60SAya Levin cq_sz = mlx5_cqwq_get_size(&cq->wq); 872bf09e60SAya Levin cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); 882bf09e60SAya Levin 89d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); 902bf09e60SAya Levin if (err) 912bf09e60SAya Levin return err; 922bf09e60SAya Levin 932bf09e60SAya Levin err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride)); 942bf09e60SAya Levin if (err) 952bf09e60SAya Levin return err; 962bf09e60SAya Levin 972bf09e60SAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz); 982bf09e60SAya Levin if (err) 992bf09e60SAya Levin return err; 1002bf09e60SAya Levin 101d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 1022bf09e60SAya Levin if (err) 1032bf09e60SAya Levin return err; 1042bf09e60SAya Levin 1052bf09e60SAya Levin return 0; 1062bf09e60SAya Levin } 1072bf09e60SAya Levin 10856837c2aSAya Levin int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg) 10956837c2aSAya Levin { 11056837c2aSAya Levin int err; 11156837c2aSAya Levin 11256837c2aSAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ"); 11356837c2aSAya Levin if (err) 11456837c2aSAya Levin return err; 11556837c2aSAya Levin 11656837c2aSAya Levin err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn); 11756837c2aSAya Levin if (err) 11856837c2aSAya Levin return err; 11956837c2aSAya Levin 12056837c2aSAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn); 12156837c2aSAya Levin if (err) 12256837c2aSAya Levin return err; 12356837c2aSAya Levin 12456837c2aSAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx); 12556837c2aSAya Levin if (err) 12656837c2aSAya Levin return err; 12756837c2aSAya Levin 12856837c2aSAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index); 12956837c2aSAya Levin if (err) 13056837c2aSAya Levin return err; 13156837c2aSAya Levin 13256837c2aSAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "size", eq->core.nent); 13356837c2aSAya Levin if (err) 13456837c2aSAya Levin return err; 13556837c2aSAya Levin 13656837c2aSAya Levin return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 13756837c2aSAya Levin } 13856837c2aSAya Levin 139b3ea4c4fSEran Ben Elisha void mlx5e_health_create_reporters(struct mlx5e_priv *priv) 14011af6a6dSAya Levin { 141b3ea4c4fSEran Ben Elisha mlx5e_reporter_tx_create(priv); 142b3ea4c4fSEran Ben Elisha mlx5e_reporter_rx_create(priv); 14311af6a6dSAya Levin } 14411af6a6dSAya Levin 14511af6a6dSAya Levin void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) 14611af6a6dSAya Levin { 1479032e719SAya Levin mlx5e_reporter_rx_destroy(priv); 14811af6a6dSAya Levin mlx5e_reporter_tx_destroy(priv); 14911af6a6dSAya Levin } 15011af6a6dSAya Levin 15111af6a6dSAya Levin void mlx5e_health_channels_update(struct mlx5e_priv *priv) 15211af6a6dSAya Levin { 15311af6a6dSAya Levin if (priv->tx_reporter) 15411af6a6dSAya Levin devlink_health_reporter_state_update(priv->tx_reporter, 15511af6a6dSAya Levin DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 1569032e719SAya Levin if (priv->rx_reporter) 1579032e719SAya Levin devlink_health_reporter_state_update(priv->rx_reporter, 1589032e719SAya Levin DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 15911af6a6dSAya Levin } 16011af6a6dSAya Levin 161c50de4afSAya Levin int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn) 162c50de4afSAya Levin { 163c50de4afSAya Levin struct mlx5_core_dev *mdev = channel->mdev; 164c50de4afSAya Levin struct net_device *dev = channel->netdev; 165c50de4afSAya Levin struct mlx5e_modify_sq_param msp = {}; 166c50de4afSAya Levin int err; 167c50de4afSAya Levin 168c50de4afSAya Levin msp.curr_state = MLX5_SQC_STATE_ERR; 169c50de4afSAya Levin msp.next_state = MLX5_SQC_STATE_RST; 170c50de4afSAya Levin 171c50de4afSAya Levin err = mlx5e_modify_sq(mdev, sqn, &msp); 172c50de4afSAya Levin if (err) { 173c50de4afSAya Levin netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn); 174c50de4afSAya Levin return err; 175c50de4afSAya Levin } 176c50de4afSAya Levin 177c50de4afSAya Levin memset(&msp, 0, sizeof(msp)); 178c50de4afSAya Levin msp.curr_state = MLX5_SQC_STATE_RST; 179c50de4afSAya Levin msp.next_state = MLX5_SQC_STATE_RDY; 180c50de4afSAya Levin 181c50de4afSAya Levin err = mlx5e_modify_sq(mdev, sqn, &msp); 182c50de4afSAya Levin if (err) { 183c50de4afSAya Levin netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn); 184c50de4afSAya Levin return err; 185c50de4afSAya Levin } 186c50de4afSAya Levin 187c50de4afSAya Levin return 0; 188c50de4afSAya Levin } 189c50de4afSAya Levin 190c50de4afSAya Levin int mlx5e_health_recover_channels(struct mlx5e_priv *priv) 191c50de4afSAya Levin { 192c50de4afSAya Levin int err = 0; 193c50de4afSAya Levin 194c50de4afSAya Levin rtnl_lock(); 195c50de4afSAya Levin mutex_lock(&priv->state_lock); 196c50de4afSAya Levin 197c50de4afSAya Levin if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 198c50de4afSAya Levin goto out; 199c50de4afSAya Levin 200c50de4afSAya Levin err = mlx5e_safe_reopen_channels(priv); 201c50de4afSAya Levin 202c50de4afSAya Levin out: 203c50de4afSAya Levin mutex_unlock(&priv->state_lock); 204c50de4afSAya Levin rtnl_unlock(); 205c50de4afSAya Levin 206c50de4afSAya Levin return err; 207c50de4afSAya Levin } 208c50de4afSAya Levin 209c50de4afSAya Levin int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel) 210c50de4afSAya Levin { 211c50de4afSAya Levin u32 eqe_count; 212c50de4afSAya Levin 213c50de4afSAya Levin netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", 214c50de4afSAya Levin eq->core.eqn, eq->core.cons_index, eq->core.irqn); 215c50de4afSAya Levin 216c50de4afSAya Levin eqe_count = mlx5_eq_poll_irq_disabled(eq); 217c50de4afSAya Levin if (!eqe_count) 218c50de4afSAya Levin return -EIO; 219c50de4afSAya Levin 220c50de4afSAya Levin netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n", 221c50de4afSAya Levin eqe_count, eq->core.eqn); 222c50de4afSAya Levin 223c50de4afSAya Levin channel->stats->eq_rearm++; 224c50de4afSAya Levin return 0; 225c50de4afSAya Levin } 226c50de4afSAya Levin 227c50de4afSAya Levin int mlx5e_health_report(struct mlx5e_priv *priv, 228c50de4afSAya Levin struct devlink_health_reporter *reporter, char *err_str, 229c50de4afSAya Levin struct mlx5e_err_ctx *err_ctx) 230c50de4afSAya Levin { 231b21aef7eSJoe Perches netdev_err(priv->netdev, "%s\n", err_str); 23299cda454SEran Ben Elisha 23399cda454SEran Ben Elisha if (!reporter) 2341ad6c43cSAya Levin return err_ctx->recover(err_ctx->ctx); 23599cda454SEran Ben Elisha 236c50de4afSAya Levin return devlink_health_report(reporter, err_str, err_ctx); 237c50de4afSAya Levin } 2385f29458bSAya Levin 2395f29458bSAya Levin #define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 2405f29458bSAya Levin static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, 2415f29458bSAya Levin const void *value, u32 value_len) 2425f29458bSAya Levin 2435f29458bSAya Levin { 2445f29458bSAya Levin u32 data_size; 24519f5b63bSMoshe Tal int err = 0; 2465f29458bSAya Levin u32 offset; 2475f29458bSAya Levin 2485f29458bSAya Levin for (offset = 0; offset < value_len; offset += data_size) { 2495f29458bSAya Levin data_size = value_len - offset; 2505f29458bSAya Levin if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) 2515f29458bSAya Levin data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; 2525f29458bSAya Levin err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); 2535f29458bSAya Levin if (err) 2545f29458bSAya Levin break; 2555f29458bSAya Levin } 2565f29458bSAya Levin return err; 2575f29458bSAya Levin } 2585f29458bSAya Levin 2595f29458bSAya Levin int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, 2605f29458bSAya Levin struct devlink_fmsg *fmsg) 2615f29458bSAya Levin { 2625f29458bSAya Levin struct mlx5_core_dev *mdev = priv->mdev; 2635f29458bSAya Levin struct mlx5_rsc_dump_cmd *cmd; 2645f29458bSAya Levin struct page *page; 2655f29458bSAya Levin int cmd_err, err; 2665f29458bSAya Levin int end_err; 2675f29458bSAya Levin int size; 2685f29458bSAya Levin 2695f29458bSAya Levin if (IS_ERR_OR_NULL(mdev->rsc_dump)) 2705f29458bSAya Levin return -EOPNOTSUPP; 2715f29458bSAya Levin 2725f29458bSAya Levin page = alloc_page(GFP_KERNEL); 2735f29458bSAya Levin if (!page) 2745f29458bSAya Levin return -ENOMEM; 2755f29458bSAya Levin 2765f29458bSAya Levin err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); 2775f29458bSAya Levin if (err) 2785f29458bSAya Levin return err; 2795f29458bSAya Levin 2805f29458bSAya Levin cmd = mlx5_rsc_dump_cmd_create(mdev, key); 2815f29458bSAya Levin if (IS_ERR(cmd)) { 2825f29458bSAya Levin err = PTR_ERR(cmd); 2835f29458bSAya Levin goto free_page; 2845f29458bSAya Levin } 2855f29458bSAya Levin 2865f29458bSAya Levin do { 2875f29458bSAya Levin cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); 2885f29458bSAya Levin if (cmd_err < 0) { 2895f29458bSAya Levin err = cmd_err; 2905f29458bSAya Levin goto destroy_cmd; 2915f29458bSAya Levin } 2925f29458bSAya Levin 2935f29458bSAya Levin err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size); 2945f29458bSAya Levin if (err) 2955f29458bSAya Levin goto destroy_cmd; 2965f29458bSAya Levin 2975f29458bSAya Levin } while (cmd_err > 0); 2985f29458bSAya Levin 2995f29458bSAya Levin destroy_cmd: 3005f29458bSAya Levin mlx5_rsc_dump_cmd_destroy(cmd); 3015f29458bSAya Levin end_err = devlink_fmsg_binary_pair_nest_end(fmsg); 3025f29458bSAya Levin if (end_err) 3035f29458bSAya Levin err = end_err; 3045f29458bSAya Levin free_page: 3055f29458bSAya Levin __free_page(page); 3065f29458bSAya Levin return err; 3075f29458bSAya Levin } 3085f29458bSAya Levin 3095f29458bSAya Levin int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 3105f29458bSAya Levin int queue_idx, char *lbl) 3115f29458bSAya Levin { 3125f29458bSAya Levin struct mlx5_rsc_key key = {}; 3135f29458bSAya Levin int err; 3145f29458bSAya Levin 3155f29458bSAya Levin key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 3165f29458bSAya Levin key.index1 = queue_idx; 3175f29458bSAya Levin key.size = PAGE_SIZE; 3185f29458bSAya Levin key.num_of_obj1 = 1; 3195f29458bSAya Levin 3205f29458bSAya Levin err = devlink_fmsg_obj_nest_start(fmsg); 3215f29458bSAya Levin if (err) 3225f29458bSAya Levin return err; 3235f29458bSAya Levin 324d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl); 3255f29458bSAya Levin if (err) 3265f29458bSAya Levin return err; 3275f29458bSAya Levin 3285f29458bSAya Levin err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx); 3295f29458bSAya Levin if (err) 3305f29458bSAya Levin return err; 3315f29458bSAya Levin 3325f29458bSAya Levin err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 3335f29458bSAya Levin if (err) 3345f29458bSAya Levin return err; 3355f29458bSAya Levin 336d5cbedd7SAya Levin err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 3375f29458bSAya Levin if (err) 3385f29458bSAya Levin return err; 3395f29458bSAya Levin 3405f29458bSAya Levin return devlink_fmsg_obj_nest_end(fmsg); 3415f29458bSAya Levin } 342