1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2019 Mellanox Technologies. 3 4 #include "health.h" 5 #include "params.h" 6 #include "txrx.h" 7 #include "devlink.h" 8 #include "ptp.h" 9 10 static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) 11 { 12 int outlen = MLX5_ST_SZ_BYTES(query_rq_out); 13 void *out; 14 void *rqc; 15 int err; 16 17 out = kvzalloc(outlen, GFP_KERNEL); 18 if (!out) 19 return -ENOMEM; 20 21 err = mlx5_core_query_rq(dev, rqn, out); 22 if (err) 23 goto out; 24 25 rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); 26 *state = MLX5_GET(rqc, rqc, state); 27 28 out: 29 kvfree(out); 30 return err; 31 } 32 33 static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) 34 { 35 unsigned long exp_time = jiffies + 36 msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); 37 38 while (time_before(jiffies, exp_time)) { 39 if (icosq->cc == icosq->pc) 40 return 0; 41 42 msleep(20); 43 } 44 45 netdev_err(icosq->channel->netdev, 46 "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n", 47 icosq->sqn, icosq->cc, icosq->pc); 48 49 return -ETIMEDOUT; 50 } 51 52 static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) 53 { 54 WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n", 55 icosq->sqn, icosq->cc, icosq->pc); 56 icosq->cc = 0; 57 icosq->pc = 0; 58 } 59 60 static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) 61 { 62 struct mlx5e_rq *xskrq = NULL; 63 struct mlx5_core_dev *mdev; 64 struct mlx5e_icosq *icosq; 65 struct net_device *dev; 66 struct mlx5e_rq *rq; 67 u8 state; 68 int err; 69 70 icosq = ctx; 71 72 mutex_lock(&icosq->channel->icosq_recovery_lock); 73 74 /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ 75 rq = &icosq->channel->rq; 76 if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) 77 xskrq = &icosq->channel->xskrq; 78 mdev = icosq->channel->mdev; 79 dev = icosq->channel->netdev; 80 err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); 81 if (err) { 82 netdev_err(dev, "Failed to query ICOSQ 0x%x state. err = %d\n", 83 icosq->sqn, err); 84 goto out; 85 } 86 87 if (state != MLX5_SQC_STATE_ERR) 88 goto out; 89 90 mlx5e_deactivate_rq(rq); 91 if (xskrq) 92 mlx5e_deactivate_rq(xskrq); 93 94 err = mlx5e_wait_for_icosq_flush(icosq); 95 if (err) 96 goto out; 97 98 mlx5e_deactivate_icosq(icosq); 99 100 /* At this point, both the rq and the icosq are disabled */ 101 102 err = mlx5e_health_sq_to_ready(mdev, dev, icosq->sqn); 103 if (err) 104 goto out; 105 106 mlx5e_reset_icosq_cc_pc(icosq); 107 108 mlx5e_free_rx_in_progress_descs(rq); 109 if (xskrq) 110 mlx5e_free_rx_in_progress_descs(xskrq); 111 112 clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); 113 mlx5e_activate_icosq(icosq); 114 115 mlx5e_activate_rq(rq); 116 rq->stats->recover++; 117 118 if (xskrq) { 119 mlx5e_activate_rq(xskrq); 120 xskrq->stats->recover++; 121 } 122 123 mutex_unlock(&icosq->channel->icosq_recovery_lock); 124 125 return 0; 126 out: 127 clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); 128 mutex_unlock(&icosq->channel->icosq_recovery_lock); 129 return err; 130 } 131 132 static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) 133 { 134 struct net_device *dev = rq->netdev; 135 int err; 136 137 err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); 138 if (err) { 139 netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); 140 return err; 141 } 142 err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); 143 if (err) { 144 netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); 145 return err; 146 } 147 148 return 0; 149 } 150 151 static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) 152 { 153 struct mlx5e_rq *rq = ctx; 154 int err; 155 156 mlx5e_deactivate_rq(rq); 157 mlx5e_free_rx_descs(rq); 158 159 err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR); 160 if (err) 161 goto out; 162 163 clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); 164 mlx5e_activate_rq(rq); 165 rq->stats->recover++; 166 return 0; 167 out: 168 clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); 169 return err; 170 } 171 172 static int mlx5e_rx_reporter_timeout_recover(void *ctx) 173 { 174 struct mlx5_eq_comp *eq; 175 struct mlx5e_rq *rq; 176 int err; 177 178 rq = ctx; 179 eq = rq->cq.mcq.eq; 180 181 err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats); 182 if (err && rq->icosq) 183 clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state); 184 185 return err; 186 } 187 188 static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) 189 { 190 return err_ctx->recover(err_ctx->ctx); 191 } 192 193 static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter, 194 void *context, 195 struct netlink_ext_ack *extack) 196 { 197 struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 198 struct mlx5e_err_ctx *err_ctx = context; 199 200 return err_ctx ? mlx5e_rx_reporter_recover_from_ctx(err_ctx) : 201 mlx5e_health_recover_channels(priv); 202 } 203 204 static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, 205 struct devlink_fmsg *fmsg) 206 { 207 int err; 208 209 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); 210 if (err) 211 return err; 212 213 err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn); 214 if (err) 215 return err; 216 217 err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); 218 if (err) 219 return err; 220 221 err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc); 222 if (err) 223 return err; 224 225 err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc); 226 if (err) 227 return err; 228 229 err = devlink_fmsg_u32_pair_put(fmsg, "WQE size", 230 mlx5_wq_cyc_get_size(&icosq->wq)); 231 if (err) 232 return err; 233 234 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); 235 if (err) 236 return err; 237 238 err = devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn); 239 if (err) 240 return err; 241 242 err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc); 243 if (err) 244 return err; 245 246 err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq)); 247 if (err) 248 return err; 249 250 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 251 if (err) 252 return err; 253 254 return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 255 } 256 257 static int 258 mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, 259 struct devlink_fmsg *fmsg) 260 { 261 u16 wqe_counter; 262 int wqes_sz; 263 u8 hw_state; 264 u16 wq_head; 265 int err; 266 267 err = mlx5e_query_rq_state(rq->mdev, rq->rqn, &hw_state); 268 if (err) 269 return err; 270 271 wqes_sz = mlx5e_rqwq_get_cur_sz(rq); 272 wq_head = mlx5e_rqwq_get_head(rq); 273 wqe_counter = mlx5e_rqwq_get_wqe_counter(rq); 274 275 err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn); 276 if (err) 277 return err; 278 279 err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); 280 if (err) 281 return err; 282 283 err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state); 284 if (err) 285 return err; 286 287 err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter); 288 if (err) 289 return err; 290 291 err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz); 292 if (err) 293 return err; 294 295 err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head); 296 if (err) 297 return err; 298 299 err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); 300 if (err) 301 return err; 302 303 err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg); 304 if (err) 305 return err; 306 307 if (rq->icosq) { 308 struct mlx5e_icosq *icosq = rq->icosq; 309 u8 icosq_hw_state; 310 311 err = mlx5_core_query_sq_state(rq->mdev, icosq->sqn, &icosq_hw_state); 312 if (err) 313 return err; 314 315 err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg); 316 if (err) 317 return err; 318 } 319 320 return 0; 321 } 322 323 static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, 324 struct devlink_fmsg *fmsg) 325 { 326 int err; 327 328 err = devlink_fmsg_obj_nest_start(fmsg); 329 if (err) 330 return err; 331 332 err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix); 333 if (err) 334 return err; 335 336 err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); 337 if (err) 338 return err; 339 340 return devlink_fmsg_obj_nest_end(fmsg); 341 } 342 343 static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, 344 struct devlink_fmsg *fmsg) 345 { 346 struct mlx5e_priv *priv = rq->priv; 347 struct mlx5e_params *params; 348 u32 rq_stride, rq_sz; 349 bool real_time; 350 int err; 351 352 params = &priv->channels.params; 353 rq_sz = mlx5e_rqwq_get_size(rq); 354 real_time = mlx5_is_real_time_rq(priv->mdev); 355 rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); 356 357 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); 358 if (err) 359 return err; 360 361 err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); 362 if (err) 363 return err; 364 365 err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride); 366 if (err) 367 return err; 368 369 err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz); 370 if (err) 371 return err; 372 373 err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); 374 if (err) 375 return err; 376 377 err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg); 378 if (err) 379 return err; 380 381 return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 382 } 383 384 static int 385 mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch, 386 struct devlink_fmsg *fmsg) 387 { 388 int err; 389 390 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); 391 if (err) 392 return err; 393 394 err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter); 395 if (err) 396 return err; 397 398 err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg); 399 if (err) 400 return err; 401 402 return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 403 } 404 405 static int 406 mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, 407 struct devlink_fmsg *fmsg) 408 { 409 struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 410 struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; 411 struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 412 int err; 413 414 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); 415 if (err) 416 return err; 417 418 err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg); 419 if (err) 420 return err; 421 422 if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { 423 err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg); 424 if (err) 425 return err; 426 } 427 428 return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 429 } 430 431 static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, 432 struct devlink_fmsg *fmsg) 433 { 434 int err; 435 436 err = devlink_fmsg_obj_nest_start(fmsg); 437 if (err) 438 return err; 439 440 err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); 441 if (err) 442 return err; 443 444 err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); 445 if (err) 446 return err; 447 448 err = devlink_fmsg_obj_nest_end(fmsg); 449 if (err) 450 return err; 451 452 return 0; 453 } 454 455 static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, 456 struct devlink_fmsg *fmsg, 457 struct netlink_ext_ack *extack) 458 { 459 struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 460 struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 461 int i, err = 0; 462 463 mutex_lock(&priv->state_lock); 464 465 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 466 goto unlock; 467 468 err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); 469 if (err) 470 goto unlock; 471 472 err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); 473 if (err) 474 goto unlock; 475 476 for (i = 0; i < priv->channels.num; i++) { 477 struct mlx5e_rq *rq = &priv->channels.c[i]->rq; 478 479 err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); 480 if (err) 481 goto unlock; 482 } 483 if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { 484 err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); 485 if (err) 486 goto unlock; 487 } 488 err = devlink_fmsg_arr_pair_nest_end(fmsg); 489 unlock: 490 mutex_unlock(&priv->state_lock); 491 return err; 492 } 493 494 static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 495 void *ctx) 496 { 497 struct mlx5e_txqsq *icosq = ctx; 498 struct mlx5_rsc_key key = {}; 499 int err; 500 501 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 502 return 0; 503 504 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); 505 if (err) 506 return err; 507 508 key.size = PAGE_SIZE; 509 key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; 510 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 511 if (err) 512 return err; 513 514 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 515 if (err) 516 return err; 517 518 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); 519 if (err) 520 return err; 521 522 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 523 if (err) 524 return err; 525 526 key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 527 key.index1 = icosq->sqn; 528 key.num_of_obj1 = 1; 529 530 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 531 if (err) 532 return err; 533 534 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 535 if (err) 536 return err; 537 538 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); 539 if (err) 540 return err; 541 542 key.rsc = MLX5_SGMT_TYPE_SND_BUFF; 543 key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 544 545 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 546 if (err) 547 return err; 548 549 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 550 if (err) 551 return err; 552 553 return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 554 } 555 556 static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 557 void *ctx) 558 { 559 struct mlx5_rsc_key key = {}; 560 struct mlx5e_rq *rq = ctx; 561 int err; 562 563 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 564 return 0; 565 566 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); 567 if (err) 568 return err; 569 570 key.size = PAGE_SIZE; 571 key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; 572 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 573 if (err) 574 return err; 575 576 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 577 if (err) 578 return err; 579 580 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); 581 if (err) 582 return err; 583 584 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); 585 if (err) 586 return err; 587 588 key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 589 key.index1 = rq->rqn; 590 key.num_of_obj1 = 1; 591 592 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 593 if (err) 594 return err; 595 596 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 597 if (err) 598 return err; 599 600 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff"); 601 if (err) 602 return err; 603 604 key.rsc = MLX5_SGMT_TYPE_RCV_BUFF; 605 key.num_of_obj2 = MLX5_RSC_DUMP_ALL; 606 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 607 if (err) 608 return err; 609 610 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 611 if (err) 612 return err; 613 614 return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 615 } 616 617 static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, 618 struct devlink_fmsg *fmsg) 619 { 620 struct mlx5e_ptp *ptp_ch = priv->channels.ptp; 621 struct mlx5_rsc_key key = {}; 622 int i, err; 623 624 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 625 return 0; 626 627 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); 628 if (err) 629 return err; 630 631 key.size = PAGE_SIZE; 632 key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; 633 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 634 if (err) 635 return err; 636 637 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 638 if (err) 639 return err; 640 641 err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); 642 if (err) 643 return err; 644 645 for (i = 0; i < priv->channels.num; i++) { 646 struct mlx5e_rq *rq = &priv->channels.c[i]->rq; 647 648 err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ"); 649 if (err) 650 return err; 651 } 652 653 if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { 654 err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ"); 655 if (err) 656 return err; 657 } 658 659 return devlink_fmsg_arr_pair_nest_end(fmsg); 660 } 661 662 static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv, 663 struct mlx5e_err_ctx *err_ctx, 664 struct devlink_fmsg *fmsg) 665 { 666 return err_ctx->dump(priv, fmsg, err_ctx->ctx); 667 } 668 669 static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, 670 struct devlink_fmsg *fmsg, void *context, 671 struct netlink_ext_ack *extack) 672 { 673 struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); 674 struct mlx5e_err_ctx *err_ctx = context; 675 676 return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : 677 mlx5e_rx_reporter_dump_all_rqs(priv, fmsg); 678 } 679 680 void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) 681 { 682 char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {}; 683 char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 684 struct mlx5e_icosq *icosq = rq->icosq; 685 struct mlx5e_priv *priv = rq->priv; 686 struct mlx5e_err_ctx err_ctx = {}; 687 688 err_ctx.ctx = rq; 689 err_ctx.recover = mlx5e_rx_reporter_timeout_recover; 690 err_ctx.dump = mlx5e_rx_reporter_dump_rq; 691 692 if (icosq) 693 snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn); 694 snprintf(err_str, sizeof(err_str), 695 "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x", 696 rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); 697 698 mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); 699 } 700 701 void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) 702 { 703 char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 704 struct mlx5e_priv *priv = rq->priv; 705 struct mlx5e_err_ctx err_ctx = {}; 706 707 err_ctx.ctx = rq; 708 err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; 709 err_ctx.dump = mlx5e_rx_reporter_dump_rq; 710 snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn); 711 712 mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); 713 } 714 715 void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) 716 { 717 struct mlx5e_priv *priv = icosq->channel->priv; 718 char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; 719 struct mlx5e_err_ctx err_ctx = {}; 720 721 err_ctx.ctx = icosq; 722 err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; 723 err_ctx.dump = mlx5e_rx_reporter_dump_icosq; 724 snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn); 725 726 mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); 727 } 728 729 void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) 730 { 731 mutex_lock(&c->icosq_recovery_lock); 732 } 733 734 void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) 735 { 736 mutex_unlock(&c->icosq_recovery_lock); 737 } 738 739 static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { 740 .name = "rx", 741 .recover = mlx5e_rx_reporter_recover, 742 .diagnose = mlx5e_rx_reporter_diagnose, 743 .dump = mlx5e_rx_reporter_dump, 744 }; 745 746 #define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 747 748 void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) 749 { 750 struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); 751 struct devlink_health_reporter *reporter; 752 753 reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops, 754 MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); 755 if (IS_ERR(reporter)) { 756 netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", 757 PTR_ERR(reporter)); 758 return; 759 } 760 priv->rx_reporter = reporter; 761 } 762 763 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) 764 { 765 if (!priv->rx_reporter) 766 return; 767 768 devlink_port_health_reporter_destroy(priv->rx_reporter); 769 priv->rx_reporter = NULL; 770 } 771