1 // SPDX-License-Identifier: GPL-2.0+ 2 /* Copyright (c) 2016-2017 Hisilicon Limited. */ 3 4 #include "hclge_err.h" 5 6 static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = { 7 { 8 .int_msk = BIT(1), 9 .msg = "imp_itcm0_ecc_mbit_err", 10 .reset_level = HNAE3_NONE_RESET 11 }, { 12 .int_msk = BIT(3), 13 .msg = "imp_itcm1_ecc_mbit_err", 14 .reset_level = HNAE3_NONE_RESET 15 }, { 16 .int_msk = BIT(5), 17 .msg = "imp_itcm2_ecc_mbit_err", 18 .reset_level = HNAE3_NONE_RESET 19 }, { 20 .int_msk = BIT(7), 21 .msg = "imp_itcm3_ecc_mbit_err", 22 .reset_level = HNAE3_NONE_RESET 23 }, { 24 .int_msk = BIT(9), 25 .msg = "imp_dtcm0_mem0_ecc_mbit_err", 26 .reset_level = HNAE3_NONE_RESET 27 }, { 28 .int_msk = BIT(11), 29 .msg = "imp_dtcm0_mem1_ecc_mbit_err", 30 .reset_level = HNAE3_NONE_RESET 31 }, { 32 .int_msk = BIT(13), 33 .msg = "imp_dtcm1_mem0_ecc_mbit_err", 34 .reset_level = HNAE3_NONE_RESET 35 }, { 36 .int_msk = BIT(15), 37 .msg = "imp_dtcm1_mem1_ecc_mbit_err", 38 .reset_level = HNAE3_NONE_RESET 39 }, { 40 .int_msk = BIT(17), 41 .msg = "imp_itcm4_ecc_mbit_err", 42 .reset_level = HNAE3_NONE_RESET 43 }, { 44 /* sentinel */ 45 } 46 }; 47 48 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = { 49 { 50 .int_msk = BIT(1), 51 .msg = "cmdq_nic_rx_depth_ecc_mbit_err", 52 .reset_level = HNAE3_NONE_RESET 53 }, { 54 .int_msk = BIT(3), 55 .msg = "cmdq_nic_tx_depth_ecc_mbit_err", 56 .reset_level = HNAE3_NONE_RESET 57 }, { 58 .int_msk = BIT(5), 59 .msg = "cmdq_nic_rx_tail_ecc_mbit_err", 60 .reset_level = HNAE3_NONE_RESET 61 }, { 62 .int_msk = BIT(7), 63 .msg = "cmdq_nic_tx_tail_ecc_mbit_err", 64 .reset_level = HNAE3_NONE_RESET 65 }, { 66 .int_msk = BIT(9), 67 .msg = "cmdq_nic_rx_head_ecc_mbit_err", 68 .reset_level = HNAE3_NONE_RESET 69 }, { 70 .int_msk = BIT(11), 71 .msg = "cmdq_nic_tx_head_ecc_mbit_err", 72 .reset_level = HNAE3_NONE_RESET 73 }, { 74 .int_msk = BIT(13), 75 .msg = "cmdq_nic_rx_addr_ecc_mbit_err", 76 .reset_level = HNAE3_NONE_RESET 77 }, { 78 .int_msk 
= BIT(15), 79 .msg = "cmdq_nic_tx_addr_ecc_mbit_err", 80 .reset_level = HNAE3_NONE_RESET 81 }, { 82 .int_msk = BIT(17), 83 .msg = "cmdq_rocee_rx_depth_ecc_mbit_err", 84 .reset_level = HNAE3_NONE_RESET 85 }, { 86 .int_msk = BIT(19), 87 .msg = "cmdq_rocee_tx_depth_ecc_mbit_err", 88 .reset_level = HNAE3_NONE_RESET 89 }, { 90 .int_msk = BIT(21), 91 .msg = "cmdq_rocee_rx_tail_ecc_mbit_err", 92 .reset_level = HNAE3_NONE_RESET 93 }, { 94 .int_msk = BIT(23), 95 .msg = "cmdq_rocee_tx_tail_ecc_mbit_err", 96 .reset_level = HNAE3_NONE_RESET 97 }, { 98 .int_msk = BIT(25), 99 .msg = "cmdq_rocee_rx_head_ecc_mbit_err", 100 .reset_level = HNAE3_NONE_RESET 101 }, { 102 .int_msk = BIT(27), 103 .msg = "cmdq_rocee_tx_head_ecc_mbit_err", 104 .reset_level = HNAE3_NONE_RESET 105 }, { 106 .int_msk = BIT(29), 107 .msg = "cmdq_rocee_rx_addr_ecc_mbit_err", 108 .reset_level = HNAE3_NONE_RESET 109 }, { 110 .int_msk = BIT(31), 111 .msg = "cmdq_rocee_tx_addr_ecc_mbit_err", 112 .reset_level = HNAE3_NONE_RESET 113 }, { 114 /* sentinel */ 115 } 116 }; 117 118 static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { 119 { 120 .int_msk = BIT(6), 121 .msg = "tqp_int_cfg_even_ecc_mbit_err", 122 .reset_level = HNAE3_NONE_RESET 123 }, { 124 .int_msk = BIT(7), 125 .msg = "tqp_int_cfg_odd_ecc_mbit_err", 126 .reset_level = HNAE3_NONE_RESET 127 }, { 128 .int_msk = BIT(8), 129 .msg = "tqp_int_ctrl_even_ecc_mbit_err", 130 .reset_level = HNAE3_NONE_RESET 131 }, { 132 .int_msk = BIT(9), 133 .msg = "tqp_int_ctrl_odd_ecc_mbit_err", 134 .reset_level = HNAE3_NONE_RESET 135 }, { 136 .int_msk = BIT(10), 137 .msg = "tx_que_scan_int_ecc_mbit_err", 138 .reset_level = HNAE3_NONE_RESET 139 }, { 140 .int_msk = BIT(11), 141 .msg = "rx_que_scan_int_ecc_mbit_err", 142 .reset_level = HNAE3_NONE_RESET 143 }, { 144 /* sentinel */ 145 } 146 }; 147 148 static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = { 149 { 150 .int_msk = BIT(1), 151 .msg = "msix_nic_ecc_mbit_err", 152 .reset_level = HNAE3_NONE_RESET 153 }, { 
154 .int_msk = BIT(3), 155 .msg = "msix_rocee_ecc_mbit_err", 156 .reset_level = HNAE3_NONE_RESET 157 }, { 158 /* sentinel */ 159 } 160 }; 161 162 static const struct hclge_hw_error hclge_igu_int[] = { 163 { 164 .int_msk = BIT(0), 165 .msg = "igu_rx_buf0_ecc_mbit_err", 166 .reset_level = HNAE3_GLOBAL_RESET 167 }, { 168 .int_msk = BIT(2), 169 .msg = "igu_rx_buf1_ecc_mbit_err", 170 .reset_level = HNAE3_GLOBAL_RESET 171 }, { 172 /* sentinel */ 173 } 174 }; 175 176 static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { 177 { 178 .int_msk = BIT(0), 179 .msg = "rx_buf_overflow", 180 .reset_level = HNAE3_GLOBAL_RESET 181 }, { 182 .int_msk = BIT(1), 183 .msg = "rx_stp_fifo_overflow", 184 .reset_level = HNAE3_GLOBAL_RESET 185 }, { 186 .int_msk = BIT(2), 187 .msg = "rx_stp_fifo_underflow", 188 .reset_level = HNAE3_GLOBAL_RESET 189 }, { 190 .int_msk = BIT(3), 191 .msg = "tx_buf_overflow", 192 .reset_level = HNAE3_GLOBAL_RESET 193 }, { 194 .int_msk = BIT(4), 195 .msg = "tx_buf_underrun", 196 .reset_level = HNAE3_GLOBAL_RESET 197 }, { 198 .int_msk = BIT(5), 199 .msg = "rx_stp_buf_overflow", 200 .reset_level = HNAE3_GLOBAL_RESET 201 }, { 202 /* sentinel */ 203 } 204 }; 205 206 static const struct hclge_hw_error hclge_ncsi_err_int[] = { 207 { 208 .int_msk = BIT(1), 209 .msg = "ncsi_tx_ecc_mbit_err", 210 .reset_level = HNAE3_NONE_RESET 211 }, { 212 /* sentinel */ 213 } 214 }; 215 216 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = { 217 { 218 .int_msk = BIT(0), 219 .msg = "vf_vlan_ad_mem_ecc_mbit_err", 220 .reset_level = HNAE3_GLOBAL_RESET 221 }, { 222 .int_msk = BIT(1), 223 .msg = "umv_mcast_group_mem_ecc_mbit_err", 224 .reset_level = HNAE3_GLOBAL_RESET 225 }, { 226 .int_msk = BIT(2), 227 .msg = "umv_key_mem0_ecc_mbit_err", 228 .reset_level = HNAE3_GLOBAL_RESET 229 }, { 230 .int_msk = BIT(3), 231 .msg = "umv_key_mem1_ecc_mbit_err", 232 .reset_level = HNAE3_GLOBAL_RESET 233 }, { 234 .int_msk = BIT(4), 235 .msg = "umv_key_mem2_ecc_mbit_err", 236 
.reset_level = HNAE3_GLOBAL_RESET 237 }, { 238 .int_msk = BIT(5), 239 .msg = "umv_key_mem3_ecc_mbit_err", 240 .reset_level = HNAE3_GLOBAL_RESET 241 }, { 242 .int_msk = BIT(6), 243 .msg = "umv_ad_mem_ecc_mbit_err", 244 .reset_level = HNAE3_GLOBAL_RESET 245 }, { 246 .int_msk = BIT(7), 247 .msg = "rss_tc_mode_mem_ecc_mbit_err", 248 .reset_level = HNAE3_GLOBAL_RESET 249 }, { 250 .int_msk = BIT(8), 251 .msg = "rss_idt_mem0_ecc_mbit_err", 252 .reset_level = HNAE3_GLOBAL_RESET 253 }, { 254 .int_msk = BIT(9), 255 .msg = "rss_idt_mem1_ecc_mbit_err", 256 .reset_level = HNAE3_GLOBAL_RESET 257 }, { 258 .int_msk = BIT(10), 259 .msg = "rss_idt_mem2_ecc_mbit_err", 260 .reset_level = HNAE3_GLOBAL_RESET 261 }, { 262 .int_msk = BIT(11), 263 .msg = "rss_idt_mem3_ecc_mbit_err", 264 .reset_level = HNAE3_GLOBAL_RESET 265 }, { 266 .int_msk = BIT(12), 267 .msg = "rss_idt_mem4_ecc_mbit_err", 268 .reset_level = HNAE3_GLOBAL_RESET 269 }, { 270 .int_msk = BIT(13), 271 .msg = "rss_idt_mem5_ecc_mbit_err", 272 .reset_level = HNAE3_GLOBAL_RESET 273 }, { 274 .int_msk = BIT(14), 275 .msg = "rss_idt_mem6_ecc_mbit_err", 276 .reset_level = HNAE3_GLOBAL_RESET 277 }, { 278 .int_msk = BIT(15), 279 .msg = "rss_idt_mem7_ecc_mbit_err", 280 .reset_level = HNAE3_GLOBAL_RESET 281 }, { 282 .int_msk = BIT(16), 283 .msg = "rss_idt_mem8_ecc_mbit_err", 284 .reset_level = HNAE3_GLOBAL_RESET 285 }, { 286 .int_msk = BIT(17), 287 .msg = "rss_idt_mem9_ecc_mbit_err", 288 .reset_level = HNAE3_GLOBAL_RESET 289 }, { 290 .int_msk = BIT(18), 291 .msg = "rss_idt_mem10_ecc_mbit_err", 292 .reset_level = HNAE3_GLOBAL_RESET 293 }, { 294 .int_msk = BIT(19), 295 .msg = "rss_idt_mem11_ecc_mbit_err", 296 .reset_level = HNAE3_GLOBAL_RESET 297 }, { 298 .int_msk = BIT(20), 299 .msg = "rss_idt_mem12_ecc_mbit_err", 300 .reset_level = HNAE3_GLOBAL_RESET 301 }, { 302 .int_msk = BIT(21), 303 .msg = "rss_idt_mem13_ecc_mbit_err", 304 .reset_level = HNAE3_GLOBAL_RESET 305 }, { 306 .int_msk = BIT(22), 307 .msg = "rss_idt_mem14_ecc_mbit_err", 308 
.reset_level = HNAE3_GLOBAL_RESET 309 }, { 310 .int_msk = BIT(23), 311 .msg = "rss_idt_mem15_ecc_mbit_err", 312 .reset_level = HNAE3_GLOBAL_RESET 313 }, { 314 .int_msk = BIT(24), 315 .msg = "port_vlan_mem_ecc_mbit_err", 316 .reset_level = HNAE3_GLOBAL_RESET 317 }, { 318 .int_msk = BIT(25), 319 .msg = "mcast_linear_table_mem_ecc_mbit_err", 320 .reset_level = HNAE3_GLOBAL_RESET 321 }, { 322 .int_msk = BIT(26), 323 .msg = "mcast_result_mem_ecc_mbit_err", 324 .reset_level = HNAE3_GLOBAL_RESET 325 }, { 326 .int_msk = BIT(27), 327 .msg = "flow_director_ad_mem0_ecc_mbit_err", 328 .reset_level = HNAE3_GLOBAL_RESET 329 }, { 330 .int_msk = BIT(28), 331 .msg = "flow_director_ad_mem1_ecc_mbit_err", 332 .reset_level = HNAE3_GLOBAL_RESET 333 }, { 334 .int_msk = BIT(29), 335 .msg = "rx_vlan_tag_memory_ecc_mbit_err", 336 .reset_level = HNAE3_GLOBAL_RESET 337 }, { 338 .int_msk = BIT(30), 339 .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err", 340 .reset_level = HNAE3_GLOBAL_RESET 341 }, { 342 /* sentinel */ 343 } 344 }; 345 346 static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = { 347 { 348 .int_msk = BIT(0), 349 .msg = "tx_vlan_tag_err", 350 .reset_level = HNAE3_NONE_RESET 351 }, { 352 .int_msk = BIT(1), 353 .msg = "rss_list_tc_unassigned_queue_err", 354 .reset_level = HNAE3_NONE_RESET 355 }, { 356 /* sentinel */ 357 } 358 }; 359 360 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = { 361 { 362 .int_msk = BIT(0), 363 .msg = "hfs_fifo_mem_ecc_mbit_err", 364 .reset_level = HNAE3_GLOBAL_RESET 365 }, { 366 .int_msk = BIT(1), 367 .msg = "rslt_descr_fifo_mem_ecc_mbit_err", 368 .reset_level = HNAE3_GLOBAL_RESET 369 }, { 370 .int_msk = BIT(2), 371 .msg = "tx_vlan_tag_mem_ecc_mbit_err", 372 .reset_level = HNAE3_GLOBAL_RESET 373 }, { 374 .int_msk = BIT(3), 375 .msg = "FD_CN0_memory_ecc_mbit_err", 376 .reset_level = HNAE3_GLOBAL_RESET 377 }, { 378 .int_msk = BIT(4), 379 .msg = "FD_CN1_memory_ecc_mbit_err", 380 .reset_level = HNAE3_GLOBAL_RESET 381 }, { 382 
.int_msk = BIT(5), 383 .msg = "GRO_AD_memory_ecc_mbit_err", 384 .reset_level = HNAE3_GLOBAL_RESET 385 }, { 386 /* sentinel */ 387 } 388 }; 389 390 static const struct hclge_hw_error hclge_tm_sch_rint[] = { 391 { 392 .int_msk = BIT(1), 393 .msg = "tm_sch_ecc_mbit_err", 394 .reset_level = HNAE3_GLOBAL_RESET 395 }, { 396 .int_msk = BIT(2), 397 .msg = "tm_sch_port_shap_sub_fifo_wr_err", 398 .reset_level = HNAE3_GLOBAL_RESET 399 }, { 400 .int_msk = BIT(3), 401 .msg = "tm_sch_port_shap_sub_fifo_rd_err", 402 .reset_level = HNAE3_GLOBAL_RESET 403 }, { 404 .int_msk = BIT(4), 405 .msg = "tm_sch_pg_pshap_sub_fifo_wr_err", 406 .reset_level = HNAE3_GLOBAL_RESET 407 }, { 408 .int_msk = BIT(5), 409 .msg = "tm_sch_pg_pshap_sub_fifo_rd_err", 410 .reset_level = HNAE3_GLOBAL_RESET 411 }, { 412 .int_msk = BIT(6), 413 .msg = "tm_sch_pg_cshap_sub_fifo_wr_err", 414 .reset_level = HNAE3_GLOBAL_RESET 415 }, { 416 .int_msk = BIT(7), 417 .msg = "tm_sch_pg_cshap_sub_fifo_rd_err", 418 .reset_level = HNAE3_GLOBAL_RESET 419 }, { 420 .int_msk = BIT(8), 421 .msg = "tm_sch_pri_pshap_sub_fifo_wr_err", 422 .reset_level = HNAE3_GLOBAL_RESET 423 }, { 424 .int_msk = BIT(9), 425 .msg = "tm_sch_pri_pshap_sub_fifo_rd_err", 426 .reset_level = HNAE3_GLOBAL_RESET 427 }, { 428 .int_msk = BIT(10), 429 .msg = "tm_sch_pri_cshap_sub_fifo_wr_err", 430 .reset_level = HNAE3_GLOBAL_RESET 431 }, { 432 .int_msk = BIT(11), 433 .msg = "tm_sch_pri_cshap_sub_fifo_rd_err", 434 .reset_level = HNAE3_GLOBAL_RESET 435 }, { 436 .int_msk = BIT(12), 437 .msg = "tm_sch_port_shap_offset_fifo_wr_err", 438 .reset_level = HNAE3_GLOBAL_RESET 439 }, { 440 .int_msk = BIT(13), 441 .msg = "tm_sch_port_shap_offset_fifo_rd_err", 442 .reset_level = HNAE3_GLOBAL_RESET 443 }, { 444 .int_msk = BIT(14), 445 .msg = "tm_sch_pg_pshap_offset_fifo_wr_err", 446 .reset_level = HNAE3_GLOBAL_RESET 447 }, { 448 .int_msk = BIT(15), 449 .msg = "tm_sch_pg_pshap_offset_fifo_rd_err", 450 .reset_level = HNAE3_GLOBAL_RESET 451 }, { 452 .int_msk = BIT(16), 453 .msg 
= "tm_sch_pg_cshap_offset_fifo_wr_err", 454 .reset_level = HNAE3_GLOBAL_RESET 455 }, { 456 .int_msk = BIT(17), 457 .msg = "tm_sch_pg_cshap_offset_fifo_rd_err", 458 .reset_level = HNAE3_GLOBAL_RESET 459 }, { 460 .int_msk = BIT(18), 461 .msg = "tm_sch_pri_pshap_offset_fifo_wr_err", 462 .reset_level = HNAE3_GLOBAL_RESET 463 }, { 464 .int_msk = BIT(19), 465 .msg = "tm_sch_pri_pshap_offset_fifo_rd_err", 466 .reset_level = HNAE3_GLOBAL_RESET 467 }, { 468 .int_msk = BIT(20), 469 .msg = "tm_sch_pri_cshap_offset_fifo_wr_err", 470 .reset_level = HNAE3_GLOBAL_RESET 471 }, { 472 .int_msk = BIT(21), 473 .msg = "tm_sch_pri_cshap_offset_fifo_rd_err", 474 .reset_level = HNAE3_GLOBAL_RESET 475 }, { 476 .int_msk = BIT(22), 477 .msg = "tm_sch_rq_fifo_wr_err", 478 .reset_level = HNAE3_GLOBAL_RESET 479 }, { 480 .int_msk = BIT(23), 481 .msg = "tm_sch_rq_fifo_rd_err", 482 .reset_level = HNAE3_GLOBAL_RESET 483 }, { 484 .int_msk = BIT(24), 485 .msg = "tm_sch_nq_fifo_wr_err", 486 .reset_level = HNAE3_GLOBAL_RESET 487 }, { 488 .int_msk = BIT(25), 489 .msg = "tm_sch_nq_fifo_rd_err", 490 .reset_level = HNAE3_GLOBAL_RESET 491 }, { 492 .int_msk = BIT(26), 493 .msg = "tm_sch_roce_up_fifo_wr_err", 494 .reset_level = HNAE3_GLOBAL_RESET 495 }, { 496 .int_msk = BIT(27), 497 .msg = "tm_sch_roce_up_fifo_rd_err", 498 .reset_level = HNAE3_GLOBAL_RESET 499 }, { 500 .int_msk = BIT(28), 501 .msg = "tm_sch_rcb_byte_fifo_wr_err", 502 .reset_level = HNAE3_GLOBAL_RESET 503 }, { 504 .int_msk = BIT(29), 505 .msg = "tm_sch_rcb_byte_fifo_rd_err", 506 .reset_level = HNAE3_GLOBAL_RESET 507 }, { 508 .int_msk = BIT(30), 509 .msg = "tm_sch_ssu_byte_fifo_wr_err", 510 .reset_level = HNAE3_GLOBAL_RESET 511 }, { 512 .int_msk = BIT(31), 513 .msg = "tm_sch_ssu_byte_fifo_rd_err", 514 .reset_level = HNAE3_GLOBAL_RESET 515 }, { 516 /* sentinel */ 517 } 518 }; 519 520 static const struct hclge_hw_error hclge_qcn_fifo_rint[] = { 521 { 522 .int_msk = BIT(0), 523 .msg = "qcn_shap_gp0_sch_fifo_rd_err", 524 .reset_level = 
HNAE3_GLOBAL_RESET 525 }, { 526 .int_msk = BIT(1), 527 .msg = "qcn_shap_gp0_sch_fifo_wr_err", 528 .reset_level = HNAE3_GLOBAL_RESET 529 }, { 530 .int_msk = BIT(2), 531 .msg = "qcn_shap_gp1_sch_fifo_rd_err", 532 .reset_level = HNAE3_GLOBAL_RESET 533 }, { 534 .int_msk = BIT(3), 535 .msg = "qcn_shap_gp1_sch_fifo_wr_err", 536 .reset_level = HNAE3_GLOBAL_RESET 537 }, { 538 .int_msk = BIT(4), 539 .msg = "qcn_shap_gp2_sch_fifo_rd_err", 540 .reset_level = HNAE3_GLOBAL_RESET 541 }, { 542 .int_msk = BIT(5), 543 .msg = "qcn_shap_gp2_sch_fifo_wr_err", 544 .reset_level = HNAE3_GLOBAL_RESET 545 }, { 546 .int_msk = BIT(6), 547 .msg = "qcn_shap_gp3_sch_fifo_rd_err", 548 .reset_level = HNAE3_GLOBAL_RESET 549 }, { 550 .int_msk = BIT(7), 551 .msg = "qcn_shap_gp3_sch_fifo_wr_err", 552 .reset_level = HNAE3_GLOBAL_RESET 553 }, { 554 .int_msk = BIT(8), 555 .msg = "qcn_shap_gp0_offset_fifo_rd_err", 556 .reset_level = HNAE3_GLOBAL_RESET 557 }, { 558 .int_msk = BIT(9), 559 .msg = "qcn_shap_gp0_offset_fifo_wr_err", 560 .reset_level = HNAE3_GLOBAL_RESET 561 }, { 562 .int_msk = BIT(10), 563 .msg = "qcn_shap_gp1_offset_fifo_rd_err", 564 .reset_level = HNAE3_GLOBAL_RESET 565 }, { 566 .int_msk = BIT(11), 567 .msg = "qcn_shap_gp1_offset_fifo_wr_err", 568 .reset_level = HNAE3_GLOBAL_RESET 569 }, { 570 .int_msk = BIT(12), 571 .msg = "qcn_shap_gp2_offset_fifo_rd_err", 572 .reset_level = HNAE3_GLOBAL_RESET 573 }, { 574 .int_msk = BIT(13), 575 .msg = "qcn_shap_gp2_offset_fifo_wr_err", 576 .reset_level = HNAE3_GLOBAL_RESET 577 }, { 578 .int_msk = BIT(14), 579 .msg = "qcn_shap_gp3_offset_fifo_rd_err", 580 .reset_level = HNAE3_GLOBAL_RESET 581 }, { 582 .int_msk = BIT(15), 583 .msg = "qcn_shap_gp3_offset_fifo_wr_err", 584 .reset_level = HNAE3_GLOBAL_RESET 585 }, { 586 .int_msk = BIT(16), 587 .msg = "qcn_byte_info_fifo_rd_err", 588 .reset_level = HNAE3_GLOBAL_RESET 589 }, { 590 .int_msk = BIT(17), 591 .msg = "qcn_byte_info_fifo_wr_err", 592 .reset_level = HNAE3_GLOBAL_RESET 593 }, { 594 /* sentinel */ 595 } 
596 }; 597 598 static const struct hclge_hw_error hclge_qcn_ecc_rint[] = { 599 { 600 .int_msk = BIT(1), 601 .msg = "qcn_byte_mem_ecc_mbit_err", 602 .reset_level = HNAE3_GLOBAL_RESET 603 }, { 604 .int_msk = BIT(3), 605 .msg = "qcn_time_mem_ecc_mbit_err", 606 .reset_level = HNAE3_GLOBAL_RESET 607 }, { 608 .int_msk = BIT(5), 609 .msg = "qcn_fb_mem_ecc_mbit_err", 610 .reset_level = HNAE3_GLOBAL_RESET 611 }, { 612 .int_msk = BIT(7), 613 .msg = "qcn_link_mem_ecc_mbit_err", 614 .reset_level = HNAE3_GLOBAL_RESET 615 }, { 616 .int_msk = BIT(9), 617 .msg = "qcn_rate_mem_ecc_mbit_err", 618 .reset_level = HNAE3_GLOBAL_RESET 619 }, { 620 .int_msk = BIT(11), 621 .msg = "qcn_tmplt_mem_ecc_mbit_err", 622 .reset_level = HNAE3_GLOBAL_RESET 623 }, { 624 .int_msk = BIT(13), 625 .msg = "qcn_shap_cfg_mem_ecc_mbit_err", 626 .reset_level = HNAE3_GLOBAL_RESET 627 }, { 628 .int_msk = BIT(15), 629 .msg = "qcn_gp0_barrel_mem_ecc_mbit_err", 630 .reset_level = HNAE3_GLOBAL_RESET 631 }, { 632 .int_msk = BIT(17), 633 .msg = "qcn_gp1_barrel_mem_ecc_mbit_err", 634 .reset_level = HNAE3_GLOBAL_RESET 635 }, { 636 .int_msk = BIT(19), 637 .msg = "qcn_gp2_barrel_mem_ecc_mbit_err", 638 .reset_level = HNAE3_GLOBAL_RESET 639 }, { 640 .int_msk = BIT(21), 641 .msg = "qcn_gp3_barral_mem_ecc_mbit_err", 642 .reset_level = HNAE3_GLOBAL_RESET 643 }, { 644 /* sentinel */ 645 } 646 }; 647 648 static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = { 649 { 650 .int_msk = BIT(0), 651 .msg = "egu_cge_afifo_ecc_1bit_err", 652 .reset_level = HNAE3_NONE_RESET 653 }, { 654 .int_msk = BIT(1), 655 .msg = "egu_cge_afifo_ecc_mbit_err", 656 .reset_level = HNAE3_GLOBAL_RESET 657 }, { 658 .int_msk = BIT(2), 659 .msg = "egu_lge_afifo_ecc_1bit_err", 660 .reset_level = HNAE3_NONE_RESET 661 }, { 662 .int_msk = BIT(3), 663 .msg = "egu_lge_afifo_ecc_mbit_err", 664 .reset_level = HNAE3_GLOBAL_RESET 665 }, { 666 .int_msk = BIT(4), 667 .msg = "cge_igu_afifo_ecc_1bit_err", 668 .reset_level = HNAE3_NONE_RESET 669 }, { 670 .int_msk = 
BIT(5), 671 .msg = "cge_igu_afifo_ecc_mbit_err", 672 .reset_level = HNAE3_GLOBAL_RESET 673 }, { 674 .int_msk = BIT(6), 675 .msg = "lge_igu_afifo_ecc_1bit_err", 676 .reset_level = HNAE3_NONE_RESET 677 }, { 678 .int_msk = BIT(7), 679 .msg = "lge_igu_afifo_ecc_mbit_err", 680 .reset_level = HNAE3_GLOBAL_RESET 681 }, { 682 .int_msk = BIT(8), 683 .msg = "cge_igu_afifo_overflow_err", 684 .reset_level = HNAE3_GLOBAL_RESET 685 }, { 686 .int_msk = BIT(9), 687 .msg = "lge_igu_afifo_overflow_err", 688 .reset_level = HNAE3_GLOBAL_RESET 689 }, { 690 .int_msk = BIT(10), 691 .msg = "egu_cge_afifo_underrun_err", 692 .reset_level = HNAE3_GLOBAL_RESET 693 }, { 694 .int_msk = BIT(11), 695 .msg = "egu_lge_afifo_underrun_err", 696 .reset_level = HNAE3_GLOBAL_RESET 697 }, { 698 .int_msk = BIT(12), 699 .msg = "egu_ge_afifo_underrun_err", 700 .reset_level = HNAE3_GLOBAL_RESET 701 }, { 702 .int_msk = BIT(13), 703 .msg = "ge_igu_afifo_overflow_err", 704 .reset_level = HNAE3_GLOBAL_RESET 705 }, { 706 /* sentinel */ 707 } 708 }; 709 710 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { 711 { 712 .int_msk = BIT(13), 713 .msg = "rpu_rx_pkt_bit32_ecc_mbit_err", 714 .reset_level = HNAE3_GLOBAL_RESET 715 }, { 716 .int_msk = BIT(14), 717 .msg = "rpu_rx_pkt_bit33_ecc_mbit_err", 718 .reset_level = HNAE3_GLOBAL_RESET 719 }, { 720 .int_msk = BIT(15), 721 .msg = "rpu_rx_pkt_bit34_ecc_mbit_err", 722 .reset_level = HNAE3_GLOBAL_RESET 723 }, { 724 .int_msk = BIT(16), 725 .msg = "rpu_rx_pkt_bit35_ecc_mbit_err", 726 .reset_level = HNAE3_GLOBAL_RESET 727 }, { 728 .int_msk = BIT(17), 729 .msg = "rcb_tx_ring_ecc_mbit_err", 730 .reset_level = HNAE3_GLOBAL_RESET 731 }, { 732 .int_msk = BIT(18), 733 .msg = "rcb_rx_ring_ecc_mbit_err", 734 .reset_level = HNAE3_GLOBAL_RESET 735 }, { 736 .int_msk = BIT(19), 737 .msg = "rcb_tx_fbd_ecc_mbit_err", 738 .reset_level = HNAE3_GLOBAL_RESET 739 }, { 740 .int_msk = BIT(20), 741 .msg = "rcb_rx_ebd_ecc_mbit_err", 742 .reset_level = HNAE3_GLOBAL_RESET 743 }, { 
744 .int_msk = BIT(21), 745 .msg = "rcb_tso_info_ecc_mbit_err", 746 .reset_level = HNAE3_GLOBAL_RESET 747 }, { 748 .int_msk = BIT(22), 749 .msg = "rcb_tx_int_info_ecc_mbit_err", 750 .reset_level = HNAE3_GLOBAL_RESET 751 }, { 752 .int_msk = BIT(23), 753 .msg = "rcb_rx_int_info_ecc_mbit_err", 754 .reset_level = HNAE3_GLOBAL_RESET 755 }, { 756 .int_msk = BIT(24), 757 .msg = "tpu_tx_pkt_0_ecc_mbit_err", 758 .reset_level = HNAE3_GLOBAL_RESET 759 }, { 760 .int_msk = BIT(25), 761 .msg = "tpu_tx_pkt_1_ecc_mbit_err", 762 .reset_level = HNAE3_GLOBAL_RESET 763 }, { 764 .int_msk = BIT(26), 765 .msg = "rd_bus_err", 766 .reset_level = HNAE3_GLOBAL_RESET 767 }, { 768 .int_msk = BIT(27), 769 .msg = "wr_bus_err", 770 .reset_level = HNAE3_GLOBAL_RESET 771 }, { 772 .int_msk = BIT(28), 773 .msg = "reg_search_miss", 774 .reset_level = HNAE3_GLOBAL_RESET 775 }, { 776 .int_msk = BIT(29), 777 .msg = "rx_q_search_miss", 778 .reset_level = HNAE3_NONE_RESET 779 }, { 780 .int_msk = BIT(30), 781 .msg = "ooo_ecc_err_detect", 782 .reset_level = HNAE3_NONE_RESET 783 }, { 784 .int_msk = BIT(31), 785 .msg = "ooo_ecc_err_multpl", 786 .reset_level = HNAE3_GLOBAL_RESET 787 }, { 788 /* sentinel */ 789 } 790 }; 791 792 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { 793 { 794 .int_msk = BIT(4), 795 .msg = "gro_bd_ecc_mbit_err", 796 .reset_level = HNAE3_GLOBAL_RESET 797 }, { 798 .int_msk = BIT(5), 799 .msg = "gro_context_ecc_mbit_err", 800 .reset_level = HNAE3_GLOBAL_RESET 801 }, { 802 .int_msk = BIT(6), 803 .msg = "rx_stash_cfg_ecc_mbit_err", 804 .reset_level = HNAE3_GLOBAL_RESET 805 }, { 806 .int_msk = BIT(7), 807 .msg = "axi_rd_fbd_ecc_mbit_err", 808 .reset_level = HNAE3_GLOBAL_RESET 809 }, { 810 /* sentinel */ 811 } 812 }; 813 814 static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = { 815 { 816 .int_msk = BIT(0), 817 .msg = "over_8bd_no_fe", 818 .reset_level = HNAE3_FUNC_RESET 819 }, { 820 .int_msk = BIT(1), 821 .msg = "tso_mss_cmp_min_err", 822 .reset_level = 
HNAE3_NONE_RESET 823 }, { 824 .int_msk = BIT(2), 825 .msg = "tso_mss_cmp_max_err", 826 .reset_level = HNAE3_NONE_RESET 827 }, { 828 .int_msk = BIT(3), 829 .msg = "tx_rd_fbd_poison", 830 .reset_level = HNAE3_FUNC_RESET 831 }, { 832 .int_msk = BIT(4), 833 .msg = "rx_rd_ebd_poison", 834 .reset_level = HNAE3_FUNC_RESET 835 }, { 836 .int_msk = BIT(5), 837 .msg = "buf_wait_timeout", 838 .reset_level = HNAE3_NONE_RESET 839 }, { 840 /* sentinel */ 841 } 842 }; 843 844 static const struct hclge_hw_error hclge_ssu_com_err_int[] = { 845 { 846 .int_msk = BIT(0), 847 .msg = "buf_sum_err", 848 .reset_level = HNAE3_NONE_RESET 849 }, { 850 .int_msk = BIT(1), 851 .msg = "ppp_mb_num_err", 852 .reset_level = HNAE3_NONE_RESET 853 }, { 854 .int_msk = BIT(2), 855 .msg = "ppp_mbid_err", 856 .reset_level = HNAE3_GLOBAL_RESET 857 }, { 858 .int_msk = BIT(3), 859 .msg = "ppp_rlt_mac_err", 860 .reset_level = HNAE3_GLOBAL_RESET 861 }, { 862 .int_msk = BIT(4), 863 .msg = "ppp_rlt_host_err", 864 .reset_level = HNAE3_GLOBAL_RESET 865 }, { 866 .int_msk = BIT(5), 867 .msg = "cks_edit_position_err", 868 .reset_level = HNAE3_GLOBAL_RESET 869 }, { 870 .int_msk = BIT(6), 871 .msg = "cks_edit_condition_err", 872 .reset_level = HNAE3_GLOBAL_RESET 873 }, { 874 .int_msk = BIT(7), 875 .msg = "vlan_edit_condition_err", 876 .reset_level = HNAE3_GLOBAL_RESET 877 }, { 878 .int_msk = BIT(8), 879 .msg = "vlan_num_ot_err", 880 .reset_level = HNAE3_GLOBAL_RESET 881 }, { 882 .int_msk = BIT(9), 883 .msg = "vlan_num_in_err", 884 .reset_level = HNAE3_GLOBAL_RESET 885 }, { 886 /* sentinel */ 887 } 888 }; 889 890 #define HCLGE_SSU_MEM_ECC_ERR(x) \ 891 { \ 892 .int_msk = BIT(x), \ 893 .msg = "ssu_mem" #x "_ecc_mbit_err", \ 894 .reset_level = HNAE3_GLOBAL_RESET \ 895 } 896 897 static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = { 898 HCLGE_SSU_MEM_ECC_ERR(0), 899 HCLGE_SSU_MEM_ECC_ERR(1), 900 HCLGE_SSU_MEM_ECC_ERR(2), 901 HCLGE_SSU_MEM_ECC_ERR(3), 902 HCLGE_SSU_MEM_ECC_ERR(4), 903 HCLGE_SSU_MEM_ECC_ERR(5), 904 
HCLGE_SSU_MEM_ECC_ERR(6), 905 HCLGE_SSU_MEM_ECC_ERR(7), 906 HCLGE_SSU_MEM_ECC_ERR(8), 907 HCLGE_SSU_MEM_ECC_ERR(9), 908 HCLGE_SSU_MEM_ECC_ERR(10), 909 HCLGE_SSU_MEM_ECC_ERR(11), 910 HCLGE_SSU_MEM_ECC_ERR(12), 911 HCLGE_SSU_MEM_ECC_ERR(13), 912 HCLGE_SSU_MEM_ECC_ERR(14), 913 HCLGE_SSU_MEM_ECC_ERR(15), 914 HCLGE_SSU_MEM_ECC_ERR(16), 915 HCLGE_SSU_MEM_ECC_ERR(17), 916 HCLGE_SSU_MEM_ECC_ERR(18), 917 HCLGE_SSU_MEM_ECC_ERR(19), 918 HCLGE_SSU_MEM_ECC_ERR(20), 919 HCLGE_SSU_MEM_ECC_ERR(21), 920 HCLGE_SSU_MEM_ECC_ERR(22), 921 HCLGE_SSU_MEM_ECC_ERR(23), 922 HCLGE_SSU_MEM_ECC_ERR(24), 923 HCLGE_SSU_MEM_ECC_ERR(25), 924 HCLGE_SSU_MEM_ECC_ERR(26), 925 HCLGE_SSU_MEM_ECC_ERR(27), 926 HCLGE_SSU_MEM_ECC_ERR(28), 927 HCLGE_SSU_MEM_ECC_ERR(29), 928 HCLGE_SSU_MEM_ECC_ERR(30), 929 HCLGE_SSU_MEM_ECC_ERR(31), 930 { /* sentinel */ } 931 }; 932 933 static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = { 934 { 935 .int_msk = BIT(0), 936 .msg = "roc_pkt_without_key_port", 937 .reset_level = HNAE3_FUNC_RESET 938 }, { 939 .int_msk = BIT(1), 940 .msg = "tpu_pkt_without_key_port", 941 .reset_level = HNAE3_GLOBAL_RESET 942 }, { 943 .int_msk = BIT(2), 944 .msg = "igu_pkt_without_key_port", 945 .reset_level = HNAE3_GLOBAL_RESET 946 }, { 947 .int_msk = BIT(3), 948 .msg = "roc_eof_mis_match_port", 949 .reset_level = HNAE3_GLOBAL_RESET 950 }, { 951 .int_msk = BIT(4), 952 .msg = "tpu_eof_mis_match_port", 953 .reset_level = HNAE3_GLOBAL_RESET 954 }, { 955 .int_msk = BIT(5), 956 .msg = "igu_eof_mis_match_port", 957 .reset_level = HNAE3_GLOBAL_RESET 958 }, { 959 .int_msk = BIT(6), 960 .msg = "roc_sof_mis_match_port", 961 .reset_level = HNAE3_GLOBAL_RESET 962 }, { 963 .int_msk = BIT(7), 964 .msg = "tpu_sof_mis_match_port", 965 .reset_level = HNAE3_GLOBAL_RESET 966 }, { 967 .int_msk = BIT(8), 968 .msg = "igu_sof_mis_match_port", 969 .reset_level = HNAE3_GLOBAL_RESET 970 }, { 971 .int_msk = BIT(11), 972 .msg = "ets_rd_int_rx_port", 973 .reset_level = HNAE3_GLOBAL_RESET 974 }, { 975 .int_msk = 
BIT(12), 976 .msg = "ets_wr_int_rx_port", 977 .reset_level = HNAE3_GLOBAL_RESET 978 }, { 979 .int_msk = BIT(13), 980 .msg = "ets_rd_int_tx_port", 981 .reset_level = HNAE3_GLOBAL_RESET 982 }, { 983 .int_msk = BIT(14), 984 .msg = "ets_wr_int_tx_port", 985 .reset_level = HNAE3_GLOBAL_RESET 986 }, { 987 /* sentinel */ 988 } 989 }; 990 991 static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = { 992 { 993 .int_msk = BIT(0), 994 .msg = "ig_mac_inf_int", 995 .reset_level = HNAE3_GLOBAL_RESET 996 }, { 997 .int_msk = BIT(1), 998 .msg = "ig_host_inf_int", 999 .reset_level = HNAE3_GLOBAL_RESET 1000 }, { 1001 .int_msk = BIT(2), 1002 .msg = "ig_roc_buf_int", 1003 .reset_level = HNAE3_GLOBAL_RESET 1004 }, { 1005 .int_msk = BIT(3), 1006 .msg = "ig_host_data_fifo_int", 1007 .reset_level = HNAE3_GLOBAL_RESET 1008 }, { 1009 .int_msk = BIT(4), 1010 .msg = "ig_host_key_fifo_int", 1011 .reset_level = HNAE3_GLOBAL_RESET 1012 }, { 1013 .int_msk = BIT(5), 1014 .msg = "tx_qcn_fifo_int", 1015 .reset_level = HNAE3_GLOBAL_RESET 1016 }, { 1017 .int_msk = BIT(6), 1018 .msg = "rx_qcn_fifo_int", 1019 .reset_level = HNAE3_GLOBAL_RESET 1020 }, { 1021 .int_msk = BIT(7), 1022 .msg = "tx_pf_rd_fifo_int", 1023 .reset_level = HNAE3_GLOBAL_RESET 1024 }, { 1025 .int_msk = BIT(8), 1026 .msg = "rx_pf_rd_fifo_int", 1027 .reset_level = HNAE3_GLOBAL_RESET 1028 }, { 1029 .int_msk = BIT(9), 1030 .msg = "qm_eof_fifo_int", 1031 .reset_level = HNAE3_GLOBAL_RESET 1032 }, { 1033 .int_msk = BIT(10), 1034 .msg = "mb_rlt_fifo_int", 1035 .reset_level = HNAE3_GLOBAL_RESET 1036 }, { 1037 .int_msk = BIT(11), 1038 .msg = "dup_uncopy_fifo_int", 1039 .reset_level = HNAE3_GLOBAL_RESET 1040 }, { 1041 .int_msk = BIT(12), 1042 .msg = "dup_cnt_rd_fifo_int", 1043 .reset_level = HNAE3_GLOBAL_RESET 1044 }, { 1045 .int_msk = BIT(13), 1046 .msg = "dup_cnt_drop_fifo_int", 1047 .reset_level = HNAE3_GLOBAL_RESET 1048 }, { 1049 .int_msk = BIT(14), 1050 .msg = "dup_cnt_wrb_fifo_int", 1051 .reset_level = HNAE3_GLOBAL_RESET 1052 }, 
{ 1053 .int_msk = BIT(15), 1054 .msg = "host_cmd_fifo_int", 1055 .reset_level = HNAE3_GLOBAL_RESET 1056 }, { 1057 .int_msk = BIT(16), 1058 .msg = "mac_cmd_fifo_int", 1059 .reset_level = HNAE3_GLOBAL_RESET 1060 }, { 1061 .int_msk = BIT(17), 1062 .msg = "host_cmd_bitmap_empty_int", 1063 .reset_level = HNAE3_GLOBAL_RESET 1064 }, { 1065 .int_msk = BIT(18), 1066 .msg = "mac_cmd_bitmap_empty_int", 1067 .reset_level = HNAE3_GLOBAL_RESET 1068 }, { 1069 .int_msk = BIT(19), 1070 .msg = "dup_bitmap_empty_int", 1071 .reset_level = HNAE3_GLOBAL_RESET 1072 }, { 1073 .int_msk = BIT(20), 1074 .msg = "out_queue_bitmap_empty_int", 1075 .reset_level = HNAE3_GLOBAL_RESET 1076 }, { 1077 .int_msk = BIT(21), 1078 .msg = "bank2_bitmap_empty_int", 1079 .reset_level = HNAE3_GLOBAL_RESET 1080 }, { 1081 .int_msk = BIT(22), 1082 .msg = "bank1_bitmap_empty_int", 1083 .reset_level = HNAE3_GLOBAL_RESET 1084 }, { 1085 .int_msk = BIT(23), 1086 .msg = "bank0_bitmap_empty_int", 1087 .reset_level = HNAE3_GLOBAL_RESET 1088 }, { 1089 /* sentinel */ 1090 } 1091 }; 1092 1093 static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { 1094 { 1095 .int_msk = BIT(0), 1096 .msg = "ets_rd_int_rx_tcg", 1097 .reset_level = HNAE3_GLOBAL_RESET 1098 }, { 1099 .int_msk = BIT(1), 1100 .msg = "ets_wr_int_rx_tcg", 1101 .reset_level = HNAE3_GLOBAL_RESET 1102 }, { 1103 .int_msk = BIT(2), 1104 .msg = "ets_rd_int_tx_tcg", 1105 .reset_level = HNAE3_GLOBAL_RESET 1106 }, { 1107 .int_msk = BIT(3), 1108 .msg = "ets_wr_int_tx_tcg", 1109 .reset_level = HNAE3_GLOBAL_RESET 1110 }, { 1111 /* sentinel */ 1112 } 1113 }; 1114 1115 static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { 1116 { 1117 .int_msk = BIT(0), 1118 .msg = "roc_pkt_without_key_port", 1119 .reset_level = HNAE3_FUNC_RESET 1120 }, { 1121 .int_msk = BIT(9), 1122 .msg = "low_water_line_err_port", 1123 .reset_level = HNAE3_NONE_RESET 1124 }, { 1125 .int_msk = BIT(10), 1126 .msg = "hi_water_line_err_port", 1127 .reset_level = HNAE3_GLOBAL_RESET 1128 }, { 
1129 /* sentinel */ 1130 } 1131 }; 1132 1133 static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { 1134 { 1135 .int_msk = 0, 1136 .msg = "rocee qmm ovf: sgid invalid err" 1137 }, { 1138 .int_msk = 0x4, 1139 .msg = "rocee qmm ovf: sgid ovf err" 1140 }, { 1141 .int_msk = 0x8, 1142 .msg = "rocee qmm ovf: smac invalid err" 1143 }, { 1144 .int_msk = 0xC, 1145 .msg = "rocee qmm ovf: smac ovf err" 1146 }, { 1147 .int_msk = 0x10, 1148 .msg = "rocee qmm ovf: cqc invalid err" 1149 }, { 1150 .int_msk = 0x11, 1151 .msg = "rocee qmm ovf: cqc ovf err" 1152 }, { 1153 .int_msk = 0x12, 1154 .msg = "rocee qmm ovf: cqc hopnum err" 1155 }, { 1156 .int_msk = 0x13, 1157 .msg = "rocee qmm ovf: cqc ba0 err" 1158 }, { 1159 .int_msk = 0x14, 1160 .msg = "rocee qmm ovf: srqc invalid err" 1161 }, { 1162 .int_msk = 0x15, 1163 .msg = "rocee qmm ovf: srqc ovf err" 1164 }, { 1165 .int_msk = 0x16, 1166 .msg = "rocee qmm ovf: srqc hopnum err" 1167 }, { 1168 .int_msk = 0x17, 1169 .msg = "rocee qmm ovf: srqc ba0 err" 1170 }, { 1171 .int_msk = 0x18, 1172 .msg = "rocee qmm ovf: mpt invalid err" 1173 }, { 1174 .int_msk = 0x19, 1175 .msg = "rocee qmm ovf: mpt ovf err" 1176 }, { 1177 .int_msk = 0x1A, 1178 .msg = "rocee qmm ovf: mpt hopnum err" 1179 }, { 1180 .int_msk = 0x1B, 1181 .msg = "rocee qmm ovf: mpt ba0 err" 1182 }, { 1183 .int_msk = 0x1C, 1184 .msg = "rocee qmm ovf: qpc invalid err" 1185 }, { 1186 .int_msk = 0x1D, 1187 .msg = "rocee qmm ovf: qpc ovf err" 1188 }, { 1189 .int_msk = 0x1E, 1190 .msg = "rocee qmm ovf: qpc hopnum err" 1191 }, { 1192 .int_msk = 0x1F, 1193 .msg = "rocee qmm ovf: qpc ba0 err" 1194 }, { 1195 /* sentinel */ 1196 } 1197 }; 1198 1199 static const struct hclge_hw_module_id hclge_hw_module_id_st[] = { 1200 { 1201 .module_id = MODULE_NONE, 1202 .msg = "MODULE_NONE" 1203 }, { 1204 .module_id = MODULE_BIOS_COMMON, 1205 .msg = "MODULE_BIOS_COMMON" 1206 }, { 1207 .module_id = MODULE_GE, 1208 .msg = "MODULE_GE" 1209 }, { 1210 .module_id = MODULE_IGU_EGU, 1211 .msg = 
"MODULE_IGU_EGU" 1212 }, { 1213 .module_id = MODULE_LGE, 1214 .msg = "MODULE_LGE" 1215 }, { 1216 .module_id = MODULE_NCSI, 1217 .msg = "MODULE_NCSI" 1218 }, { 1219 .module_id = MODULE_PPP, 1220 .msg = "MODULE_PPP" 1221 }, { 1222 .module_id = MODULE_QCN, 1223 .msg = "MODULE_QCN" 1224 }, { 1225 .module_id = MODULE_RCB_RX, 1226 .msg = "MODULE_RCB_RX" 1227 }, { 1228 .module_id = MODULE_RTC, 1229 .msg = "MODULE_RTC" 1230 }, { 1231 .module_id = MODULE_SSU, 1232 .msg = "MODULE_SSU" 1233 }, { 1234 .module_id = MODULE_TM, 1235 .msg = "MODULE_TM" 1236 }, { 1237 .module_id = MODULE_RCB_TX, 1238 .msg = "MODULE_RCB_TX" 1239 }, { 1240 .module_id = MODULE_TXDMA, 1241 .msg = "MODULE_TXDMA" 1242 }, { 1243 .module_id = MODULE_MASTER, 1244 .msg = "MODULE_MASTER" 1245 }, { 1246 .module_id = MODULE_ROCEE_TOP, 1247 .msg = "MODULE_ROCEE_TOP" 1248 }, { 1249 .module_id = MODULE_ROCEE_TIMER, 1250 .msg = "MODULE_ROCEE_TIMER" 1251 }, { 1252 .module_id = MODULE_ROCEE_MDB, 1253 .msg = "MODULE_ROCEE_MDB" 1254 }, { 1255 .module_id = MODULE_ROCEE_TSP, 1256 .msg = "MODULE_ROCEE_TSP" 1257 }, { 1258 .module_id = MODULE_ROCEE_TRP, 1259 .msg = "MODULE_ROCEE_TRP" 1260 }, { 1261 .module_id = MODULE_ROCEE_SCC, 1262 .msg = "MODULE_ROCEE_SCC" 1263 }, { 1264 .module_id = MODULE_ROCEE_CAEP, 1265 .msg = "MODULE_ROCEE_CAEP" 1266 }, { 1267 .module_id = MODULE_ROCEE_GEN_AC, 1268 .msg = "MODULE_ROCEE_GEN_AC" 1269 }, { 1270 .module_id = MODULE_ROCEE_QMM, 1271 .msg = "MODULE_ROCEE_QMM" 1272 }, { 1273 .module_id = MODULE_ROCEE_LSAN, 1274 .msg = "MODULE_ROCEE_LSAN" 1275 } 1276 }; 1277 1278 static const struct hclge_hw_type_id hclge_hw_type_id_st[] = { 1279 { 1280 .type_id = NONE_ERROR, 1281 .msg = "none_error" 1282 }, { 1283 .type_id = FIFO_ERROR, 1284 .msg = "fifo_error" 1285 }, { 1286 .type_id = MEMORY_ERROR, 1287 .msg = "memory_error" 1288 }, { 1289 .type_id = POISON_ERROR, 1290 .msg = "poison_error" 1291 }, { 1292 .type_id = MSIX_ECC_ERROR, 1293 .msg = "msix_ecc_error" 1294 }, { 1295 .type_id = TQP_INT_ECC_ERROR, 
1296 .msg = "tqp_int_ecc_error" 1297 }, { 1298 .type_id = PF_ABNORMAL_INT_ERROR, 1299 .msg = "pf_abnormal_int_error" 1300 }, { 1301 .type_id = MPF_ABNORMAL_INT_ERROR, 1302 .msg = "mpf_abnormal_int_error" 1303 }, { 1304 .type_id = COMMON_ERROR, 1305 .msg = "common_error" 1306 }, { 1307 .type_id = PORT_ERROR, 1308 .msg = "port_error" 1309 }, { 1310 .type_id = ETS_ERROR, 1311 .msg = "ets_error" 1312 }, { 1313 .type_id = NCSI_ERROR, 1314 .msg = "ncsi_error" 1315 }, { 1316 .type_id = GLB_ERROR, 1317 .msg = "glb_error" 1318 }, { 1319 .type_id = ROCEE_NORMAL_ERR, 1320 .msg = "rocee_normal_error" 1321 }, { 1322 .type_id = ROCEE_OVF_ERR, 1323 .msg = "rocee_ovf_error" 1324 } 1325 }; 1326 1327 static void hclge_log_error(struct device *dev, char *reg, 1328 const struct hclge_hw_error *err, 1329 u32 err_sts, unsigned long *reset_requests) 1330 { 1331 while (err->msg) { 1332 if (err->int_msk & err_sts) { 1333 dev_err(dev, "%s %s found [error status=0x%x]\n", 1334 reg, err->msg, err_sts); 1335 if (err->reset_level && 1336 err->reset_level != HNAE3_NONE_RESET) 1337 set_bit(err->reset_level, reset_requests); 1338 } 1339 err++; 1340 } 1341 } 1342 1343 /* hclge_cmd_query_error: read the error information 1344 * @hdev: pointer to struct hclge_dev 1345 * @desc: descriptor for describing the command 1346 * @cmd: command opcode 1347 * @flag: flag for extended command structure 1348 * 1349 * This function query the error info from hw register/s using command 1350 */ 1351 static int hclge_cmd_query_error(struct hclge_dev *hdev, 1352 struct hclge_desc *desc, u32 cmd, u16 flag) 1353 { 1354 struct device *dev = &hdev->pdev->dev; 1355 int desc_num = 1; 1356 int ret; 1357 1358 hclge_cmd_setup_basic_desc(&desc[0], cmd, true); 1359 if (flag) { 1360 desc[0].flag |= cpu_to_le16(flag); 1361 hclge_cmd_setup_basic_desc(&desc[1], cmd, true); 1362 desc_num = 2; 1363 } 1364 1365 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 1366 if (ret) 1367 dev_err(dev, "query error cmd failed (%d)\n", ret); 
1368 1369 return ret; 1370 } 1371 1372 static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev) 1373 { 1374 struct hclge_desc desc; 1375 1376 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false); 1377 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR); 1378 1379 return hclge_cmd_send(&hdev->hw, &desc, 1); 1380 } 1381 1382 static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en) 1383 { 1384 struct device *dev = &hdev->pdev->dev; 1385 struct hclge_desc desc[2]; 1386 int ret; 1387 1388 /* configure common error interrupts */ 1389 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); 1390 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 1391 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); 1392 1393 if (en) { 1394 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); 1395 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | 1396 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); 1397 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); 1398 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN | 1399 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN); 1400 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); 1401 } 1402 1403 desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); 1404 desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | 1405 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); 1406 desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); 1407 desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK | 1408 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK); 1409 desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK); 1410 1411 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1412 if (ret) 1413 dev_err(dev, 1414 "fail(%d) to configure common err interrupts\n", ret); 1415 1416 return ret; 1417 } 1418 1419 static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en) 1420 { 1421 struct device *dev = &hdev->pdev->dev; 1422 struct hclge_desc desc; 1423 
int ret; 1424 1425 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) 1426 return 0; 1427 1428 /* configure NCSI error interrupts */ 1429 hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false); 1430 if (en) 1431 desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN); 1432 1433 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1434 if (ret) 1435 dev_err(dev, 1436 "fail(%d) to configure NCSI error interrupts\n", ret); 1437 1438 return ret; 1439 } 1440 1441 static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en) 1442 { 1443 struct device *dev = &hdev->pdev->dev; 1444 struct hclge_desc desc; 1445 int ret; 1446 1447 /* configure IGU,EGU error interrupts */ 1448 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false); 1449 desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE); 1450 if (en) 1451 desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN); 1452 1453 desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK); 1454 1455 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1456 if (ret) { 1457 dev_err(dev, 1458 "fail(%d) to configure IGU common interrupts\n", ret); 1459 return ret; 1460 } 1461 1462 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false); 1463 if (en) 1464 desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN); 1465 1466 desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK); 1467 1468 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1469 if (ret) { 1470 dev_err(dev, 1471 "fail(%d) to configure IGU-EGU TNL interrupts\n", ret); 1472 return ret; 1473 } 1474 1475 ret = hclge_config_ncsi_hw_err_int(hdev, en); 1476 1477 return ret; 1478 } 1479 1480 static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, 1481 bool en) 1482 { 1483 struct device *dev = &hdev->pdev->dev; 1484 struct hclge_desc desc[2]; 1485 int ret; 1486 1487 /* configure PPP error interrupts */ 1488 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1489 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 1490 hclge_cmd_setup_basic_desc(&desc[1], cmd, false); 
1491 1492 if (cmd == HCLGE_PPP_CMD0_INT_CMD) { 1493 if (en) { 1494 desc[0].data[0] = 1495 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN); 1496 desc[0].data[1] = 1497 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN); 1498 desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN); 1499 } 1500 1501 desc[1].data[0] = 1502 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK); 1503 desc[1].data[1] = 1504 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK); 1505 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 1506 desc[1].data[2] = 1507 cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK); 1508 } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { 1509 if (en) { 1510 desc[0].data[0] = 1511 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN); 1512 desc[0].data[1] = 1513 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN); 1514 } 1515 1516 desc[1].data[0] = 1517 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK); 1518 desc[1].data[1] = 1519 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK); 1520 } 1521 1522 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1523 if (ret) 1524 dev_err(dev, "fail(%d) to configure PPP error intr\n", ret); 1525 1526 return ret; 1527 } 1528 1529 static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en) 1530 { 1531 int ret; 1532 1533 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD, 1534 en); 1535 if (ret) 1536 return ret; 1537 1538 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD, 1539 en); 1540 1541 return ret; 1542 } 1543 1544 static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en) 1545 { 1546 struct device *dev = &hdev->pdev->dev; 1547 struct hclge_desc desc; 1548 int ret; 1549 1550 /* configure TM SCH hw errors */ 1551 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false); 1552 if (en) 1553 desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN); 1554 1555 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1556 if (ret) { 1557 dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret); 1558 return ret; 1559 } 1560 1561 /* configure 
TM QCN hw errors */ 1562 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false); 1563 if (en) 1564 desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); 1565 1566 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1567 if (ret) 1568 dev_err(dev, 1569 "fail(%d) to configure TM QCN mem errors\n", ret); 1570 1571 return ret; 1572 } 1573 1574 static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en) 1575 { 1576 struct device *dev = &hdev->pdev->dev; 1577 struct hclge_desc desc; 1578 int ret; 1579 1580 /* configure MAC common error interrupts */ 1581 hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false); 1582 if (en) 1583 desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN); 1584 1585 desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK); 1586 1587 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1588 if (ret) 1589 dev_err(dev, 1590 "fail(%d) to configure MAC COMMON error intr\n", ret); 1591 1592 return ret; 1593 } 1594 1595 int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en) 1596 { 1597 struct hclge_desc desc; 1598 1599 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false); 1600 if (en) 1601 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN); 1602 else 1603 desc.data[0] = 0; 1604 1605 desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK); 1606 1607 return hclge_cmd_send(&hdev->hw, &desc, 1); 1608 } 1609 1610 static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, 1611 bool en) 1612 { 1613 struct device *dev = &hdev->pdev->dev; 1614 struct hclge_desc desc[2]; 1615 int desc_num = 1; 1616 int ret; 1617 1618 /* configure PPU error interrupts */ 1619 if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) { 1620 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1621 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 1622 hclge_cmd_setup_basic_desc(&desc[1], cmd, false); 1623 if (en) { 1624 desc[0].data[0] = 1625 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN); 1626 desc[0].data[1] = 1627 
cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN); 1628 desc[1].data[3] = 1629 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN); 1630 desc[1].data[4] = 1631 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN); 1632 } 1633 1634 desc[1].data[0] = 1635 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK); 1636 desc[1].data[1] = 1637 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK); 1638 desc[1].data[2] = 1639 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK); 1640 desc[1].data[3] |= 1641 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK); 1642 desc_num = 2; 1643 } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) { 1644 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1645 if (en) 1646 desc[0].data[0] = 1647 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2); 1648 1649 desc[0].data[2] = 1650 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK); 1651 } else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) { 1652 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1653 if (en) 1654 desc[0].data[0] = 1655 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN); 1656 1657 desc[0].data[2] = 1658 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK); 1659 } else { 1660 dev_err(dev, "Invalid cmd to configure PPU error interrupts\n"); 1661 return -EINVAL; 1662 } 1663 1664 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 1665 1666 return ret; 1667 } 1668 1669 static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en) 1670 { 1671 struct device *dev = &hdev->pdev->dev; 1672 int ret; 1673 1674 ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD, 1675 en); 1676 if (ret) { 1677 dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n", 1678 ret); 1679 return ret; 1680 } 1681 1682 ret = hclge_config_ppu_error_interrupts(hdev, 1683 HCLGE_PPU_MPF_OTHER_INT_CMD, 1684 en); 1685 if (ret) { 1686 dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret); 1687 return ret; 1688 } 1689 1690 ret = hclge_config_ppu_error_interrupts(hdev, 1691 HCLGE_PPU_PF_OTHER_INT_CMD, en); 1692 if (ret) 1693 dev_err(dev, "fail(%d) to 
configure PPU PF error interrupts\n", 1694 ret); 1695 return ret; 1696 } 1697 1698 static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) 1699 { 1700 struct device *dev = &hdev->pdev->dev; 1701 struct hclge_desc desc[2]; 1702 int ret; 1703 1704 /* configure SSU ecc error interrupts */ 1705 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false); 1706 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 1707 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false); 1708 if (en) { 1709 desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN); 1710 desc[0].data[1] = 1711 cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN); 1712 desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN); 1713 } 1714 1715 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK); 1716 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK); 1717 desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK); 1718 1719 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1720 if (ret) { 1721 dev_err(dev, 1722 "fail(%d) to configure SSU ECC error interrupt\n", ret); 1723 return ret; 1724 } 1725 1726 /* configure SSU common error interrupts */ 1727 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false); 1728 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 1729 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false); 1730 1731 if (en) { 1732 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 1733 desc[0].data[0] = 1734 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN); 1735 else 1736 desc[0].data[0] = 1737 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5)); 1738 desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN); 1739 desc[0].data[2] = 1740 cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN); 1741 } 1742 1743 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK | 1744 HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK); 1745 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK); 1746 1747 ret = 
hclge_cmd_send(&hdev->hw, &desc[0], 2); 1748 if (ret) 1749 dev_err(dev, 1750 "fail(%d) to configure SSU COMMON error intr\n", ret); 1751 1752 return ret; 1753 } 1754 1755 /* hclge_query_bd_num: query number of buffer descriptors 1756 * @hdev: pointer to struct hclge_dev 1757 * @is_ras: true for ras, false for msix 1758 * @mpf_bd_num: number of main PF interrupt buffer descriptors 1759 * @pf_bd_num: number of not main PF interrupt buffer descriptors 1760 * 1761 * This function querys number of mpf and pf buffer descriptors. 1762 */ 1763 static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras, 1764 u32 *mpf_bd_num, u32 *pf_bd_num) 1765 { 1766 struct device *dev = &hdev->pdev->dev; 1767 u32 mpf_min_bd_num, pf_min_bd_num; 1768 enum hclge_opcode_type opcode; 1769 struct hclge_desc desc_bd; 1770 int ret; 1771 1772 if (is_ras) { 1773 opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM; 1774 mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM; 1775 pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM; 1776 } else { 1777 opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM; 1778 mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM; 1779 pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM; 1780 } 1781 1782 hclge_cmd_setup_basic_desc(&desc_bd, opcode, true); 1783 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); 1784 if (ret) { 1785 dev_err(dev, "fail(%d) to query msix int status bd num\n", 1786 ret); 1787 return ret; 1788 } 1789 1790 *mpf_bd_num = le32_to_cpu(desc_bd.data[0]); 1791 *pf_bd_num = le32_to_cpu(desc_bd.data[1]); 1792 if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) { 1793 dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n", 1794 *mpf_bd_num, *pf_bd_num); 1795 return -EINVAL; 1796 } 1797 1798 return 0; 1799 } 1800 1801 /* hclge_handle_mpf_ras_error: handle all main PF RAS errors 1802 * @hdev: pointer to struct hclge_dev 1803 * @desc: descriptor for describing the command 1804 * @num: number of extended command structures 1805 * 1806 * This function handles all the main PF RAS errors in the 1807 * 
hw register/s using command.
 */
static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
				      struct hclge_desc *desc,
				      int num)
{
	unsigned long *reset_req = &hdev->ae_dev->hw_err_reset_req;
	struct device *dev = &hdev->pdev->dev;
	__le32 *words;
	u32 sts;
	int rc;

	/* query all main PF RAS errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
				   true);
	rc = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (rc) {
		dev_err(dev, "query all mpf ras int cmd failed (%d)\n", rc);
		return rc;
	}

	/* log HNS common errors */
	sts = le32_to_cpu(desc[0].data[0]);
	if (sts)
		hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
				hclge_imp_tcm_ecc_int, sts, reset_req);

	sts = le32_to_cpu(desc[0].data[1]);
	if (sts)
		hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
				hclge_cmdq_nic_mem_ecc_int, sts, reset_req);

	if (le32_to_cpu(desc[0].data[2]) & BIT(0))
		dev_warn(dev, "imp_rd_data_poison_err found\n");

	sts = le32_to_cpu(desc[0].data[3]);
	if (sts)
		hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
				hclge_tqp_int_ecc_int, sts, reset_req);

	sts = le32_to_cpu(desc[0].data[4]);
	if (sts)
		hclge_log_error(dev, "MSIX_ECC_INT_STS",
				hclge_msix_sram_ecc_int, sts, reset_req);

	/* From here on each descriptor is viewed as a flat array of 32-bit
	 * words starting at the beginning of the descriptor itself, so the
	 * indexes below are not indexes into desc[n].data[].
	 */

	/* log SSU(Storage Switch Unit) errors */
	words = (__le32 *)&desc[2];
	sts = le32_to_cpu(words[2]);
	if (sts)
		hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
				hclge_ssu_mem_ecc_err_int, sts, reset_req);

	sts = le32_to_cpu(words[3]) & BIT(0);
	if (sts) {
		dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
			sts);
		set_bit(HNAE3_GLOBAL_RESET, reset_req);
	}

	sts = le32_to_cpu(words[4]) & HCLGE_SSU_COMMON_ERR_INT_MASK;
	if (sts)
		hclge_log_error(dev, "SSU_COMMON_ERR_INT",
				hclge_ssu_com_err_int, sts, reset_req);

	/* log IGU(Ingress Unit) errors */
	words = (__le32 *)&desc[3];
	sts = le32_to_cpu(words[0]) & HCLGE_IGU_INT_MASK;
	if (sts)
		hclge_log_error(dev, "IGU_INT_STS",
				hclge_igu_int, sts, reset_req);

	/* log PPP(Programmable Packet Process) errors */
	words = (__le32 *)&desc[4];
	sts = le32_to_cpu(words[1]);
	if (sts)
		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
				hclge_ppp_mpf_abnormal_int_st1, sts,
				reset_req);

	sts = le32_to_cpu(words[3]) & HCLGE_PPP_MPF_INT_ST3_MASK;
	if (sts)
		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
				hclge_ppp_mpf_abnormal_int_st3, sts,
				reset_req);

	/* log PPU(RCB) errors */
	words = (__le32 *)&desc[5];
	sts = le32_to_cpu(words[1]);
	if (sts) {
		dev_err(dev,
			"PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n");
		set_bit(HNAE3_GLOBAL_RESET, reset_req);
	}

	sts = le32_to_cpu(words[2]);
	if (sts)
		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
				hclge_ppu_mpf_abnormal_int_st2, sts,
				reset_req);

	sts = le32_to_cpu(words[3]) & HCLGE_PPU_MPF_INT_ST3_MASK;
	if (sts)
		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
				hclge_ppu_mpf_abnormal_int_st3, sts,
				reset_req);

	/* log TM(Traffic Manager) errors */
	words = (__le32 *)&desc[6];
	sts = le32_to_cpu(words[0]);
	if (sts)
		hclge_log_error(dev, "TM_SCH_RINT",
				hclge_tm_sch_rint, sts, reset_req);

	/* log QCN(Quantized Congestion Control) errors */
	words = (__le32 *)&desc[7];
	sts = le32_to_cpu(words[0]) & HCLGE_QCN_FIFO_INT_MASK;
	if (sts)
		hclge_log_error(dev, "QCN_FIFO_RINT",
				hclge_qcn_fifo_rint, sts, reset_req);

	sts = le32_to_cpu(words[1]) & HCLGE_QCN_ECC_INT_MASK;
	if (sts)
		hclge_log_error(dev, "QCN_ECC_RINT",
				hclge_qcn_ecc_rint, sts, reset_req);

	/* log NCSI errors */
	words = (__le32 *)&desc[9];
	sts = le32_to_cpu(words[0]) & HCLGE_NCSI_ECC_INT_MASK;
	if (sts)
		hclge_log_error(dev, "NCSI_ECC_INT_RPT",
				hclge_ncsi_err_int, sts, reset_req);

	/* clear all main PF RAS errors: resend the same descriptors */
	hclge_cmd_reuse_desc(&desc[0], false);
	rc = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (rc)
		dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", rc);

	return rc;
}

/* hclge_handle_pf_ras_error: handle all PF RAS errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @num: number of extended command structures
 *
 * This function handles all the PF RAS errors in the
 * hw register/s using command.
1966 */ 1967 static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, 1968 struct hclge_desc *desc, 1969 int num) 1970 { 1971 struct hnae3_ae_dev *ae_dev = hdev->ae_dev; 1972 struct device *dev = &hdev->pdev->dev; 1973 __le32 *desc_data; 1974 u32 status; 1975 int ret; 1976 1977 /* query all PF RAS errors */ 1978 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT, 1979 true); 1980 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 1981 if (ret) { 1982 dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret); 1983 return ret; 1984 } 1985 1986 /* log SSU(Storage Switch Unit) errors */ 1987 status = le32_to_cpu(desc[0].data[0]); 1988 if (status) 1989 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", 1990 &hclge_ssu_port_based_err_int[0], status, 1991 &ae_dev->hw_err_reset_req); 1992 1993 status = le32_to_cpu(desc[0].data[1]); 1994 if (status) 1995 hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", 1996 &hclge_ssu_fifo_overflow_int[0], status, 1997 &ae_dev->hw_err_reset_req); 1998 1999 status = le32_to_cpu(desc[0].data[2]); 2000 if (status) 2001 hclge_log_error(dev, "SSU_ETS_TCG_INT", 2002 &hclge_ssu_ets_tcg_int[0], status, 2003 &ae_dev->hw_err_reset_req); 2004 2005 /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */ 2006 desc_data = (__le32 *)&desc[1]; 2007 status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK; 2008 if (status) 2009 hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", 2010 &hclge_igu_egu_tnl_int[0], status, 2011 &ae_dev->hw_err_reset_req); 2012 2013 /* log PPU(RCB) errors */ 2014 desc_data = (__le32 *)&desc[3]; 2015 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK; 2016 if (status) { 2017 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", 2018 &hclge_ppu_pf_abnormal_int[0], status, 2019 &ae_dev->hw_err_reset_req); 2020 hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR); 2021 } 2022 2023 /* clear all PF RAS errors */ 2024 hclge_cmd_reuse_desc(&desc[0], false); 2025 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 2026 if (ret) 
2027 dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret); 2028 2029 return ret; 2030 } 2031 2032 static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) 2033 { 2034 u32 mpf_bd_num, pf_bd_num, bd_num; 2035 struct hclge_desc *desc; 2036 int ret; 2037 2038 /* query the number of registers in the RAS int status */ 2039 ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num); 2040 if (ret) 2041 return ret; 2042 2043 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2044 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2045 if (!desc) 2046 return -ENOMEM; 2047 2048 /* handle all main PF RAS errors */ 2049 ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num); 2050 if (ret) { 2051 kfree(desc); 2052 return ret; 2053 } 2054 memset(desc, 0, bd_num * sizeof(struct hclge_desc)); 2055 2056 /* handle all PF RAS errors */ 2057 ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num); 2058 kfree(desc); 2059 2060 return ret; 2061 } 2062 2063 static int hclge_log_rocee_axi_error(struct hclge_dev *hdev) 2064 { 2065 struct device *dev = &hdev->pdev->dev; 2066 struct hclge_desc desc[3]; 2067 int ret; 2068 2069 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2070 true); 2071 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2072 true); 2073 hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2074 true); 2075 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 2076 desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 2077 2078 ret = hclge_cmd_send(&hdev->hw, &desc[0], 3); 2079 if (ret) { 2080 dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); 2081 return ret; 2082 } 2083 2084 dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", 2085 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 2086 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 2087 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 2088 dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", 
2089 le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), 2090 le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), 2091 le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); 2092 dev_err(dev, "AXI3: %08X %08X %08X %08X\n", 2093 le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]), 2094 le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); 2095 2096 return 0; 2097 } 2098 2099 static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) 2100 { 2101 struct device *dev = &hdev->pdev->dev; 2102 struct hclge_desc desc[2]; 2103 int ret; 2104 2105 ret = hclge_cmd_query_error(hdev, &desc[0], 2106 HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, 2107 HCLGE_CMD_FLAG_NEXT); 2108 if (ret) { 2109 dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); 2110 return ret; 2111 } 2112 2113 dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", 2114 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 2115 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 2116 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 2117 dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), 2118 le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); 2119 2120 return 0; 2121 } 2122 2123 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) 2124 { 2125 struct device *dev = &hdev->pdev->dev; 2126 struct hclge_desc desc[2]; 2127 int ret; 2128 2129 /* read overflow error status */ 2130 ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD, 2131 0); 2132 if (ret) { 2133 dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret); 2134 return ret; 2135 } 2136 2137 /* log overflow error */ 2138 if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2139 const struct hclge_hw_error *err; 2140 u32 err_sts; 2141 2142 err = &hclge_rocee_qmm_ovf_err_int[0]; 2143 err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK & 2144 le32_to_cpu(desc[0].data[0]); 2145 while (err->msg) { 2146 if (err->int_msk == err_sts) { 2147 
dev_err(dev, "%s [error status=0x%x] found\n", 2148 err->msg, 2149 le32_to_cpu(desc[0].data[0])); 2150 break; 2151 } 2152 err++; 2153 } 2154 } 2155 2156 if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2157 dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n", 2158 le32_to_cpu(desc[0].data[1])); 2159 } 2160 2161 if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2162 dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n", 2163 le32_to_cpu(desc[0].data[2])); 2164 } 2165 2166 return 0; 2167 } 2168 2169 static enum hnae3_reset_type 2170 hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) 2171 { 2172 enum hnae3_reset_type reset_type = HNAE3_NONE_RESET; 2173 struct device *dev = &hdev->pdev->dev; 2174 struct hclge_desc desc[2]; 2175 unsigned int status; 2176 int ret; 2177 2178 /* read RAS error interrupt status */ 2179 ret = hclge_cmd_query_error(hdev, &desc[0], 2180 HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0); 2181 if (ret) { 2182 dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret); 2183 /* reset everything for now */ 2184 return HNAE3_GLOBAL_RESET; 2185 } 2186 2187 status = le32_to_cpu(desc[0].data[0]); 2188 if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { 2189 if (status & HCLGE_ROCEE_RERR_INT_MASK) 2190 dev_err(dev, "ROCEE RAS AXI rresp error\n"); 2191 2192 if (status & HCLGE_ROCEE_BERR_INT_MASK) 2193 dev_err(dev, "ROCEE RAS AXI bresp error\n"); 2194 2195 reset_type = HNAE3_FUNC_RESET; 2196 2197 hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR); 2198 2199 ret = hclge_log_rocee_axi_error(hdev); 2200 if (ret) 2201 return HNAE3_GLOBAL_RESET; 2202 } 2203 2204 if (status & HCLGE_ROCEE_ECC_INT_MASK) { 2205 dev_err(dev, "ROCEE RAS 2bit ECC error\n"); 2206 reset_type = HNAE3_GLOBAL_RESET; 2207 2208 ret = hclge_log_rocee_ecc_error(hdev); 2209 if (ret) 2210 return HNAE3_GLOBAL_RESET; 2211 } 2212 2213 if (status & HCLGE_ROCEE_OVF_INT_MASK) { 2214 ret = hclge_log_rocee_ovf_error(hdev); 2215 if (ret) { 2216 dev_err(dev, 
"failed(%d) to process ovf error\n", ret); 2217 /* reset everything for now */ 2218 return HNAE3_GLOBAL_RESET; 2219 } 2220 } 2221 2222 /* clear error status */ 2223 hclge_cmd_reuse_desc(&desc[0], false); 2224 ret = hclge_cmd_send(&hdev->hw, &desc[0], 1); 2225 if (ret) { 2226 dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret); 2227 /* reset everything for now */ 2228 return HNAE3_GLOBAL_RESET; 2229 } 2230 2231 return reset_type; 2232 } 2233 2234 int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) 2235 { 2236 struct device *dev = &hdev->pdev->dev; 2237 struct hclge_desc desc; 2238 int ret; 2239 2240 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 || 2241 !hnae3_dev_roce_supported(hdev)) 2242 return 0; 2243 2244 hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false); 2245 if (en) { 2246 /* enable ROCEE hw error interrupts */ 2247 desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN); 2248 desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN); 2249 2250 hclge_log_and_clear_rocee_ras_error(hdev); 2251 } 2252 desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK); 2253 desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK); 2254 2255 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2256 if (ret) 2257 dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret); 2258 2259 return ret; 2260 } 2261 2262 static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev) 2263 { 2264 struct hclge_dev *hdev = ae_dev->priv; 2265 enum hnae3_reset_type reset_type; 2266 2267 if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) 2268 return; 2269 2270 reset_type = hclge_log_and_clear_rocee_ras_error(hdev); 2271 if (reset_type != HNAE3_NONE_RESET) 2272 set_bit(reset_type, &ae_dev->hw_err_reset_req); 2273 } 2274 2275 static const struct hclge_hw_blk hw_blk[] = { 2276 { 2277 .msk = BIT(0), 2278 .name = "IGU_EGU", 2279 .config_err_int = hclge_config_igu_egu_hw_err_int, 2280 }, { 2281 .msk = BIT(1), 2282 .name = "PPP", 2283 
.config_err_int = hclge_config_ppp_hw_err_int, 2284 }, { 2285 .msk = BIT(2), 2286 .name = "SSU", 2287 .config_err_int = hclge_config_ssu_hw_err_int, 2288 }, { 2289 .msk = BIT(3), 2290 .name = "PPU", 2291 .config_err_int = hclge_config_ppu_hw_err_int, 2292 }, { 2293 .msk = BIT(4), 2294 .name = "TM", 2295 .config_err_int = hclge_config_tm_hw_err_int, 2296 }, { 2297 .msk = BIT(5), 2298 .name = "COMMON", 2299 .config_err_int = hclge_config_common_hw_err_int, 2300 }, { 2301 .msk = BIT(8), 2302 .name = "MAC", 2303 .config_err_int = hclge_config_mac_err_int, 2304 }, { 2305 /* sentinel */ 2306 } 2307 }; 2308 2309 static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable) 2310 { 2311 u32 reg_val; 2312 2313 reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG); 2314 2315 if (enable) 2316 reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B); 2317 else 2318 reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B); 2319 2320 hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val); 2321 } 2322 2323 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state) 2324 { 2325 const struct hclge_hw_blk *module = hw_blk; 2326 int ret = 0; 2327 2328 hclge_config_all_msix_error(hdev, state); 2329 2330 while (module->name) { 2331 if (module->config_err_int) { 2332 ret = module->config_err_int(hdev, state); 2333 if (ret) 2334 return ret; 2335 } 2336 module++; 2337 } 2338 2339 return ret; 2340 } 2341 2342 pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) 2343 { 2344 struct hclge_dev *hdev = ae_dev->priv; 2345 struct device *dev = &hdev->pdev->dev; 2346 u32 status; 2347 2348 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { 2349 dev_err(dev, 2350 "Can't recover - RAS error reported during dev init\n"); 2351 return PCI_ERS_RESULT_NONE; 2352 } 2353 2354 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); 2355 if (status & HCLGE_RAS_REG_NFE_MASK || 2356 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) 2357 ae_dev->hw_err_reset_req = 0; 2358 else 
2359 goto out; 2360 2361 /* Handling Non-fatal HNS RAS errors */ 2362 if (status & HCLGE_RAS_REG_NFE_MASK) { 2363 dev_err(dev, 2364 "HNS Non-Fatal RAS error(status=0x%x) identified\n", 2365 status); 2366 hclge_handle_all_ras_errors(hdev); 2367 } 2368 2369 /* Handling Non-fatal Rocee RAS errors */ 2370 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 && 2371 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { 2372 dev_err(dev, "ROCEE Non-Fatal RAS error identified\n"); 2373 hclge_handle_rocee_ras_error(ae_dev); 2374 } 2375 2376 if (ae_dev->hw_err_reset_req) 2377 return PCI_ERS_RESULT_NEED_RESET; 2378 2379 out: 2380 return PCI_ERS_RESULT_RECOVERED; 2381 } 2382 2383 static int hclge_clear_hw_msix_error(struct hclge_dev *hdev, 2384 struct hclge_desc *desc, bool is_mpf, 2385 u32 bd_num) 2386 { 2387 if (is_mpf) 2388 desc[0].opcode = 2389 cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT); 2390 else 2391 desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT); 2392 2393 desc[0].flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN); 2394 2395 return hclge_cmd_send(&hdev->hw, &desc[0], bd_num); 2396 } 2397 2398 /* hclge_query_8bd_info: query information about over_8bd_nfe_err 2399 * @hdev: pointer to struct hclge_dev 2400 * @vf_id: Index of the virtual function with error 2401 * @q_id: Physical index of the queue with error 2402 * 2403 * This function get specific index of queue and function which causes 2404 * over_8bd_nfe_err by using command. If vf_id is 0, it means error is 2405 * caused by PF instead of VF. 
2406 */ 2407 static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id, 2408 u16 *q_id) 2409 { 2410 struct hclge_query_ppu_pf_other_int_dfx_cmd *req; 2411 struct hclge_desc desc; 2412 int ret; 2413 2414 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true); 2415 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2416 if (ret) 2417 return ret; 2418 2419 req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data; 2420 *vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id); 2421 *q_id = le16_to_cpu(req->over_8bd_no_fe_qid); 2422 2423 return 0; 2424 } 2425 2426 /* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err 2427 * @hdev: pointer to struct hclge_dev 2428 * @reset_requests: reset level that we need to trigger later 2429 * 2430 * over_8bd_nfe_err is a special MSI-X because it may caused by a VF, in 2431 * that case, we need to trigger VF reset. Otherwise, a PF reset is needed. 2432 */ 2433 static void hclge_handle_over_8bd_err(struct hclge_dev *hdev, 2434 unsigned long *reset_requests) 2435 { 2436 struct device *dev = &hdev->pdev->dev; 2437 u16 vf_id; 2438 u16 q_id; 2439 int ret; 2440 2441 ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id); 2442 if (ret) { 2443 dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n", 2444 ret); 2445 return; 2446 } 2447 2448 dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%u), queue_id(%u)\n", 2449 vf_id, q_id); 2450 2451 if (vf_id) { 2452 if (vf_id >= hdev->num_alloc_vport) { 2453 dev_err(dev, "invalid vf id(%u)\n", vf_id); 2454 return; 2455 } 2456 2457 /* If we need to trigger other reset whose level is higher 2458 * than HNAE3_VF_FUNC_RESET, no need to trigger a VF reset 2459 * here. 
2460 */ 2461 if (*reset_requests != 0) 2462 return; 2463 2464 ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]); 2465 if (ret) 2466 dev_err(dev, "inform reset to vf(%u) failed %d!\n", 2467 hdev->vport->vport_id, ret); 2468 } else { 2469 set_bit(HNAE3_FUNC_RESET, reset_requests); 2470 } 2471 } 2472 2473 /* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors 2474 * @hdev: pointer to struct hclge_dev 2475 * @desc: descriptor for describing the command 2476 * @mpf_bd_num: number of extended command structures 2477 * @reset_requests: record of the reset level that we need 2478 * 2479 * This function handles all the main PF MSI-X errors in the hw register/s 2480 * using command. 2481 */ 2482 static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev, 2483 struct hclge_desc *desc, 2484 int mpf_bd_num, 2485 unsigned long *reset_requests) 2486 { 2487 struct device *dev = &hdev->pdev->dev; 2488 __le32 *desc_data; 2489 u32 status; 2490 int ret; 2491 /* query all main PF MSIx errors */ 2492 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, 2493 true); 2494 ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num); 2495 if (ret) { 2496 dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret); 2497 return ret; 2498 } 2499 2500 /* log MAC errors */ 2501 desc_data = (__le32 *)&desc[1]; 2502 status = le32_to_cpu(*desc_data); 2503 if (status) 2504 hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R", 2505 &hclge_mac_afifo_tnl_int[0], status, 2506 reset_requests); 2507 2508 /* log PPU(RCB) MPF errors */ 2509 desc_data = (__le32 *)&desc[5]; 2510 status = le32_to_cpu(*(desc_data + 2)) & 2511 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK; 2512 if (status) 2513 dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x\n]", 2514 status); 2515 2516 /* clear all main PF MSIx errors */ 2517 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); 2518 if (ret) 2519 dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret); 2520 2521 return 
ret; 2522 } 2523 2524 /* hclge_handle_pf_msix_error: handle all PF MSI-X errors 2525 * @hdev: pointer to struct hclge_dev 2526 * @desc: descriptor for describing the command 2527 * @mpf_bd_num: number of extended command structures 2528 * @reset_requests: record of the reset level that we need 2529 * 2530 * This function handles all the PF MSI-X errors in the hw register/s using 2531 * command. 2532 */ 2533 static int hclge_handle_pf_msix_error(struct hclge_dev *hdev, 2534 struct hclge_desc *desc, 2535 int pf_bd_num, 2536 unsigned long *reset_requests) 2537 { 2538 struct device *dev = &hdev->pdev->dev; 2539 __le32 *desc_data; 2540 u32 status; 2541 int ret; 2542 2543 /* query all PF MSIx errors */ 2544 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, 2545 true); 2546 ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); 2547 if (ret) { 2548 dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret); 2549 return ret; 2550 } 2551 2552 /* log SSU PF errors */ 2553 status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK; 2554 if (status) 2555 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", 2556 &hclge_ssu_port_based_pf_int[0], 2557 status, reset_requests); 2558 2559 /* read and log PPP PF errors */ 2560 desc_data = (__le32 *)&desc[2]; 2561 status = le32_to_cpu(*desc_data); 2562 if (status) 2563 hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", 2564 &hclge_ppp_pf_abnormal_int[0], 2565 status, reset_requests); 2566 2567 /* log PPU(RCB) PF errors */ 2568 desc_data = (__le32 *)&desc[3]; 2569 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; 2570 if (status) 2571 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", 2572 &hclge_ppu_pf_abnormal_int[0], 2573 status, reset_requests); 2574 2575 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK; 2576 if (status) 2577 hclge_handle_over_8bd_err(hdev, reset_requests); 2578 2579 /* clear all PF MSIx errors */ 2580 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); 
2581 if (ret) 2582 dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret); 2583 2584 return ret; 2585 } 2586 2587 static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, 2588 unsigned long *reset_requests) 2589 { 2590 u32 mpf_bd_num, pf_bd_num, bd_num; 2591 struct hclge_desc *desc; 2592 int ret; 2593 2594 /* query the number of bds for the MSIx int status */ 2595 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); 2596 if (ret) 2597 goto out; 2598 2599 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2600 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2601 if (!desc) 2602 return -ENOMEM; 2603 2604 ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num, 2605 reset_requests); 2606 if (ret) 2607 goto msi_error; 2608 2609 memset(desc, 0, bd_num * sizeof(struct hclge_desc)); 2610 ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests); 2611 if (ret) 2612 goto msi_error; 2613 2614 ret = hclge_handle_mac_tnl(hdev); 2615 2616 msi_error: 2617 kfree(desc); 2618 out: 2619 return ret; 2620 } 2621 2622 int hclge_handle_hw_msix_error(struct hclge_dev *hdev, 2623 unsigned long *reset_requests) 2624 { 2625 struct device *dev = &hdev->pdev->dev; 2626 2627 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { 2628 dev_err(dev, 2629 "failed to handle msix error during dev init\n"); 2630 return -EAGAIN; 2631 } 2632 2633 return hclge_handle_all_hw_msix_error(hdev, reset_requests); 2634 } 2635 2636 int hclge_handle_mac_tnl(struct hclge_dev *hdev) 2637 { 2638 struct hclge_mac_tnl_stats mac_tnl_stats; 2639 struct device *dev = &hdev->pdev->dev; 2640 struct hclge_desc desc; 2641 u32 status; 2642 int ret; 2643 2644 /* query and clear mac tnl interruptions */ 2645 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true); 2646 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2647 if (ret) { 2648 dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret); 2649 return ret; 2650 } 2651 2652 status = le32_to_cpu(desc.data[0]); 
2653 if (status) { 2654 /* When mac tnl interrupt occurs, we record current time and 2655 * register status here in a fifo, then clear the status. So 2656 * that if link status changes suddenly at some time, we can 2657 * query them by debugfs. 2658 */ 2659 mac_tnl_stats.time = local_clock(); 2660 mac_tnl_stats.status = status; 2661 kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats); 2662 ret = hclge_clear_mac_tnl_int(hdev); 2663 if (ret) 2664 dev_err(dev, "failed to clear mac tnl int, ret = %d.\n", 2665 ret); 2666 } 2667 2668 return ret; 2669 } 2670 2671 void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev) 2672 { 2673 struct hclge_dev *hdev = ae_dev->priv; 2674 struct device *dev = &hdev->pdev->dev; 2675 u32 mpf_bd_num, pf_bd_num, bd_num; 2676 struct hclge_desc *desc; 2677 u32 status; 2678 int ret; 2679 2680 ae_dev->hw_err_reset_req = 0; 2681 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); 2682 2683 /* query the number of bds for the MSIx int status */ 2684 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); 2685 if (ret) 2686 return; 2687 2688 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2689 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2690 if (!desc) 2691 return; 2692 2693 /* Clear HNS hw errors reported through msix */ 2694 memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) - 2695 HCLGE_DESC_NO_DATA_LEN); 2696 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); 2697 if (ret) { 2698 dev_err(dev, "fail(%d) to clear mpf msix int during init\n", 2699 ret); 2700 goto msi_error; 2701 } 2702 2703 memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) - 2704 HCLGE_DESC_NO_DATA_LEN); 2705 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); 2706 if (ret) { 2707 dev_err(dev, "fail(%d) to clear pf msix int during init\n", 2708 ret); 2709 goto msi_error; 2710 } 2711 2712 /* Handle Non-fatal HNS RAS errors */ 2713 if (status & HCLGE_RAS_REG_NFE_MASK) { 2714 
dev_err(dev, "HNS hw error(RAS) identified during init\n"); 2715 hclge_handle_all_ras_errors(hdev); 2716 } 2717 2718 msi_error: 2719 kfree(desc); 2720 } 2721 2722 bool hclge_find_error_source(struct hclge_dev *hdev) 2723 { 2724 u32 msix_src_flag, hw_err_src_flag; 2725 2726 msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) & 2727 HCLGE_VECTOR0_REG_MSIX_MASK; 2728 2729 hw_err_src_flag = hclge_read_dev(&hdev->hw, 2730 HCLGE_RAS_PF_OTHER_INT_STS_REG) & 2731 HCLGE_RAS_REG_ERR_MASK; 2732 2733 return msix_src_flag || hw_err_src_flag; 2734 } 2735 2736 void hclge_handle_occurred_error(struct hclge_dev *hdev) 2737 { 2738 struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); 2739 2740 if (hclge_find_error_source(hdev)) 2741 hclge_handle_error_info_log(ae_dev); 2742 } 2743 2744 static void 2745 hclge_handle_error_type_reg_log(struct device *dev, 2746 struct hclge_mod_err_info *mod_info, 2747 struct hclge_type_reg_err_info *type_reg_info) 2748 { 2749 #define HCLGE_ERR_TYPE_MASK 0x7F 2750 #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7 2751 2752 u8 mod_id, total_module, type_id, total_type, i, is_ras; 2753 u8 index_module = MODULE_NONE; 2754 u8 index_type = NONE_ERROR; 2755 2756 mod_id = mod_info->mod_id; 2757 type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK; 2758 is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET; 2759 2760 total_module = ARRAY_SIZE(hclge_hw_module_id_st); 2761 total_type = ARRAY_SIZE(hclge_hw_type_id_st); 2762 2763 for (i = 0; i < total_module; i++) { 2764 if (mod_id == hclge_hw_module_id_st[i].module_id) { 2765 index_module = i; 2766 break; 2767 } 2768 } 2769 2770 for (i = 0; i < total_type; i++) { 2771 if (type_id == hclge_hw_type_id_st[i].type_id) { 2772 index_type = i; 2773 break; 2774 } 2775 } 2776 2777 if (index_module != MODULE_NONE && index_type != NONE_ERROR) 2778 dev_err(dev, 2779 "found %s %s, is %s error.\n", 2780 hclge_hw_module_id_st[index_module].msg, 2781 hclge_hw_type_id_st[index_type].msg, 2782 is_ras ? 
"ras" : "msix"); 2783 else 2784 dev_err(dev, 2785 "unknown module[%u] or type[%u].\n", mod_id, type_id); 2786 2787 dev_err(dev, "reg_value:\n"); 2788 for (i = 0; i < type_reg_info->reg_num; i++) 2789 dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]); 2790 } 2791 2792 static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev, 2793 const u32 *buf, u32 buf_size) 2794 { 2795 struct hclge_type_reg_err_info *type_reg_info; 2796 struct hclge_dev *hdev = ae_dev->priv; 2797 struct device *dev = &hdev->pdev->dev; 2798 struct hclge_mod_err_info *mod_info; 2799 struct hclge_sum_err_info *sum_info; 2800 u8 mod_num, err_num, i; 2801 u32 offset = 0; 2802 2803 sum_info = (struct hclge_sum_err_info *)&buf[offset++]; 2804 if (sum_info->reset_type && 2805 sum_info->reset_type != HNAE3_NONE_RESET) 2806 set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req); 2807 mod_num = sum_info->mod_num; 2808 2809 while (mod_num--) { 2810 if (offset >= buf_size) { 2811 dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n", 2812 offset, buf_size); 2813 return; 2814 } 2815 mod_info = (struct hclge_mod_err_info *)&buf[offset++]; 2816 err_num = mod_info->err_num; 2817 2818 for (i = 0; i < err_num; i++) { 2819 if (offset >= buf_size) { 2820 dev_err(dev, 2821 "The offset(%u) exceeds buf size(%u).\n", 2822 offset, buf_size); 2823 return; 2824 } 2825 2826 type_reg_info = (struct hclge_type_reg_err_info *) 2827 &buf[offset++]; 2828 hclge_handle_error_type_reg_log(dev, mod_info, 2829 type_reg_info); 2830 2831 offset += type_reg_info->reg_num; 2832 } 2833 } 2834 } 2835 2836 static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num) 2837 { 2838 struct device *dev = &hdev->pdev->dev; 2839 struct hclge_desc desc_bd; 2840 int ret; 2841 2842 hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true); 2843 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); 2844 if (ret) { 2845 dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret); 2846 return ret; 2847 } 2848 2849 
*bd_num = le32_to_cpu(desc_bd.data[0]); 2850 if (!(*bd_num)) { 2851 dev_err(dev, "The value of bd_num is 0!\n"); 2852 return -EINVAL; 2853 } 2854 2855 return 0; 2856 } 2857 2858 static int hclge_query_all_err_info(struct hclge_dev *hdev, 2859 struct hclge_desc *desc, u32 bd_num) 2860 { 2861 struct device *dev = &hdev->pdev->dev; 2862 int ret; 2863 2864 hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true); 2865 ret = hclge_cmd_send(&hdev->hw, desc, bd_num); 2866 if (ret) 2867 dev_err(dev, "failed to query error info, ret = %d.\n", ret); 2868 2869 return ret; 2870 } 2871 2872 int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev) 2873 { 2874 u32 bd_num, desc_len, buf_len, buf_size, i; 2875 struct hclge_dev *hdev = ae_dev->priv; 2876 struct hclge_desc *desc; 2877 __le32 *desc_data; 2878 u32 *buf; 2879 int ret; 2880 2881 ret = hclge_query_all_err_bd_num(hdev, &bd_num); 2882 if (ret) 2883 goto out; 2884 2885 desc_len = bd_num * sizeof(struct hclge_desc); 2886 desc = kzalloc(desc_len, GFP_KERNEL); 2887 if (!desc) { 2888 ret = -ENOMEM; 2889 goto out; 2890 } 2891 2892 ret = hclge_query_all_err_info(hdev, desc, bd_num); 2893 if (ret) 2894 goto err_desc; 2895 2896 buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN; 2897 buf_size = buf_len / sizeof(u32); 2898 2899 desc_data = kzalloc(buf_len, GFP_KERNEL); 2900 if (!desc_data) { 2901 ret = -ENOMEM; 2902 goto err_desc; 2903 } 2904 2905 buf = kzalloc(buf_len, GFP_KERNEL); 2906 if (!buf) { 2907 ret = -ENOMEM; 2908 goto err_buf_alloc; 2909 } 2910 2911 memcpy(desc_data, &desc[0].data[0], buf_len); 2912 for (i = 0; i < buf_size; i++) 2913 buf[i] = le32_to_cpu(desc_data[i]); 2914 2915 hclge_handle_error_module_log(ae_dev, buf, buf_size); 2916 kfree(buf); 2917 2918 err_buf_alloc: 2919 kfree(desc_data); 2920 err_desc: 2921 kfree(desc); 2922 out: 2923 return ret; 2924 } 2925