1 // SPDX-License-Identifier: GPL-2.0+ 2 /* Copyright (c) 2016-2017 Hisilicon Limited. */ 3 4 #include <linux/sched/clock.h> 5 6 #include "hclge_err.h" 7 8 static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = { 9 { 10 .int_msk = BIT(1), 11 .msg = "imp_itcm0_ecc_mbit_err", 12 .reset_level = HNAE3_NONE_RESET 13 }, { 14 .int_msk = BIT(3), 15 .msg = "imp_itcm1_ecc_mbit_err", 16 .reset_level = HNAE3_NONE_RESET 17 }, { 18 .int_msk = BIT(5), 19 .msg = "imp_itcm2_ecc_mbit_err", 20 .reset_level = HNAE3_NONE_RESET 21 }, { 22 .int_msk = BIT(7), 23 .msg = "imp_itcm3_ecc_mbit_err", 24 .reset_level = HNAE3_NONE_RESET 25 }, { 26 .int_msk = BIT(9), 27 .msg = "imp_dtcm0_mem0_ecc_mbit_err", 28 .reset_level = HNAE3_NONE_RESET 29 }, { 30 .int_msk = BIT(11), 31 .msg = "imp_dtcm0_mem1_ecc_mbit_err", 32 .reset_level = HNAE3_NONE_RESET 33 }, { 34 .int_msk = BIT(13), 35 .msg = "imp_dtcm1_mem0_ecc_mbit_err", 36 .reset_level = HNAE3_NONE_RESET 37 }, { 38 .int_msk = BIT(15), 39 .msg = "imp_dtcm1_mem1_ecc_mbit_err", 40 .reset_level = HNAE3_NONE_RESET 41 }, { 42 .int_msk = BIT(17), 43 .msg = "imp_itcm4_ecc_mbit_err", 44 .reset_level = HNAE3_NONE_RESET 45 }, { 46 /* sentinel */ 47 } 48 }; 49 50 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = { 51 { 52 .int_msk = BIT(1), 53 .msg = "cmdq_nic_rx_depth_ecc_mbit_err", 54 .reset_level = HNAE3_NONE_RESET 55 }, { 56 .int_msk = BIT(3), 57 .msg = "cmdq_nic_tx_depth_ecc_mbit_err", 58 .reset_level = HNAE3_NONE_RESET 59 }, { 60 .int_msk = BIT(5), 61 .msg = "cmdq_nic_rx_tail_ecc_mbit_err", 62 .reset_level = HNAE3_NONE_RESET 63 }, { 64 .int_msk = BIT(7), 65 .msg = "cmdq_nic_tx_tail_ecc_mbit_err", 66 .reset_level = HNAE3_NONE_RESET 67 }, { 68 .int_msk = BIT(9), 69 .msg = "cmdq_nic_rx_head_ecc_mbit_err", 70 .reset_level = HNAE3_NONE_RESET 71 }, { 72 .int_msk = BIT(11), 73 .msg = "cmdq_nic_tx_head_ecc_mbit_err", 74 .reset_level = HNAE3_NONE_RESET 75 }, { 76 .int_msk = BIT(13), 77 .msg = "cmdq_nic_rx_addr_ecc_mbit_err", 78 .reset_level = HNAE3_NONE_RESET 79 }, { 80 .int_msk = BIT(15), 81 .msg = "cmdq_nic_tx_addr_ecc_mbit_err", 82 .reset_level = HNAE3_NONE_RESET 83 }, { 84 .int_msk = BIT(17), 85 .msg = "cmdq_rocee_rx_depth_ecc_mbit_err", 86 .reset_level = HNAE3_NONE_RESET 87 }, { 88 .int_msk = BIT(19), 89 .msg = "cmdq_rocee_tx_depth_ecc_mbit_err", 90 .reset_level = HNAE3_NONE_RESET 91 }, { 92 .int_msk = BIT(21), 93 .msg = "cmdq_rocee_rx_tail_ecc_mbit_err", 94 .reset_level = HNAE3_NONE_RESET 95 }, { 96 .int_msk = BIT(23), 97 .msg = "cmdq_rocee_tx_tail_ecc_mbit_err", 98 .reset_level = HNAE3_NONE_RESET 99 }, { 100 .int_msk = BIT(25), 101 .msg = "cmdq_rocee_rx_head_ecc_mbit_err", 102 .reset_level = HNAE3_NONE_RESET 103 }, { 104 .int_msk = BIT(27), 105 .msg = "cmdq_rocee_tx_head_ecc_mbit_err", 106 .reset_level = HNAE3_NONE_RESET 107 }, { 108 .int_msk = BIT(29), 109 .msg = "cmdq_rocee_rx_addr_ecc_mbit_err", 110 .reset_level = HNAE3_NONE_RESET 111 }, { 112 .int_msk = BIT(31), 113 .msg = "cmdq_rocee_tx_addr_ecc_mbit_err", 114 .reset_level = HNAE3_NONE_RESET 115 }, { 116 /* sentinel */ 117 } 118 }; 119 120 static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { 121 { 122 .int_msk = BIT(6), 123 .msg = "tqp_int_cfg_even_ecc_mbit_err", 124 .reset_level = HNAE3_NONE_RESET 125 }, { 126 .int_msk = BIT(7), 127 .msg = "tqp_int_cfg_odd_ecc_mbit_err", 128 .reset_level = HNAE3_NONE_RESET 129 }, { 130 .int_msk = BIT(8), 131 .msg = "tqp_int_ctrl_even_ecc_mbit_err", 132 .reset_level = HNAE3_NONE_RESET 133 }, { 134 .int_msk = BIT(9), 135 .msg = "tqp_int_ctrl_odd_ecc_mbit_err", 136 .reset_level = HNAE3_NONE_RESET 137 }, { 138 .int_msk = BIT(10), 139 .msg = "tx_que_scan_int_ecc_mbit_err", 140 .reset_level = HNAE3_NONE_RESET 141 }, { 142 .int_msk = BIT(11), 143 .msg = "rx_que_scan_int_ecc_mbit_err", 144 .reset_level = HNAE3_NONE_RESET 145 }, { 146 /* sentinel */ 147 } 148 }; 149 150 static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = { 151 { 152 .int_msk = BIT(1), 153 .msg = "msix_nic_ecc_mbit_err", 154 .reset_level = HNAE3_NONE_RESET 155 }, { 156 .int_msk = BIT(3), 157 .msg = "msix_rocee_ecc_mbit_err", 158 .reset_level = HNAE3_NONE_RESET 159 }, { 160 /* sentinel */ 161 } 162 }; 163 164 static const struct hclge_hw_error hclge_igu_int[] = { 165 { 166 .int_msk = BIT(0), 167 .msg = "igu_rx_buf0_ecc_mbit_err", 168 .reset_level = HNAE3_GLOBAL_RESET 169 }, { 170 .int_msk = BIT(2), 171 .msg = "igu_rx_buf1_ecc_mbit_err", 172 .reset_level = HNAE3_GLOBAL_RESET 173 }, { 174 /* sentinel */ 175 } 176 }; 177 178 static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { 179 { 180 .int_msk = BIT(0), 181 .msg = "rx_buf_overflow", 182 .reset_level = HNAE3_GLOBAL_RESET 183 }, { 184 .int_msk = BIT(1), 185 .msg = "rx_stp_fifo_overflow", 186 .reset_level = HNAE3_GLOBAL_RESET 187 }, { 188 .int_msk = BIT(2), 189 .msg = "rx_stp_fifo_underflow", 190 .reset_level = HNAE3_GLOBAL_RESET 191 }, { 192 .int_msk = BIT(3), 193 .msg = "tx_buf_overflow", 194 .reset_level = HNAE3_GLOBAL_RESET 195 }, { 196 .int_msk = BIT(4), 197 .msg = "tx_buf_underrun", 198 .reset_level = HNAE3_GLOBAL_RESET 199 }, { 200 .int_msk = BIT(5), 201 .msg = "rx_stp_buf_overflow", 202 .reset_level = HNAE3_GLOBAL_RESET 203 }, { 204 /* sentinel */ 205 } 206 }; 207 208 static const struct hclge_hw_error hclge_ncsi_err_int[] = { 209 { 210 .int_msk = BIT(1), 211 .msg = "ncsi_tx_ecc_mbit_err", 212 .reset_level = HNAE3_NONE_RESET 213 }, { 214 /* sentinel */ 215 } 216 }; 217 218 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = { 219 { 220 .int_msk = BIT(0), 221 .msg = "vf_vlan_ad_mem_ecc_mbit_err", 222 .reset_level = HNAE3_GLOBAL_RESET 223 }, { 224 .int_msk = BIT(1), 225 .msg = "umv_mcast_group_mem_ecc_mbit_err", 226 .reset_level = HNAE3_GLOBAL_RESET 227 }, { 228 .int_msk = BIT(2), 229 .msg = "umv_key_mem0_ecc_mbit_err", 230 .reset_level = HNAE3_GLOBAL_RESET 231 }, { 232 .int_msk = BIT(3), 233 .msg = "umv_key_mem1_ecc_mbit_err", 234 .reset_level = HNAE3_GLOBAL_RESET 235 }, { 236 .int_msk = BIT(4), 237 .msg = "umv_key_mem2_ecc_mbit_err", 238 .reset_level = HNAE3_GLOBAL_RESET 239 }, { 240 .int_msk = BIT(5), 241 .msg = "umv_key_mem3_ecc_mbit_err", 242 .reset_level = HNAE3_GLOBAL_RESET 243 }, { 244 .int_msk = BIT(6), 245 .msg = "umv_ad_mem_ecc_mbit_err", 246 .reset_level = HNAE3_GLOBAL_RESET 247 }, { 248 .int_msk = BIT(7), 249 .msg = "rss_tc_mode_mem_ecc_mbit_err", 250 .reset_level = HNAE3_GLOBAL_RESET 251 }, { 252 .int_msk = BIT(8), 253 .msg = "rss_idt_mem0_ecc_mbit_err", 254 .reset_level = HNAE3_GLOBAL_RESET 255 }, { 256 .int_msk = BIT(9), 257 .msg = "rss_idt_mem1_ecc_mbit_err", 258 .reset_level = HNAE3_GLOBAL_RESET 259 }, { 260 .int_msk = BIT(10), 261 .msg = "rss_idt_mem2_ecc_mbit_err", 262 .reset_level = HNAE3_GLOBAL_RESET 263 }, { 264 .int_msk = BIT(11), 265 .msg = "rss_idt_mem3_ecc_mbit_err", 266 .reset_level = HNAE3_GLOBAL_RESET 267 }, { 268 .int_msk = BIT(12), 269 .msg = "rss_idt_mem4_ecc_mbit_err", 270 .reset_level = HNAE3_GLOBAL_RESET 271 }, { 272 .int_msk = BIT(13), 273 .msg = "rss_idt_mem5_ecc_mbit_err", 274 .reset_level = HNAE3_GLOBAL_RESET 275 }, { 276 .int_msk = BIT(14), 277 .msg = "rss_idt_mem6_ecc_mbit_err", 278 .reset_level = HNAE3_GLOBAL_RESET 279 }, { 280 .int_msk = BIT(15), 281 .msg = "rss_idt_mem7_ecc_mbit_err", 282 .reset_level = HNAE3_GLOBAL_RESET 283 }, { 284 .int_msk = BIT(16), 285 .msg = "rss_idt_mem8_ecc_mbit_err", 286 .reset_level = HNAE3_GLOBAL_RESET 287 }, { 288 .int_msk = BIT(17), 289 .msg = "rss_idt_mem9_ecc_mbit_err", 290 .reset_level = HNAE3_GLOBAL_RESET 291 }, { 292 .int_msk = BIT(18), 293 .msg = "rss_idt_mem10_ecc_mbit_err", 294 .reset_level = HNAE3_GLOBAL_RESET 295 }, { 296 .int_msk = BIT(19), 297 .msg = "rss_idt_mem11_ecc_mbit_err", 298 .reset_level = HNAE3_GLOBAL_RESET 299 }, { 300 .int_msk = BIT(20), 301 .msg = "rss_idt_mem12_ecc_mbit_err", 302 .reset_level = HNAE3_GLOBAL_RESET 303 }, { 304 .int_msk = BIT(21), 305 .msg = "rss_idt_mem13_ecc_mbit_err", 306 .reset_level = HNAE3_GLOBAL_RESET 307 }, { 308 .int_msk = BIT(22), 309 .msg = "rss_idt_mem14_ecc_mbit_err", 310 .reset_level = HNAE3_GLOBAL_RESET 311 }, { 312 .int_msk = BIT(23), 313 .msg = "rss_idt_mem15_ecc_mbit_err", 314 .reset_level = HNAE3_GLOBAL_RESET 315 }, { 316 .int_msk = BIT(24), 317 .msg = "port_vlan_mem_ecc_mbit_err", 318 .reset_level = HNAE3_GLOBAL_RESET 319 }, { 320 .int_msk = BIT(25), 321 .msg = "mcast_linear_table_mem_ecc_mbit_err", 322 .reset_level = HNAE3_GLOBAL_RESET 323 }, { 324 .int_msk = BIT(26), 325 .msg = "mcast_result_mem_ecc_mbit_err", 326 .reset_level = HNAE3_GLOBAL_RESET 327 }, { 328 .int_msk = BIT(27), 329 .msg = "flow_director_ad_mem0_ecc_mbit_err", 330 .reset_level = HNAE3_GLOBAL_RESET 331 }, { 332 .int_msk = BIT(28), 333 .msg = "flow_director_ad_mem1_ecc_mbit_err", 334 .reset_level = HNAE3_GLOBAL_RESET 335 }, { 336 .int_msk = BIT(29), 337 .msg = "rx_vlan_tag_memory_ecc_mbit_err", 338 .reset_level = HNAE3_GLOBAL_RESET 339 }, { 340 .int_msk = BIT(30), 341 .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err", 342 .reset_level = HNAE3_GLOBAL_RESET 343 }, { 344 /* sentinel */ 345 } 346 }; 347 348 static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = { 349 { 350 .int_msk = BIT(0), 351 .msg = "tx_vlan_tag_err", 352 .reset_level = HNAE3_NONE_RESET 353 }, { 354 .int_msk = BIT(1), 355 .msg = "rss_list_tc_unassigned_queue_err", 356 .reset_level = HNAE3_NONE_RESET 357 }, { 358 /* sentinel */ 359 } 360 }; 361 362 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = { 363 { 364 .int_msk = BIT(0), 365 .msg = "hfs_fifo_mem_ecc_mbit_err", 366 .reset_level = HNAE3_GLOBAL_RESET 367 }, { 368 .int_msk = BIT(1), 369 .msg = "rslt_descr_fifo_mem_ecc_mbit_err", 370 .reset_level = HNAE3_GLOBAL_RESET 371 }, { 372 .int_msk = BIT(2), 373 .msg = "tx_vlan_tag_mem_ecc_mbit_err", 374 .reset_level = HNAE3_GLOBAL_RESET 375 }, { 376 .int_msk = BIT(3), 377 .msg = "FD_CN0_memory_ecc_mbit_err", 378 .reset_level = HNAE3_GLOBAL_RESET 379 }, { 380 .int_msk = BIT(4), 381 .msg = "FD_CN1_memory_ecc_mbit_err", 382 .reset_level = HNAE3_GLOBAL_RESET 383 }, { 384 .int_msk = BIT(5), 385 .msg = "GRO_AD_memory_ecc_mbit_err", 386 .reset_level = HNAE3_GLOBAL_RESET 387 }, { 388 /* sentinel */ 389 } 390 }; 391 392 static const struct hclge_hw_error hclge_tm_sch_rint[] = { 393 { 394 .int_msk = BIT(1), 395 .msg = "tm_sch_ecc_mbit_err", 396 .reset_level = HNAE3_GLOBAL_RESET 397 }, { 398 .int_msk = BIT(2), 399 .msg = "tm_sch_port_shap_sub_fifo_wr_err", 400 .reset_level = HNAE3_GLOBAL_RESET 401 }, { 402 .int_msk = BIT(3), 403 .msg = "tm_sch_port_shap_sub_fifo_rd_err", 404 .reset_level = HNAE3_GLOBAL_RESET 405 }, { 406 .int_msk = BIT(4), 407 .msg = "tm_sch_pg_pshap_sub_fifo_wr_err", 408 .reset_level = HNAE3_GLOBAL_RESET 409 }, { 410 .int_msk = BIT(5), 411 .msg = "tm_sch_pg_pshap_sub_fifo_rd_err", 412 .reset_level = HNAE3_GLOBAL_RESET 413 }, { 414 .int_msk = BIT(6), 415 .msg = "tm_sch_pg_cshap_sub_fifo_wr_err", 416 .reset_level = HNAE3_GLOBAL_RESET 417 }, { 418 .int_msk = BIT(7), 419 .msg = "tm_sch_pg_cshap_sub_fifo_rd_err", 420 .reset_level = HNAE3_GLOBAL_RESET 421 }, { 422 .int_msk = BIT(8), 423 .msg = "tm_sch_pri_pshap_sub_fifo_wr_err", 424 .reset_level = HNAE3_GLOBAL_RESET 425 }, { 426 .int_msk = BIT(9), 427 .msg = "tm_sch_pri_pshap_sub_fifo_rd_err", 428 .reset_level = HNAE3_GLOBAL_RESET 429 }, { 430 .int_msk = BIT(10), 431 .msg = "tm_sch_pri_cshap_sub_fifo_wr_err", 432 .reset_level = HNAE3_GLOBAL_RESET 433 }, { 434 .int_msk = BIT(11), 435 .msg = "tm_sch_pri_cshap_sub_fifo_rd_err", 436 .reset_level = HNAE3_GLOBAL_RESET 437 }, { 438 .int_msk = BIT(12), 439 .msg = "tm_sch_port_shap_offset_fifo_wr_err", 440 .reset_level = HNAE3_GLOBAL_RESET 441 }, { 442 .int_msk = BIT(13), 443 .msg = "tm_sch_port_shap_offset_fifo_rd_err", 444 .reset_level = HNAE3_GLOBAL_RESET 445 }, { 446 .int_msk = BIT(14), 447 .msg = "tm_sch_pg_pshap_offset_fifo_wr_err", 448 .reset_level = HNAE3_GLOBAL_RESET 449 }, { 450 .int_msk = BIT(15), 451 .msg = "tm_sch_pg_pshap_offset_fifo_rd_err", 452 .reset_level = HNAE3_GLOBAL_RESET 453 }, { 454 .int_msk = BIT(16), 455 .msg = "tm_sch_pg_cshap_offset_fifo_wr_err", 456 .reset_level = HNAE3_GLOBAL_RESET 457 }, { 458 .int_msk = BIT(17), 459 .msg = "tm_sch_pg_cshap_offset_fifo_rd_err", 460 .reset_level = HNAE3_GLOBAL_RESET 461 }, { 462 .int_msk = BIT(18), 463 .msg = "tm_sch_pri_pshap_offset_fifo_wr_err", 464 .reset_level = HNAE3_GLOBAL_RESET 465 }, { 466 .int_msk = BIT(19), 467 .msg = "tm_sch_pri_pshap_offset_fifo_rd_err", 468 .reset_level = HNAE3_GLOBAL_RESET 469 }, { 470 .int_msk = BIT(20), 471 .msg = "tm_sch_pri_cshap_offset_fifo_wr_err", 472 .reset_level = HNAE3_GLOBAL_RESET 473 }, { 474 .int_msk = BIT(21), 475 .msg = "tm_sch_pri_cshap_offset_fifo_rd_err", 476 .reset_level = HNAE3_GLOBAL_RESET 477 }, { 478 .int_msk = BIT(22), 479 .msg = "tm_sch_rq_fifo_wr_err", 480 .reset_level = HNAE3_GLOBAL_RESET 481 }, { 482 .int_msk = BIT(23), 483 .msg = "tm_sch_rq_fifo_rd_err", 484 .reset_level = HNAE3_GLOBAL_RESET 485 }, { 486 .int_msk = BIT(24), 487 .msg = "tm_sch_nq_fifo_wr_err", 488 .reset_level = HNAE3_GLOBAL_RESET 489 }, { 490 .int_msk = BIT(25), 491 .msg = "tm_sch_nq_fifo_rd_err", 492 .reset_level = HNAE3_GLOBAL_RESET 493 }, { 494 .int_msk = BIT(26), 495 .msg = "tm_sch_roce_up_fifo_wr_err", 496 .reset_level = HNAE3_GLOBAL_RESET 497 }, { 498 .int_msk = BIT(27), 499 .msg = "tm_sch_roce_up_fifo_rd_err", 500 .reset_level = HNAE3_GLOBAL_RESET 501 }, { 502 .int_msk = BIT(28), 503 .msg = "tm_sch_rcb_byte_fifo_wr_err", 504 .reset_level = HNAE3_GLOBAL_RESET 505 }, { 506 .int_msk = BIT(29), 507 .msg = "tm_sch_rcb_byte_fifo_rd_err", 508 .reset_level = HNAE3_GLOBAL_RESET 509 }, { 510 .int_msk = BIT(30), 511 .msg = "tm_sch_ssu_byte_fifo_wr_err", 512 .reset_level = HNAE3_GLOBAL_RESET 513 }, { 514 .int_msk = BIT(31), 515 .msg = "tm_sch_ssu_byte_fifo_rd_err", 516 .reset_level = HNAE3_GLOBAL_RESET 517 }, { 518 /* sentinel */ 519 } 520 }; 521 522 static const struct hclge_hw_error hclge_qcn_fifo_rint[] = { 523 { 524 .int_msk = BIT(0), 525 .msg = "qcn_shap_gp0_sch_fifo_rd_err", 526 .reset_level = HNAE3_GLOBAL_RESET 527 }, { 528 .int_msk = BIT(1), 529 .msg = "qcn_shap_gp0_sch_fifo_wr_err", 530 .reset_level = HNAE3_GLOBAL_RESET 531 }, { 532 .int_msk = BIT(2), 533 .msg = "qcn_shap_gp1_sch_fifo_rd_err", 534 .reset_level = HNAE3_GLOBAL_RESET 535 }, { 536 .int_msk = BIT(3), 537 .msg = "qcn_shap_gp1_sch_fifo_wr_err", 538 .reset_level = HNAE3_GLOBAL_RESET 539 }, { 540 .int_msk = BIT(4), 541 .msg = "qcn_shap_gp2_sch_fifo_rd_err", 542 .reset_level = HNAE3_GLOBAL_RESET 543 }, { 544 .int_msk = BIT(5), 545 .msg = "qcn_shap_gp2_sch_fifo_wr_err", 546 .reset_level = HNAE3_GLOBAL_RESET 547 }, { 548 .int_msk = BIT(6), 549 .msg = "qcn_shap_gp3_sch_fifo_rd_err", 550 .reset_level = HNAE3_GLOBAL_RESET 551 }, { 552 .int_msk = BIT(7), 553 .msg = "qcn_shap_gp3_sch_fifo_wr_err", 554 .reset_level = HNAE3_GLOBAL_RESET 555 }, { 556 .int_msk = BIT(8), 557 .msg = "qcn_shap_gp0_offset_fifo_rd_err", 558 .reset_level = HNAE3_GLOBAL_RESET 559 }, { 560 .int_msk = BIT(9), 561 .msg = "qcn_shap_gp0_offset_fifo_wr_err", 562 .reset_level = HNAE3_GLOBAL_RESET 563 }, { 564 .int_msk = BIT(10), 565 .msg = "qcn_shap_gp1_offset_fifo_rd_err", 566 .reset_level = HNAE3_GLOBAL_RESET 567 }, { 568 .int_msk = BIT(11), 569 .msg = "qcn_shap_gp1_offset_fifo_wr_err", 570 .reset_level = HNAE3_GLOBAL_RESET 571 }, { 572 .int_msk = BIT(12), 573 .msg = "qcn_shap_gp2_offset_fifo_rd_err", 574 .reset_level = HNAE3_GLOBAL_RESET 575 }, { 576 .int_msk = BIT(13), 577 .msg = "qcn_shap_gp2_offset_fifo_wr_err", 578 .reset_level = HNAE3_GLOBAL_RESET 579 }, { 580 .int_msk = BIT(14), 581 .msg = "qcn_shap_gp3_offset_fifo_rd_err", 582 .reset_level = HNAE3_GLOBAL_RESET 583 }, { 584 .int_msk = BIT(15), 585 .msg = "qcn_shap_gp3_offset_fifo_wr_err", 586 .reset_level = HNAE3_GLOBAL_RESET 587 }, { 588 .int_msk = BIT(16), 589 .msg = "qcn_byte_info_fifo_rd_err", 590 .reset_level = HNAE3_GLOBAL_RESET 591 }, { 592 .int_msk = BIT(17), 593 .msg = "qcn_byte_info_fifo_wr_err", 594 .reset_level = HNAE3_GLOBAL_RESET 595 }, { 596 /* sentinel */ 597 } 598 }; 599 600 static const struct hclge_hw_error hclge_qcn_ecc_rint[] = { 601 { 602 .int_msk = BIT(1), 603 .msg = "qcn_byte_mem_ecc_mbit_err", 604 .reset_level = HNAE3_GLOBAL_RESET 605 }, { 606 .int_msk = BIT(3), 607 .msg = "qcn_time_mem_ecc_mbit_err", 608 .reset_level = HNAE3_GLOBAL_RESET 609 }, { 610 .int_msk = BIT(5), 611 .msg = "qcn_fb_mem_ecc_mbit_err", 612 .reset_level = HNAE3_GLOBAL_RESET 613 }, { 614 .int_msk = BIT(7), 615 .msg = "qcn_link_mem_ecc_mbit_err", 616 .reset_level = HNAE3_GLOBAL_RESET 617 }, { 618 .int_msk = BIT(9), 619 .msg = "qcn_rate_mem_ecc_mbit_err", 620 .reset_level = HNAE3_GLOBAL_RESET 621 }, { 622 .int_msk = BIT(11), 623 .msg = "qcn_tmplt_mem_ecc_mbit_err", 624 .reset_level = HNAE3_GLOBAL_RESET 625 }, { 626 .int_msk = BIT(13), 627 .msg = "qcn_shap_cfg_mem_ecc_mbit_err", 628 .reset_level = HNAE3_GLOBAL_RESET 629 }, { 630 .int_msk = BIT(15), 631 .msg = "qcn_gp0_barrel_mem_ecc_mbit_err", 632 .reset_level = HNAE3_GLOBAL_RESET 633 }, { 634 .int_msk = BIT(17), 635 .msg = "qcn_gp1_barrel_mem_ecc_mbit_err", 636 .reset_level = HNAE3_GLOBAL_RESET 637 }, { 638 .int_msk = BIT(19), 639 .msg = "qcn_gp2_barrel_mem_ecc_mbit_err", 640 .reset_level = HNAE3_GLOBAL_RESET 641 }, { 642 .int_msk = BIT(21), 643 .msg = "qcn_gp3_barral_mem_ecc_mbit_err", 644 .reset_level = HNAE3_GLOBAL_RESET 645 }, { 646 /* sentinel */ 647 } 648 }; 649 650 static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = { 651 { 652 .int_msk = BIT(0), 653 .msg = "egu_cge_afifo_ecc_1bit_err", 654 .reset_level = HNAE3_NONE_RESET 655 }, { 656 .int_msk = BIT(1), 657 .msg = "egu_cge_afifo_ecc_mbit_err", 658 .reset_level = HNAE3_GLOBAL_RESET 659 }, { 660 .int_msk = BIT(2), 661 .msg = "egu_lge_afifo_ecc_1bit_err", 662 .reset_level = HNAE3_NONE_RESET 663 }, { 664 .int_msk = BIT(3), 665 .msg = "egu_lge_afifo_ecc_mbit_err", 666 .reset_level = HNAE3_GLOBAL_RESET 667 }, { 668 .int_msk = BIT(4), 669 .msg = "cge_igu_afifo_ecc_1bit_err", 670 .reset_level = HNAE3_NONE_RESET 671 }, { 672 .int_msk = BIT(5), 673 .msg = "cge_igu_afifo_ecc_mbit_err", 674 .reset_level = HNAE3_GLOBAL_RESET 675 }, { 676 .int_msk = BIT(6), 677 .msg = "lge_igu_afifo_ecc_1bit_err", 678 .reset_level = HNAE3_NONE_RESET 679 }, { 680 .int_msk = BIT(7), 681 .msg = "lge_igu_afifo_ecc_mbit_err", 682 .reset_level = HNAE3_GLOBAL_RESET 683 }, { 684 .int_msk = BIT(8), 685 .msg = "cge_igu_afifo_overflow_err", 686 .reset_level = HNAE3_GLOBAL_RESET 687 }, { 688 .int_msk = BIT(9), 689 .msg = "lge_igu_afifo_overflow_err", 690 .reset_level = HNAE3_GLOBAL_RESET 691 }, { 692 .int_msk = BIT(10), 693 .msg = "egu_cge_afifo_underrun_err", 694 .reset_level = HNAE3_GLOBAL_RESET 695 }, { 696 .int_msk = BIT(11), 697 .msg = "egu_lge_afifo_underrun_err", 698 .reset_level = HNAE3_GLOBAL_RESET 699 }, { 700 .int_msk = BIT(12), 701 .msg = "egu_ge_afifo_underrun_err", 702 .reset_level = HNAE3_GLOBAL_RESET 703 }, { 704 .int_msk = BIT(13), 705 .msg = "ge_igu_afifo_overflow_err", 706 .reset_level = HNAE3_GLOBAL_RESET 707 }, { 708 /* sentinel */ 709 } 710 }; 711 712 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { 713 { 714 .int_msk = BIT(13), 715 .msg = "rpu_rx_pkt_bit32_ecc_mbit_err", 716 .reset_level = HNAE3_GLOBAL_RESET 717 }, { 718 .int_msk = BIT(14), 719 .msg = "rpu_rx_pkt_bit33_ecc_mbit_err", 720 .reset_level = HNAE3_GLOBAL_RESET 721 }, { 722 .int_msk = BIT(15), 723 .msg = "rpu_rx_pkt_bit34_ecc_mbit_err", 724 .reset_level = HNAE3_GLOBAL_RESET 725 }, { 726 .int_msk = BIT(16), 727 .msg = "rpu_rx_pkt_bit35_ecc_mbit_err", 728 .reset_level = HNAE3_GLOBAL_RESET 729 }, { 730 .int_msk = BIT(17), 731 .msg = "rcb_tx_ring_ecc_mbit_err", 732 .reset_level = HNAE3_GLOBAL_RESET 733 }, { 734 .int_msk = BIT(18), 735 .msg = "rcb_rx_ring_ecc_mbit_err", 736 .reset_level = HNAE3_GLOBAL_RESET 737 }, { 738 .int_msk = BIT(19), 739 .msg = "rcb_tx_fbd_ecc_mbit_err", 740 .reset_level = HNAE3_GLOBAL_RESET 741 }, { 742 .int_msk = BIT(20), 743 .msg = "rcb_rx_ebd_ecc_mbit_err", 744 .reset_level = HNAE3_GLOBAL_RESET 745 }, { 746 .int_msk = BIT(21), 747 .msg = "rcb_tso_info_ecc_mbit_err", 748 .reset_level = HNAE3_GLOBAL_RESET 749 }, { 750 .int_msk = BIT(22), 751 .msg = "rcb_tx_int_info_ecc_mbit_err", 752 .reset_level = HNAE3_GLOBAL_RESET 753 }, { 754 .int_msk = BIT(23), 755 .msg = "rcb_rx_int_info_ecc_mbit_err", 756 .reset_level = HNAE3_GLOBAL_RESET 757 }, { 758 .int_msk = BIT(24), 759 .msg = "tpu_tx_pkt_0_ecc_mbit_err", 760 .reset_level = HNAE3_GLOBAL_RESET 761 }, { 762 .int_msk = BIT(25), 763 .msg = "tpu_tx_pkt_1_ecc_mbit_err", 764 .reset_level = HNAE3_GLOBAL_RESET 765 }, { 766 .int_msk = BIT(26), 767 .msg = "rd_bus_err", 768 .reset_level = HNAE3_GLOBAL_RESET 769 }, { 770 .int_msk = BIT(27), 771 .msg = "wr_bus_err", 772 .reset_level = HNAE3_GLOBAL_RESET 773 }, { 774 .int_msk = BIT(28), 775 .msg = "reg_search_miss", 776 .reset_level = HNAE3_GLOBAL_RESET 777 }, { 778 .int_msk = BIT(29), 779 .msg = "rx_q_search_miss", 780 .reset_level = HNAE3_NONE_RESET 781 }, { 782 .int_msk = BIT(30), 783 .msg = "ooo_ecc_err_detect", 784 .reset_level = HNAE3_NONE_RESET 785 }, { 786 .int_msk = BIT(31), 787 .msg = "ooo_ecc_err_multpl", 788 .reset_level = HNAE3_GLOBAL_RESET 789 }, { 790 /* sentinel */ 791 } 792 }; 793 794 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { 795 { 796 .int_msk = BIT(4), 797 .msg = "gro_bd_ecc_mbit_err", 798 .reset_level = HNAE3_GLOBAL_RESET 799 }, { 800 .int_msk = BIT(5), 801 .msg = "gro_context_ecc_mbit_err", 802 .reset_level = HNAE3_GLOBAL_RESET 803 }, { 804 .int_msk = BIT(6), 805 .msg = "rx_stash_cfg_ecc_mbit_err", 806 .reset_level = HNAE3_GLOBAL_RESET 807 }, { 808 .int_msk = BIT(7), 809 .msg = "axi_rd_fbd_ecc_mbit_err", 810 .reset_level = HNAE3_GLOBAL_RESET 811 }, { 812 /* sentinel */ 813 } 814 }; 815 816 static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = { 817 { 818 .int_msk = BIT(0), 819 .msg = "over_8bd_no_fe", 820 .reset_level = HNAE3_FUNC_RESET 821 }, { 822 .int_msk = BIT(1), 823 .msg = "tso_mss_cmp_min_err", 824 .reset_level = HNAE3_NONE_RESET 825 }, { 826 .int_msk = BIT(2), 827 .msg = "tso_mss_cmp_max_err", 828 .reset_level = HNAE3_NONE_RESET 829 }, { 830 .int_msk = BIT(3), 831 .msg = "tx_rd_fbd_poison", 832 .reset_level = HNAE3_FUNC_RESET 833 }, { 834 .int_msk = BIT(4), 835 .msg = "rx_rd_ebd_poison", 836 .reset_level = HNAE3_FUNC_RESET 837 }, { 838 .int_msk = BIT(5), 839 .msg = "buf_wait_timeout", 840 .reset_level = HNAE3_NONE_RESET 841 }, { 842 /* sentinel */ 843 } 844 }; 845 846 static const struct hclge_hw_error hclge_ssu_com_err_int[] = { 847 { 848 .int_msk = BIT(0), 849 .msg = "buf_sum_err", 850 .reset_level = HNAE3_NONE_RESET 851 }, { 852 .int_msk = BIT(1), 853 .msg = "ppp_mb_num_err", 854 .reset_level = HNAE3_NONE_RESET 855 }, { 856 .int_msk = BIT(2), 857 .msg = "ppp_mbid_err", 858 .reset_level = HNAE3_GLOBAL_RESET 859 }, { 860 .int_msk = BIT(3), 861 .msg = "ppp_rlt_mac_err", 862 .reset_level = HNAE3_GLOBAL_RESET 863 }, { 864 .int_msk = BIT(4), 865 .msg = "ppp_rlt_host_err", 866 .reset_level = HNAE3_GLOBAL_RESET 867 }, { 868 .int_msk = BIT(5), 869 .msg = "cks_edit_position_err", 870 .reset_level = HNAE3_GLOBAL_RESET 871 }, { 872 .int_msk = BIT(6), 873 .msg = "cks_edit_condition_err", 874 .reset_level = HNAE3_GLOBAL_RESET 875 }, { 876 .int_msk = BIT(7), 877 .msg = "vlan_edit_condition_err", 878 .reset_level = HNAE3_GLOBAL_RESET 879 }, { 880 .int_msk = BIT(8), 881 .msg = "vlan_num_ot_err", 882 .reset_level = HNAE3_GLOBAL_RESET 883 }, { 884 .int_msk = BIT(9), 885 .msg = "vlan_num_in_err", 886 .reset_level = HNAE3_GLOBAL_RESET 887 }, { 888 /* sentinel */ 889 } 890 }; 891 892 #define HCLGE_SSU_MEM_ECC_ERR(x) \ 893 { \ 894 .int_msk = BIT(x), \ 895 .msg = "ssu_mem" #x "_ecc_mbit_err", \ 896 .reset_level = HNAE3_GLOBAL_RESET \ 897 } 898 899 static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = { 900 HCLGE_SSU_MEM_ECC_ERR(0), 901 HCLGE_SSU_MEM_ECC_ERR(1), 902 HCLGE_SSU_MEM_ECC_ERR(2), 903 HCLGE_SSU_MEM_ECC_ERR(3), 904 HCLGE_SSU_MEM_ECC_ERR(4), 905 HCLGE_SSU_MEM_ECC_ERR(5), 906 HCLGE_SSU_MEM_ECC_ERR(6), 907 HCLGE_SSU_MEM_ECC_ERR(7), 908 HCLGE_SSU_MEM_ECC_ERR(8), 909 HCLGE_SSU_MEM_ECC_ERR(9), 910 HCLGE_SSU_MEM_ECC_ERR(10), 911 HCLGE_SSU_MEM_ECC_ERR(11), 912 HCLGE_SSU_MEM_ECC_ERR(12), 913 HCLGE_SSU_MEM_ECC_ERR(13), 914 HCLGE_SSU_MEM_ECC_ERR(14), 915 HCLGE_SSU_MEM_ECC_ERR(15), 916 HCLGE_SSU_MEM_ECC_ERR(16), 917 HCLGE_SSU_MEM_ECC_ERR(17), 918 HCLGE_SSU_MEM_ECC_ERR(18), 919 HCLGE_SSU_MEM_ECC_ERR(19), 920 HCLGE_SSU_MEM_ECC_ERR(20), 921 HCLGE_SSU_MEM_ECC_ERR(21), 922 HCLGE_SSU_MEM_ECC_ERR(22), 923 HCLGE_SSU_MEM_ECC_ERR(23), 924 HCLGE_SSU_MEM_ECC_ERR(24), 925 HCLGE_SSU_MEM_ECC_ERR(25), 926 HCLGE_SSU_MEM_ECC_ERR(26), 927 HCLGE_SSU_MEM_ECC_ERR(27), 928 HCLGE_SSU_MEM_ECC_ERR(28), 929 HCLGE_SSU_MEM_ECC_ERR(29), 930 HCLGE_SSU_MEM_ECC_ERR(30), 931 HCLGE_SSU_MEM_ECC_ERR(31), 932 { /* sentinel */ } 933 }; 934 935 static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = { 936 { 937 .int_msk = BIT(0), 938 .msg = "roc_pkt_without_key_port", 939 .reset_level = HNAE3_FUNC_RESET 940 }, { 941 .int_msk = BIT(1), 942 .msg = "tpu_pkt_without_key_port", 943 .reset_level = HNAE3_GLOBAL_RESET 944 }, { 945 .int_msk = BIT(2), 946 .msg = "igu_pkt_without_key_port", 947 .reset_level = HNAE3_GLOBAL_RESET 948 }, { 949 .int_msk = BIT(3), 950 .msg = "roc_eof_mis_match_port", 951 .reset_level = HNAE3_GLOBAL_RESET 952 }, { 953 .int_msk = BIT(4), 954 .msg = "tpu_eof_mis_match_port", 955 .reset_level = HNAE3_GLOBAL_RESET 956 }, { 957 .int_msk = BIT(5), 958 .msg = "igu_eof_mis_match_port", 959 .reset_level = HNAE3_GLOBAL_RESET 960 }, { 961 .int_msk = BIT(6), 962 .msg = "roc_sof_mis_match_port", 963 .reset_level = HNAE3_GLOBAL_RESET 964 }, { 965 .int_msk = BIT(7), 966 .msg = "tpu_sof_mis_match_port", 967 .reset_level = HNAE3_GLOBAL_RESET 968 }, { 969 .int_msk = BIT(8), 970 .msg = "igu_sof_mis_match_port", 971 .reset_level = HNAE3_GLOBAL_RESET 972 }, { 973 .int_msk = BIT(11), 974 .msg = "ets_rd_int_rx_port", 975 .reset_level = HNAE3_GLOBAL_RESET 976 }, { 977 .int_msk = BIT(12), 978 .msg = "ets_wr_int_rx_port", 979 .reset_level = HNAE3_GLOBAL_RESET 980 }, { 981 .int_msk = BIT(13), 982 .msg = "ets_rd_int_tx_port", 983 .reset_level = HNAE3_GLOBAL_RESET 984 }, { 985 .int_msk = BIT(14), 986 .msg = "ets_wr_int_tx_port", 987 .reset_level = HNAE3_GLOBAL_RESET 988 }, { 989 /* sentinel */ 990 } 991 }; 992 993 static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = { 994 { 995 .int_msk = BIT(0), 996 .msg = "ig_mac_inf_int", 997 .reset_level = HNAE3_GLOBAL_RESET 998 }, { 999 .int_msk = BIT(1), 1000 .msg = "ig_host_inf_int", 1001 .reset_level = HNAE3_GLOBAL_RESET 1002 }, { 1003 .int_msk = BIT(2), 1004 .msg = "ig_roc_buf_int", 1005 .reset_level = HNAE3_GLOBAL_RESET 1006 }, { 1007 .int_msk = BIT(3), 1008 .msg = "ig_host_data_fifo_int", 1009 .reset_level = HNAE3_GLOBAL_RESET 1010 }, { 1011 .int_msk = BIT(4), 1012 .msg = "ig_host_key_fifo_int", 1013 .reset_level = HNAE3_GLOBAL_RESET 1014 }, { 1015 .int_msk = BIT(5), 1016 .msg = "tx_qcn_fifo_int", 1017 .reset_level = HNAE3_GLOBAL_RESET 1018 }, { 1019 .int_msk = BIT(6), 1020 .msg = "rx_qcn_fifo_int", 1021 .reset_level = HNAE3_GLOBAL_RESET 1022 }, { 1023 .int_msk = BIT(7), 1024 .msg = "tx_pf_rd_fifo_int", 1025 .reset_level = HNAE3_GLOBAL_RESET 1026 }, { 1027 .int_msk = BIT(8), 1028 .msg = "rx_pf_rd_fifo_int", 1029 .reset_level = HNAE3_GLOBAL_RESET 1030 }, { 1031 .int_msk = BIT(9), 1032 .msg = "qm_eof_fifo_int", 1033 .reset_level = HNAE3_GLOBAL_RESET 1034 }, { 1035 .int_msk = BIT(10), 1036 .msg = "mb_rlt_fifo_int", 1037 .reset_level = HNAE3_GLOBAL_RESET 1038 }, { 1039 .int_msk = BIT(11), 1040 .msg = "dup_uncopy_fifo_int", 1041 .reset_level = HNAE3_GLOBAL_RESET 1042 }, { 1043 .int_msk = BIT(12), 1044 .msg = "dup_cnt_rd_fifo_int", 1045 .reset_level = HNAE3_GLOBAL_RESET 1046 }, { 1047 .int_msk = BIT(13), 1048 .msg = "dup_cnt_drop_fifo_int", 1049 .reset_level = HNAE3_GLOBAL_RESET 1050 }, { 1051 .int_msk = BIT(14), 1052 .msg = "dup_cnt_wrb_fifo_int", 1053 .reset_level = HNAE3_GLOBAL_RESET 1054 }, { 1055 .int_msk = BIT(15), 1056 .msg = "host_cmd_fifo_int", 1057 .reset_level = HNAE3_GLOBAL_RESET 1058 }, { 1059 .int_msk = BIT(16), 1060 .msg = "mac_cmd_fifo_int", 1061 .reset_level = HNAE3_GLOBAL_RESET 1062 }, { 1063 .int_msk = BIT(17), 1064 .msg = "host_cmd_bitmap_empty_int", 1065 .reset_level = HNAE3_GLOBAL_RESET 1066 }, { 1067 .int_msk = BIT(18), 1068 .msg = "mac_cmd_bitmap_empty_int", 1069 .reset_level = HNAE3_GLOBAL_RESET 1070 }, { 1071 .int_msk = BIT(19), 1072 .msg = "dup_bitmap_empty_int", 1073 .reset_level = HNAE3_GLOBAL_RESET 1074 }, { 1075 .int_msk = BIT(20), 1076 .msg = "out_queue_bitmap_empty_int", 1077 .reset_level = HNAE3_GLOBAL_RESET 1078 }, { 1079 .int_msk = BIT(21), 1080 .msg = "bank2_bitmap_empty_int", 1081 .reset_level = HNAE3_GLOBAL_RESET 1082 }, { 1083 .int_msk = BIT(22), 1084 .msg = "bank1_bitmap_empty_int", 1085 .reset_level = HNAE3_GLOBAL_RESET 1086 }, { 1087 .int_msk = BIT(23), 1088 .msg = "bank0_bitmap_empty_int", 1089 .reset_level = HNAE3_GLOBAL_RESET 1090 }, { 1091 /* sentinel */ 1092 } 1093 }; 1094 1095 static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { 1096 { 1097 .int_msk = BIT(0), 1098 .msg = "ets_rd_int_rx_tcg", 1099 .reset_level = HNAE3_GLOBAL_RESET 1100 }, { 1101 .int_msk = BIT(1), 1102 .msg = "ets_wr_int_rx_tcg", 1103 .reset_level = HNAE3_GLOBAL_RESET 1104 }, { 1105 .int_msk = BIT(2), 1106 .msg = "ets_rd_int_tx_tcg", 1107 .reset_level = HNAE3_GLOBAL_RESET 1108 }, { 1109 .int_msk = BIT(3), 1110 .msg = "ets_wr_int_tx_tcg", 1111 .reset_level = HNAE3_GLOBAL_RESET 1112 }, { 1113 /* sentinel */ 1114 } 1115 }; 1116 1117 static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { 1118 { 1119 .int_msk = BIT(0), 1120 .msg = "roc_pkt_without_key_port", 1121 .reset_level = HNAE3_FUNC_RESET 1122 }, { 1123 .int_msk = BIT(9), 1124 .msg = "low_water_line_err_port", 1125 .reset_level = HNAE3_NONE_RESET 1126 }, { 1127 .int_msk = BIT(10), 1128 .msg = "hi_water_line_err_port", 1129 .reset_level = HNAE3_GLOBAL_RESET 1130 }, { 1131 /* sentinel */ 1132 } 1133 }; 1134 1135 static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { 1136 { 1137 .int_msk = 0, 1138 .msg = "rocee qmm ovf: sgid invalid err" 1139 }, { 1140 .int_msk = 0x4, 1141 .msg = "rocee qmm ovf: sgid ovf err" 1142 }, { 1143 .int_msk = 0x8, 1144 .msg = "rocee qmm ovf: smac invalid err" 1145 }, { 1146 .int_msk = 0xC, 1147 .msg = "rocee qmm ovf: smac ovf err" 1148 }, { 1149 .int_msk = 0x10, 1150 .msg = "rocee qmm ovf: cqc invalid err" 1151 }, { 1152 .int_msk = 0x11, 1153 .msg = "rocee qmm ovf: cqc ovf err" 1154 }, { 1155 .int_msk = 0x12, 1156 .msg = "rocee qmm ovf: cqc hopnum err" 1157 }, { 1158 .int_msk = 0x13, 1159 .msg = "rocee qmm ovf: cqc ba0 err" 1160 }, { 1161 .int_msk = 0x14, 1162 .msg = "rocee qmm ovf: srqc invalid err" 1163 }, { 1164 .int_msk = 0x15, 1165 .msg = "rocee qmm ovf: srqc ovf err" 1166 }, { 1167 .int_msk = 0x16, 1168 .msg = "rocee qmm ovf: srqc hopnum err" 1169 }, { 1170 .int_msk = 0x17, 1171 .msg = "rocee qmm ovf: srqc ba0 err" 1172 }, { 1173 .int_msk = 0x18, 1174 .msg = "rocee qmm ovf: mpt invalid err" 1175 }, { 1176 .int_msk = 0x19, 1177 .msg = "rocee qmm ovf: mpt ovf err" 1178 }, { 1179 .int_msk = 0x1A, 1180 .msg = "rocee qmm ovf: mpt hopnum err" 1181 }, { 1182 .int_msk = 0x1B, 1183 .msg = "rocee qmm ovf: mpt ba0 err" 1184 }, { 1185 .int_msk = 0x1C, 1186 .msg = "rocee qmm ovf: qpc invalid err" 1187 }, { 1188 .int_msk = 0x1D, 1189 .msg = "rocee qmm ovf: qpc ovf err" 1190 }, { 1191 .int_msk = 0x1E, 1192 .msg = "rocee qmm ovf: qpc hopnum err" 1193 }, { 1194 .int_msk = 0x1F, 1195 .msg = "rocee qmm ovf: qpc ba0 err" 1196 }, { 1197 /* sentinel */ 1198 } 1199 }; 1200 1201 static const struct hclge_hw_module_id hclge_hw_module_id_st[] = { 1202 { 1203 .module_id = MODULE_NONE, 1204 .msg = "MODULE_NONE" 1205 }, { 1206 .module_id = MODULE_BIOS_COMMON, 1207 .msg = "MODULE_BIOS_COMMON" 1208 }, { 1209 .module_id = MODULE_GE, 1210 .msg = "MODULE_GE" 1211 }, { 1212 .module_id = MODULE_IGU_EGU, 1213 .msg = "MODULE_IGU_EGU" 1214 }, { 1215 .module_id = MODULE_LGE, 1216 .msg = "MODULE_LGE" 1217 }, { 1218 .module_id = MODULE_NCSI, 1219 .msg = "MODULE_NCSI" 1220 }, { 1221 .module_id = MODULE_PPP, 1222 .msg = "MODULE_PPP" 1223 }, { 1224 .module_id = MODULE_QCN, 1225 .msg = "MODULE_QCN" 1226 }, { 1227 .module_id = MODULE_RCB_RX, 1228 .msg = "MODULE_RCB_RX" 1229 }, { 1230 .module_id = MODULE_RTC, 1231 .msg = "MODULE_RTC" 1232 }, { 1233 .module_id = MODULE_SSU, 1234 .msg = "MODULE_SSU" 1235 }, { 1236 .module_id = MODULE_TM, 1237 .msg = "MODULE_TM" 1238 }, { 1239 .module_id = MODULE_RCB_TX, 1240 .msg = "MODULE_RCB_TX" 1241 }, { 1242 .module_id = MODULE_TXDMA, 1243 .msg = "MODULE_TXDMA" 1244 }, { 1245 .module_id = MODULE_MASTER, 1246 .msg = "MODULE_MASTER" 1247 }, { 1248 .module_id = MODULE_HIMAC, 1249 .msg = "MODULE_HIMAC" 1250 }, { 1251 .module_id = MODULE_ROCEE_TOP, 1252 .msg = "MODULE_ROCEE_TOP" 1253 }, { 1254 .module_id = MODULE_ROCEE_TIMER, 1255 .msg = "MODULE_ROCEE_TIMER" 1256 }, { 1257 .module_id = MODULE_ROCEE_MDB, 1258 .msg = "MODULE_ROCEE_MDB" 1259 }, { 1260 .module_id = MODULE_ROCEE_TSP, 1261 .msg = "MODULE_ROCEE_TSP" 1262 }, { 1263 .module_id = MODULE_ROCEE_TRP, 1264 .msg = "MODULE_ROCEE_TRP" 1265 }, { 1266 .module_id = MODULE_ROCEE_SCC, 1267 .msg = "MODULE_ROCEE_SCC" 1268 }, { 1269 .module_id = MODULE_ROCEE_CAEP, 1270 .msg = "MODULE_ROCEE_CAEP" 1271 }, { 1272 .module_id = MODULE_ROCEE_GEN_AC, 1273 .msg = "MODULE_ROCEE_GEN_AC" 1274 }, { 1275 .module_id = MODULE_ROCEE_QMM, 1276 .msg = "MODULE_ROCEE_QMM" 1277 }, { 1278 .module_id = MODULE_ROCEE_LSAN, 1279 .msg = "MODULE_ROCEE_LSAN" 1280 } 1281 }; 1282 1283 static const struct hclge_hw_type_id hclge_hw_type_id_st[] = { 1284 { 1285 .type_id = NONE_ERROR, 1286 .msg = "none_error" 1287 }, { 1288 .type_id = FIFO_ERROR, 1289 .msg = "fifo_error" 1290 }, { 1291 .type_id = MEMORY_ERROR, 1292 .msg = "memory_error" 1293 }, { 1294 .type_id = POISON_ERROR, 1295 .msg = "poison_error" 1296 }, { 1297 .type_id = MSIX_ECC_ERROR, 1298 .msg = "msix_ecc_error" 1299 }, { 1300 .type_id = TQP_INT_ECC_ERROR, 1301 .msg = "tqp_int_ecc_error" 1302 }, { 1303 .type_id = PF_ABNORMAL_INT_ERROR, 1304 .msg = "pf_abnormal_int_error" 1305 }, { 1306 .type_id = MPF_ABNORMAL_INT_ERROR, 1307 .msg = "mpf_abnormal_int_error" 1308 }, { 1309 .type_id = COMMON_ERROR, 1310 .msg = "common_error" 1311 }, { 1312 .type_id = PORT_ERROR, 1313 .msg = "port_error" 1314 }, { 1315 .type_id = ETS_ERROR, 1316 .msg = "ets_error" 1317 }, { 1318 .type_id = NCSI_ERROR, 1319 .msg = "ncsi_error" 1320 }, { 1321 .type_id = GLB_ERROR, 1322 .msg = "glb_error" 1323 }, { 1324 .type_id = LINK_ERROR, 1325 .msg = "link_error" 1326 }, { 1327 .type_id = PTP_ERROR, 1328 .msg = "ptp_error" 1329 }, { 1330 .type_id = ROCEE_NORMAL_ERR, 1331 .msg = "rocee_normal_error" 1332 }, { 1333 .type_id = ROCEE_OVF_ERR, 1334 .msg = "rocee_ovf_error" 1335 }, { 1336 .type_id = ROCEE_BUS_ERR, 1337 .msg = "rocee_bus_error" 1338 }, 1339 }; 1340 1341 static void hclge_log_error(struct device *dev, char *reg, 1342 const struct hclge_hw_error *err, 1343 u32 err_sts, unsigned long *reset_requests) 1344 { 1345 while (err->msg) { 1346 if (err->int_msk & err_sts) { 1347 dev_err(dev, "%s %s found [error status=0x%x]\n", 1348 reg, err->msg, err_sts); 1349 if (err->reset_level && 1350 err->reset_level != HNAE3_NONE_RESET) 1351 set_bit(err->reset_level, reset_requests); 1352 } 1353 err++; 1354 } 1355 } 1356 1357 /* hclge_cmd_query_error: read the error information 1358 * @hdev: pointer to struct hclge_dev 1359 * @desc: descriptor for describing the command 1360 * @cmd: command opcode 1361 * @flag: flag for extended command structure 1362 * 1363 * This function query the error info from hw register/s using command 1364 */ 1365 static int hclge_cmd_query_error(struct hclge_dev *hdev, 1366 struct hclge_desc *desc, u32 cmd, u16 flag) 1367 { 1368 struct device *dev = &hdev->pdev->dev; 1369 int desc_num = 1; 1370 int ret; 1371 1372 hclge_cmd_setup_basic_desc(&desc[0], cmd, true); 1373 if (flag) { 1374 desc[0].flag |= cpu_to_le16(flag); 1375 hclge_cmd_setup_basic_desc(&desc[1], cmd, true); 1376 desc_num = 2; 1377 } 1378 1379 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 1380 if (ret) 1381 dev_err(dev, "query error cmd failed (%d)\n", ret); 1382 1383 return ret; 1384 } 1385 1386 static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev) 1387 { 1388 struct hclge_desc desc; 1389 1390 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false); 1391 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR); 1392 1393 return hclge_cmd_send(&hdev->hw, &desc, 1); 1394 } 1395 1396 static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en) 1397 { 1398 struct device *dev = &hdev->pdev->dev; 1399 struct hclge_desc desc[2]; 1400 int ret; 1401 1402 /* configure common error interrupts */ 1403 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); 1404 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1405 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); 1406 1407 if (en) { 1408 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); 1409 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | 1410 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); 1411 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); 1412 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN | 1413 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN); 1414 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); 1415 } 1416 1417 desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); 1418 desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | 1419 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); 1420 desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); 1421 desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK | 1422 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK); 1423 desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK); 1424 1425 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1426 if (ret) 1427 dev_err(dev, 1428 "fail(%d) to configure common err interrupts\n", ret); 1429 1430 return ret; 1431 } 1432 1433 static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en) 1434 { 1435 struct device *dev = &hdev->pdev->dev; 1436 struct hclge_desc desc; 1437 int ret; 1438 1439 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) 1440 return 0; 1441 1442 /* configure NCSI error interrupts */ 1443 hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false); 1444 if (en) 1445 desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN); 1446 1447 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1448 if (ret) 1449 dev_err(dev, 1450 "fail(%d) to configure NCSI error interrupts\n", ret); 1451 1452 return ret; 1453 } 1454 1455 static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en) 1456 { 1457 struct device *dev = &hdev->pdev->dev; 1458 struct hclge_desc desc; 1459 int ret; 1460 1461 /* configure IGU,EGU error interrupts */ 1462 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false); 1463 desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE); 1464 if (en) 1465 desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN); 1466 1467 desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK); 1468 1469 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1470 if (ret) { 1471 dev_err(dev, 1472 "fail(%d) to configure IGU common interrupts\n", ret); 1473 return ret; 1474 } 1475 1476 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false); 1477 if (en) 1478 desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN); 1479 1480 desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK); 1481 1482 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1483 if (ret) { 1484 dev_err(dev, 1485 "fail(%d) to configure IGU-EGU TNL interrupts\n", ret); 1486 return ret; 1487 } 1488 1489 ret = hclge_config_ncsi_hw_err_int(hdev, en); 1490 1491 return ret; 1492 } 1493 1494 static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, 1495 bool en) 1496 { 1497 struct device *dev = &hdev->pdev->dev; 1498 struct hclge_desc desc[2]; 1499 int ret; 1500 1501 /* configure PPP error interrupts */ 1502 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1503 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1504 hclge_cmd_setup_basic_desc(&desc[1], cmd, false); 1505 1506 if (cmd == HCLGE_PPP_CMD0_INT_CMD) { 1507 if (en) { 1508 desc[0].data[0] = 1509 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN); 1510 desc[0].data[1] = 1511 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN); 1512 desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN); 1513 } 1514 1515 desc[1].data[0] = 1516 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK); 1517 desc[1].data[1] = 1518 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK); 1519 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 1520 desc[1].data[2] = 1521 cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK); 1522 } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { 1523 if (en) { 1524 desc[0].data[0] = 1525 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN); 1526 desc[0].data[1] = 1527 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN); 1528 } 1529 1530 desc[1].data[0] = 1531 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK); 1532 desc[1].data[1] = 1533 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK); 1534 } 1535 1536 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1537 if (ret) 1538 dev_err(dev, "fail(%d) to configure PPP error intr\n", ret); 1539 1540 return ret; 1541 } 1542 1543 static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en) 1544 { 1545 int ret; 1546 1547 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD, 1548 en); 1549 if (ret) 1550 return ret; 1551 1552 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD, 1553 en); 1554 1555 return ret; 1556 } 1557 1558 static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en) 1559 { 1560 struct device *dev = &hdev->pdev->dev; 1561 struct hclge_desc desc; 1562 int ret; 1563 1564 /* configure TM SCH hw errors */ 1565 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false); 1566 if (en) 1567 desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN); 1568 1569 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1570 if (ret) { 1571 dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret); 1572 return ret; 1573 } 1574 1575 /* configure TM QCN hw errors */ 1576 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false); 1577 desc.data[0] = cpu_to_le32(HCLGE_TM_QCN_ERR_INT_TYPE); 1578 if (en) { 1579 desc.data[0] |= cpu_to_le32(HCLGE_TM_QCN_FIFO_INT_EN); 1580 desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); 1581 } 1582 1583 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1584 if (ret) 1585 dev_err(dev, 1586 "fail(%d) to configure TM QCN mem errors\n", ret); 1587 1588 return ret; 1589 } 1590 1591 static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en) 1592 { 1593 struct device *dev = &hdev->pdev->dev; 1594 struct hclge_desc desc; 1595 int ret; 1596 1597 /* configure MAC common error interrupts */ 1598 hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false); 1599 if (en) 1600 desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN); 1601 1602 desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK); 1603 1604 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1605 if (ret) 1606 dev_err(dev, 1607 "fail(%d) to configure MAC COMMON error intr\n", ret); 1608 1609 return ret; 1610 } 1611 1612 int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en) 1613 { 1614 struct hclge_desc desc; 1615 1616 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false); 1617 if (en) 1618 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN); 1619 else 1620 desc.data[0] = 0; 1621 1622 desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK); 1623 1624 return hclge_cmd_send(&hdev->hw, &desc, 1); 1625 } 1626 1627 static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, 1628 bool en) 1629 { 1630 struct device *dev = &hdev->pdev->dev; 1631 struct hclge_desc desc[2]; 1632 int desc_num = 1; 1633 int ret; 1634 1635 /* configure PPU error interrupts */ 1636 if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) { 1637 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1638 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1639 hclge_cmd_setup_basic_desc(&desc[1], cmd, false); 1640 if (en) { 1641 desc[0].data[0] = 1642 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN); 1643 desc[0].data[1] = 1644 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN); 1645 desc[1].data[3] = 1646 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN); 1647 desc[1].data[4] = 1648 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN); 1649 } 1650 1651 desc[1].data[0] = 1652 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK); 1653 desc[1].data[1] = 1654 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK); 1655 desc[1].data[2] = 1656 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK); 1657 desc[1].data[3] |= 1658 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK); 1659 desc_num = 2; 1660 } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) { 1661 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1662 if (en) 1663 desc[0].data[0] = 1664 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2); 1665 1666 desc[0].data[2] = 1667 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK); 1668 } else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) { 1669 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1670 if (en) 1671 desc[0].data[0] = 1672 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN); 1673 1674 desc[0].data[2] = 1675 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK); 1676 } else { 1677 dev_err(dev, "Invalid cmd to configure PPU error interrupts\n"); 1678 return -EINVAL; 1679 } 1680 1681 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 1682 1683 return ret; 1684 } 1685 1686 static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en) 1687 { 1688 struct device *dev = &hdev->pdev->dev; 1689 int ret; 1690 1691 ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD, 1692 en); 1693 if (ret) { 1694 dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n", 1695 ret); 1696 return ret; 1697 } 1698 1699 ret = hclge_config_ppu_error_interrupts(hdev, 1700 HCLGE_PPU_MPF_OTHER_INT_CMD, 1701 en); 1702 if (ret) { 1703 dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret); 1704 return ret; 1705 } 1706 1707 ret = hclge_config_ppu_error_interrupts(hdev, 1708 HCLGE_PPU_PF_OTHER_INT_CMD, en); 1709 if (ret) 1710 dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n", 1711 ret); 1712 return ret; 1713 } 1714 1715 static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) 1716 { 1717 struct device *dev = &hdev->pdev->dev; 1718 struct hclge_desc desc[2]; 1719 int ret; 1720 1721 /* configure SSU ecc error interrupts */ 1722 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false); 1723 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1724 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false); 1725 if (en) { 1726 desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN); 1727 desc[0].data[1] = 1728 cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN); 1729 desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN); 1730 } 1731 1732 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK); 1733 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK); 1734 desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK); 1735 1736 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1737 if (ret) { 1738 dev_err(dev, 1739 "fail(%d) to configure SSU ECC error interrupt\n", ret); 1740 return ret; 1741 } 1742 1743 /* configure SSU common error interrupts */ 1744 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false); 1745 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1746 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false); 1747 1748 if (en) { 1749 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 1750 desc[0].data[0] = 1751 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN); 1752 else 1753 desc[0].data[0] = 1754 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5)); 1755 desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN); 1756 desc[0].data[2] = 1757 cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN); 1758 } 1759 1760 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK | 1761 HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK); 1762 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK); 1763 1764 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1765 if (ret) 1766 dev_err(dev, 1767 "fail(%d) to configure SSU COMMON error intr\n", ret); 1768 1769 return ret; 1770 } 1771 1772 /* hclge_query_bd_num: query number of buffer descriptors 1773 * @hdev: pointer to struct hclge_dev 1774 * @is_ras: true for ras, false for msix 1775 * @mpf_bd_num: number of main PF interrupt buffer descriptors 1776 * @pf_bd_num: number of not main PF interrupt buffer descriptors 1777 * 1778 * This function querys number of mpf and pf buffer descriptors. 1779 */ 1780 static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras, 1781 u32 *mpf_bd_num, u32 *pf_bd_num) 1782 { 1783 struct device *dev = &hdev->pdev->dev; 1784 u32 mpf_min_bd_num, pf_min_bd_num; 1785 enum hclge_opcode_type opcode; 1786 struct hclge_desc desc_bd; 1787 int ret; 1788 1789 if (is_ras) { 1790 opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM; 1791 mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM; 1792 pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM; 1793 } else { 1794 opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM; 1795 mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM; 1796 pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM; 1797 } 1798 1799 hclge_cmd_setup_basic_desc(&desc_bd, opcode, true); 1800 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); 1801 if (ret) { 1802 dev_err(dev, "fail(%d) to query msix int status bd num\n", 1803 ret); 1804 return ret; 1805 } 1806 1807 *mpf_bd_num = le32_to_cpu(desc_bd.data[0]); 1808 *pf_bd_num = le32_to_cpu(desc_bd.data[1]); 1809 if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) { 1810 dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n", 1811 *mpf_bd_num, *pf_bd_num); 1812 return -EINVAL; 1813 } 1814 1815 return 0; 1816 } 1817 1818 /* hclge_handle_mpf_ras_error: handle all main PF RAS errors 1819 * @hdev: pointer to struct hclge_dev 1820 * @desc: descriptor for describing the command 1821 * @num: number of extended command structures 1822 * 1823 * This function handles all the main PF RAS errors in the 1824 * hw register/s using command. 1825 */ 1826 static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, 1827 struct hclge_desc *desc, 1828 int num) 1829 { 1830 struct hnae3_ae_dev *ae_dev = hdev->ae_dev; 1831 struct device *dev = &hdev->pdev->dev; 1832 __le32 *desc_data; 1833 u32 status; 1834 int ret; 1835 1836 /* query all main PF RAS errors */ 1837 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT, 1838 true); 1839 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 1840 if (ret) { 1841 dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret); 1842 return ret; 1843 } 1844 1845 /* log HNS common errors */ 1846 status = le32_to_cpu(desc[0].data[0]); 1847 if (status) 1848 hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", 1849 &hclge_imp_tcm_ecc_int[0], status, 1850 &ae_dev->hw_err_reset_req); 1851 1852 status = le32_to_cpu(desc[0].data[1]); 1853 if (status) 1854 hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", 1855 &hclge_cmdq_nic_mem_ecc_int[0], status, 1856 &ae_dev->hw_err_reset_req); 1857 1858 if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) 1859 dev_warn(dev, "imp_rd_data_poison_err found\n"); 1860 1861 status = le32_to_cpu(desc[0].data[3]); 1862 if (status) 1863 hclge_log_error(dev, "TQP_INT_ECC_INT_STS", 1864 &hclge_tqp_int_ecc_int[0], status, 1865 &ae_dev->hw_err_reset_req); 1866 1867 status = le32_to_cpu(desc[0].data[4]); 1868 if (status) 1869 hclge_log_error(dev, "MSIX_ECC_INT_STS", 1870 &hclge_msix_sram_ecc_int[0], status, 1871 &ae_dev->hw_err_reset_req); 1872 1873 /* log SSU(Storage Switch Unit) errors */ 1874 desc_data = (__le32 *)&desc[2]; 1875 status = le32_to_cpu(*(desc_data + 2)); 1876 if (status) 1877 hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", 1878 &hclge_ssu_mem_ecc_err_int[0], status, 1879 &ae_dev->hw_err_reset_req); 1880 1881 status = le32_to_cpu(*(desc_data + 3)) & BIT(0); 1882 if (status) { 1883 dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n", 1884 status); 1885 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); 1886 } 1887 1888 status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK; 1889 if (status) 1890 hclge_log_error(dev, "SSU_COMMON_ERR_INT", 1891 &hclge_ssu_com_err_int[0], status, 1892 &ae_dev->hw_err_reset_req); 1893 1894 /* log IGU(Ingress Unit) errors */ 1895 desc_data = (__le32 *)&desc[3]; 1896 status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK; 1897 if (status) 1898 hclge_log_error(dev, "IGU_INT_STS", 1899 &hclge_igu_int[0], status, 1900 &ae_dev->hw_err_reset_req); 1901 1902 /* log PPP(Programmable Packet Process) errors */ 1903 desc_data = (__le32 *)&desc[4]; 1904 status = le32_to_cpu(*(desc_data + 1)); 1905 if (status) 1906 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", 1907 &hclge_ppp_mpf_abnormal_int_st1[0], status, 1908 &ae_dev->hw_err_reset_req); 1909 1910 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK; 1911 if (status) 1912 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", 1913 &hclge_ppp_mpf_abnormal_int_st3[0], status, 1914 &ae_dev->hw_err_reset_req); 1915 1916 /* log PPU(RCB) errors */ 1917 desc_data = (__le32 *)&desc[5]; 1918 status = le32_to_cpu(*(desc_data + 1)); 1919 if (status) { 1920 dev_err(dev, 1921 "PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n"); 1922 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); 1923 } 1924 1925 status = le32_to_cpu(*(desc_data + 2)); 1926 if (status) 1927 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", 1928 &hclge_ppu_mpf_abnormal_int_st2[0], status, 1929 &ae_dev->hw_err_reset_req); 1930 1931 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK; 1932 if (status) 1933 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", 1934 &hclge_ppu_mpf_abnormal_int_st3[0], status, 1935 &ae_dev->hw_err_reset_req); 1936 1937 /* log TM(Traffic Manager) errors */ 1938 desc_data = (__le32 *)&desc[6]; 1939 status = le32_to_cpu(*desc_data); 1940 if (status) 1941 hclge_log_error(dev, "TM_SCH_RINT", 1942 &hclge_tm_sch_rint[0], status, 1943 &ae_dev->hw_err_reset_req); 1944 1945 /* log QCN(Quantized Congestion Control) errors */ 1946 desc_data = (__le32 *)&desc[7]; 1947 status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK; 1948 if (status) 1949 hclge_log_error(dev, "QCN_FIFO_RINT", 1950 &hclge_qcn_fifo_rint[0], status, 1951 &ae_dev->hw_err_reset_req); 1952 1953 status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK; 1954 if (status) 1955 hclge_log_error(dev, "QCN_ECC_RINT", 1956 &hclge_qcn_ecc_rint[0], status, 1957 &ae_dev->hw_err_reset_req); 1958 1959 /* log NCSI errors */ 1960 desc_data = (__le32 *)&desc[9]; 1961 status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK; 1962 if (status) 1963 hclge_log_error(dev, "NCSI_ECC_INT_RPT", 1964 &hclge_ncsi_err_int[0], status, 1965 &ae_dev->hw_err_reset_req); 1966 1967 /* clear all main PF RAS errors */ 1968 hclge_comm_cmd_reuse_desc(&desc[0], false); 1969 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 1970 if (ret) 1971 dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret); 1972 1973 return ret; 1974 } 1975 1976 /* hclge_handle_pf_ras_error: handle all PF RAS errors 1977 * @hdev: pointer to struct hclge_dev 1978 * @desc: descriptor for describing the command 1979 * @num: number of extended command structures 1980 * 1981 * This function handles all the PF RAS errors in the 1982 * hw registers using command. 1983 */ 1984 static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, 1985 struct hclge_desc *desc, 1986 int num) 1987 { 1988 struct hnae3_ae_dev *ae_dev = hdev->ae_dev; 1989 struct device *dev = &hdev->pdev->dev; 1990 __le32 *desc_data; 1991 u32 status; 1992 int ret; 1993 1994 /* query all PF RAS errors */ 1995 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT, 1996 true); 1997 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 1998 if (ret) { 1999 dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret); 2000 return ret; 2001 } 2002 2003 /* log SSU(Storage Switch Unit) errors */ 2004 status = le32_to_cpu(desc[0].data[0]); 2005 if (status) 2006 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", 2007 &hclge_ssu_port_based_err_int[0], status, 2008 &ae_dev->hw_err_reset_req); 2009 2010 status = le32_to_cpu(desc[0].data[1]); 2011 if (status) 2012 hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", 2013 &hclge_ssu_fifo_overflow_int[0], status, 2014 &ae_dev->hw_err_reset_req); 2015 2016 status = le32_to_cpu(desc[0].data[2]); 2017 if (status) 2018 hclge_log_error(dev, "SSU_ETS_TCG_INT", 2019 &hclge_ssu_ets_tcg_int[0], status, 2020 &ae_dev->hw_err_reset_req); 2021 2022 /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */ 2023 desc_data = (__le32 *)&desc[1]; 2024 status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK; 2025 if (status) 2026 hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", 2027 &hclge_igu_egu_tnl_int[0], status, 2028 &ae_dev->hw_err_reset_req); 2029 2030 /* log PPU(RCB) errors */ 2031 desc_data = (__le32 *)&desc[3]; 2032 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK; 2033 if (status) { 2034 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", 2035 &hclge_ppu_pf_abnormal_int[0], status, 2036 &ae_dev->hw_err_reset_req); 2037 hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR); 2038 } 2039 2040 /* clear all PF RAS errors */ 2041 hclge_comm_cmd_reuse_desc(&desc[0], false); 2042 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 2043 if (ret) 2044 dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret); 2045 2046 return ret; 2047 } 2048 2049 static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) 2050 { 2051 u32 mpf_bd_num, pf_bd_num, bd_num; 2052 struct hclge_desc *desc; 2053 int ret; 2054 2055 /* query the number of registers in the RAS int status */ 2056 ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num); 2057 if (ret) 2058 return ret; 2059 2060 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2061 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2062 if (!desc) 2063 return -ENOMEM; 2064 2065 /* handle all main PF RAS errors */ 2066 ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num); 2067 if (ret) { 2068 kfree(desc); 2069 return ret; 2070 } 2071 memset(desc, 0, bd_num * sizeof(struct hclge_desc)); 2072 2073 /* handle all PF RAS errors */ 2074 ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num); 2075 kfree(desc); 2076 2077 return ret; 2078 } 2079 2080 static int hclge_log_rocee_axi_error(struct hclge_dev *hdev) 2081 { 2082 struct device *dev = &hdev->pdev->dev; 2083 struct hclge_desc desc[3]; 2084 int ret; 2085 2086 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2087 true); 2088 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2089 true); 2090 hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2091 true); 2092 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2093 desc[1].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2094 2095 ret = hclge_cmd_send(&hdev->hw, &desc[0], 3); 2096 if (ret) { 2097 dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); 2098 return ret; 2099 } 2100 2101 dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", 2102 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 2103 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 2104 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 2105 dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", 2106 le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), 2107 le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), 2108 le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); 2109 dev_err(dev, "AXI3: %08X %08X %08X %08X\n", 2110 le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]), 2111 le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); 2112 2113 return 0; 2114 } 2115 2116 static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) 2117 { 2118 struct device *dev = &hdev->pdev->dev; 2119 struct hclge_desc desc[2]; 2120 int ret; 2121 2122 ret = hclge_cmd_query_error(hdev, &desc[0], 2123 HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, 2124 HCLGE_COMM_CMD_FLAG_NEXT); 2125 if (ret) { 2126 dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); 2127 return ret; 2128 } 2129 2130 dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", 2131 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 2132 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 2133 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 2134 dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), 2135 le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); 2136 2137 return 0; 2138 } 2139 2140 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) 2141 { 2142 struct device *dev = &hdev->pdev->dev; 2143 struct hclge_desc desc[2]; 2144 int ret; 2145 2146 /* read overflow error status */ 2147 ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD, 2148 0); 2149 if (ret) { 2150 dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret); 2151 return ret; 2152 } 2153 2154 /* log overflow error */ 2155 if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2156 const struct hclge_hw_error *err; 2157 u32 err_sts; 2158 2159 err = &hclge_rocee_qmm_ovf_err_int[0]; 2160 err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK & 2161 le32_to_cpu(desc[0].data[0]); 2162 while (err->msg) { 2163 if (err->int_msk == err_sts) { 2164 dev_err(dev, "%s [error status=0x%x] found\n", 2165 err->msg, 2166 le32_to_cpu(desc[0].data[0])); 2167 break; 2168 } 2169 err++; 2170 } 2171 } 2172 2173 if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2174 dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n", 2175 le32_to_cpu(desc[0].data[1])); 2176 } 2177 2178 if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2179 dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n", 2180 le32_to_cpu(desc[0].data[2])); 2181 } 2182 2183 return 0; 2184 } 2185 2186 static enum hnae3_reset_type 2187 hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) 2188 { 2189 enum hnae3_reset_type reset_type = HNAE3_NONE_RESET; 2190 struct device *dev = &hdev->pdev->dev; 2191 struct hclge_desc desc[2]; 2192 unsigned int status; 2193 int ret; 2194 2195 /* read RAS error interrupt status */ 2196 ret = hclge_cmd_query_error(hdev, &desc[0], 2197 HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0); 2198 if (ret) { 2199 dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret); 2200 /* reset everything for now */ 2201 return HNAE3_GLOBAL_RESET; 2202 } 2203 2204 status = le32_to_cpu(desc[0].data[0]); 2205 if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { 2206 if (status & HCLGE_ROCEE_RERR_INT_MASK) 2207 dev_err(dev, "ROCEE RAS AXI rresp error\n"); 2208 2209 if (status & HCLGE_ROCEE_BERR_INT_MASK) 2210 dev_err(dev, "ROCEE RAS AXI bresp error\n"); 2211 2212 reset_type = HNAE3_FUNC_RESET; 2213 2214 hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR); 2215 2216 ret = hclge_log_rocee_axi_error(hdev); 2217 if (ret) 2218 return HNAE3_GLOBAL_RESET; 2219 } 2220 2221 if (status & HCLGE_ROCEE_ECC_INT_MASK) { 2222 dev_err(dev, "ROCEE RAS 2bit ECC error\n"); 2223 reset_type = HNAE3_GLOBAL_RESET; 2224 2225 ret = hclge_log_rocee_ecc_error(hdev); 2226 if (ret) 2227 return HNAE3_GLOBAL_RESET; 2228 } 2229 2230 if (status & HCLGE_ROCEE_OVF_INT_MASK) { 2231 ret = hclge_log_rocee_ovf_error(hdev); 2232 if (ret) { 2233 dev_err(dev, "failed(%d) to process ovf error\n", ret); 2234 /* reset everything for now */ 2235 return HNAE3_GLOBAL_RESET; 2236 } 2237 } 2238 2239 /* clear error status */ 2240 hclge_comm_cmd_reuse_desc(&desc[0], false); 2241 ret = hclge_cmd_send(&hdev->hw, &desc[0], 1); 2242 if (ret) { 2243 dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret); 2244 /* reset everything for now */ 2245 return HNAE3_GLOBAL_RESET; 2246 } 2247 2248 return reset_type; 2249 } 2250 2251 int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) 2252 { 2253 struct device *dev = &hdev->pdev->dev; 2254 struct hclge_desc desc; 2255 int ret; 2256 2257 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 || 2258 !hnae3_dev_roce_supported(hdev)) 2259 return 0; 2260 2261 hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false); 2262 if (en) { 2263 /* enable ROCEE hw error interrupts */ 2264 desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN); 2265 desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN); 2266 2267 hclge_log_and_clear_rocee_ras_error(hdev); 2268 } 2269 desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK); 2270 desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK); 2271 2272 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2273 if (ret) 2274 dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret); 2275 2276 return ret; 2277 } 2278 2279 static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev) 2280 { 2281 struct hclge_dev *hdev = ae_dev->priv; 2282 enum hnae3_reset_type reset_type; 2283 2284 if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) 2285 return; 2286 2287 reset_type = hclge_log_and_clear_rocee_ras_error(hdev); 2288 if (reset_type != HNAE3_NONE_RESET) 2289 set_bit(reset_type, &ae_dev->hw_err_reset_req); 2290 } 2291 2292 static const struct hclge_hw_blk hw_blk[] = { 2293 { 2294 .msk = BIT(0), 2295 .name = "IGU_EGU", 2296 .config_err_int = hclge_config_igu_egu_hw_err_int, 2297 }, { 2298 .msk = BIT(1), 2299 .name = "PPP", 2300 .config_err_int = hclge_config_ppp_hw_err_int, 2301 }, { 2302 .msk = BIT(2), 2303 .name = "SSU", 2304 .config_err_int = hclge_config_ssu_hw_err_int, 2305 }, { 2306 .msk = BIT(3), 2307 .name = "PPU", 2308 .config_err_int = hclge_config_ppu_hw_err_int, 2309 }, { 2310 .msk = BIT(4), 2311 .name = "TM", 2312 .config_err_int = hclge_config_tm_hw_err_int, 2313 }, { 2314 .msk = BIT(5), 2315 .name = "COMMON", 2316 .config_err_int = hclge_config_common_hw_err_int, 2317 }, { 2318 .msk = BIT(8), 2319 .name = "MAC", 2320 .config_err_int = hclge_config_mac_err_int, 2321 }, { 2322 /* sentinel */ 2323 } 2324 }; 2325 2326 static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable) 2327 { 2328 u32 reg_val; 2329 2330 reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG); 2331 2332 if (enable) 2333 reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B); 2334 else 2335 reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B); 2336 2337 hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val); 2338 } 2339 2340 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state) 2341 { 2342 const struct hclge_hw_blk *module = hw_blk; 2343 int ret = 0; 2344 2345 hclge_config_all_msix_error(hdev, state); 2346 2347 while (module->name) { 2348 if (module->config_err_int) { 2349 ret = module->config_err_int(hdev, state); 2350 if (ret) 2351 return ret; 2352 } 2353 module++; 2354 } 2355 2356 return ret; 2357 } 2358 2359 pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) 2360 { 2361 struct hclge_dev *hdev = ae_dev->priv; 2362 struct device *dev = &hdev->pdev->dev; 2363 u32 status; 2364 2365 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { 2366 dev_err(dev, 2367 "Can't recover - RAS error reported during dev init\n"); 2368 return PCI_ERS_RESULT_NONE; 2369 } 2370 2371 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); 2372 if (status & HCLGE_RAS_REG_NFE_MASK || 2373 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) 2374 ae_dev->hw_err_reset_req = 0; 2375 else 2376 goto out; 2377 2378 /* Handling Non-fatal HNS RAS errors */ 2379 if (status & HCLGE_RAS_REG_NFE_MASK) { 2380 dev_err(dev, 2381 "HNS Non-Fatal RAS error(status=0x%x) identified\n", 2382 status); 2383 hclge_handle_all_ras_errors(hdev); 2384 } 2385 2386 /* Handling Non-fatal Rocee RAS errors */ 2387 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 && 2388 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { 2389 dev_err(dev, "ROCEE Non-Fatal RAS error identified\n"); 2390 hclge_handle_rocee_ras_error(ae_dev); 2391 } 2392 2393 if (ae_dev->hw_err_reset_req) 2394 return PCI_ERS_RESULT_NEED_RESET; 2395 2396 out: 2397 return PCI_ERS_RESULT_RECOVERED; 2398 } 2399 2400 static int hclge_clear_hw_msix_error(struct hclge_dev *hdev, 2401 struct hclge_desc *desc, bool is_mpf, 2402 u32 bd_num) 2403 { 2404 if (is_mpf) 2405 desc[0].opcode = 2406 cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT); 2407 else 2408 desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT); 2409 2410 desc[0].flag = cpu_to_le16(HCLGE_COMM_CMD_FLAG_NO_INTR | 2411 HCLGE_COMM_CMD_FLAG_IN); 2412 2413 return hclge_cmd_send(&hdev->hw, &desc[0], bd_num); 2414 } 2415 2416 /* hclge_query_8bd_info: query information about over_8bd_nfe_err 2417 * @hdev: pointer to struct hclge_dev 2418 * @vf_id: Index of the virtual function with error 2419 * @q_id: Physical index of the queue with error 2420 * 2421 * This function get specific index of queue and function which causes 2422 * over_8bd_nfe_err by using command. If vf_id is 0, it means error is 2423 * caused by PF instead of VF. 2424 */ 2425 static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id, 2426 u16 *q_id) 2427 { 2428 struct hclge_query_ppu_pf_other_int_dfx_cmd *req; 2429 struct hclge_desc desc; 2430 int ret; 2431 2432 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true); 2433 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2434 if (ret) 2435 return ret; 2436 2437 req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data; 2438 *vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id); 2439 *q_id = le16_to_cpu(req->over_8bd_no_fe_qid); 2440 2441 return 0; 2442 } 2443 2444 /* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err 2445 * @hdev: pointer to struct hclge_dev 2446 * @reset_requests: reset level that we need to trigger later 2447 * 2448 * over_8bd_nfe_err is a special MSI-X because it may caused by a VF, in 2449 * that case, we need to trigger VF reset. Otherwise, a PF reset is needed. 2450 */ 2451 static void hclge_handle_over_8bd_err(struct hclge_dev *hdev, 2452 unsigned long *reset_requests) 2453 { 2454 struct device *dev = &hdev->pdev->dev; 2455 u16 vf_id; 2456 u16 q_id; 2457 int ret; 2458 2459 ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id); 2460 if (ret) { 2461 dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n", 2462 ret); 2463 return; 2464 } 2465 2466 dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vport(%u), queue_id(%u)\n", 2467 vf_id, q_id); 2468 2469 if (vf_id) { 2470 if (vf_id >= hdev->num_alloc_vport) { 2471 dev_err(dev, "invalid vport(%u)\n", vf_id); 2472 return; 2473 } 2474 2475 /* If we need to trigger other reset whose level is higher 2476 * than HNAE3_VF_FUNC_RESET, no need to trigger a VF reset 2477 * here. 2478 */ 2479 if (*reset_requests != 0) 2480 return; 2481 2482 ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]); 2483 if (ret) 2484 dev_err(dev, "inform reset to vport(%u) failed %d!\n", 2485 vf_id, ret); 2486 } else { 2487 set_bit(HNAE3_FUNC_RESET, reset_requests); 2488 } 2489 } 2490 2491 /* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors 2492 * @hdev: pointer to struct hclge_dev 2493 * @desc: descriptor for describing the command 2494 * @mpf_bd_num: number of extended command structures 2495 * @reset_requests: record of the reset level that we need 2496 * 2497 * This function handles all the main PF MSI-X errors in the hw register/s 2498 * using command. 2499 */ 2500 static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev, 2501 struct hclge_desc *desc, 2502 int mpf_bd_num, 2503 unsigned long *reset_requests) 2504 { 2505 struct device *dev = &hdev->pdev->dev; 2506 __le32 *desc_data; 2507 u32 status; 2508 int ret; 2509 /* query all main PF MSIx errors */ 2510 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, 2511 true); 2512 ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num); 2513 if (ret) { 2514 dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret); 2515 return ret; 2516 } 2517 2518 /* log MAC errors */ 2519 desc_data = (__le32 *)&desc[1]; 2520 status = le32_to_cpu(*desc_data); 2521 if (status) 2522 hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R", 2523 &hclge_mac_afifo_tnl_int[0], status, 2524 reset_requests); 2525 2526 /* log PPU(RCB) MPF errors */ 2527 desc_data = (__le32 *)&desc[5]; 2528 status = le32_to_cpu(*(desc_data + 2)) & 2529 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK; 2530 if (status) 2531 dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x\n]", 2532 status); 2533 2534 /* clear all main PF MSIx errors */ 2535 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); 2536 if (ret) 2537 dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret); 2538 2539 return ret; 2540 } 2541 2542 /* hclge_handle_pf_msix_error: handle all PF MSI-X errors 2543 * @hdev: pointer to struct hclge_dev 2544 * @desc: descriptor for describing the command 2545 * @mpf_bd_num: number of extended command structures 2546 * @reset_requests: record of the reset level that we need 2547 * 2548 * This function handles all the PF MSI-X errors in the hw register/s using 2549 * command. 2550 */ 2551 static int hclge_handle_pf_msix_error(struct hclge_dev *hdev, 2552 struct hclge_desc *desc, 2553 int pf_bd_num, 2554 unsigned long *reset_requests) 2555 { 2556 struct device *dev = &hdev->pdev->dev; 2557 __le32 *desc_data; 2558 u32 status; 2559 int ret; 2560 2561 /* query all PF MSIx errors */ 2562 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, 2563 true); 2564 ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); 2565 if (ret) { 2566 dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret); 2567 return ret; 2568 } 2569 2570 /* log SSU PF errors */ 2571 status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK; 2572 if (status) 2573 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", 2574 &hclge_ssu_port_based_pf_int[0], 2575 status, reset_requests); 2576 2577 /* read and log PPP PF errors */ 2578 desc_data = (__le32 *)&desc[2]; 2579 status = le32_to_cpu(*desc_data); 2580 if (status) 2581 hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", 2582 &hclge_ppp_pf_abnormal_int[0], 2583 status, reset_requests); 2584 2585 /* log PPU(RCB) PF errors */ 2586 desc_data = (__le32 *)&desc[3]; 2587 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; 2588 if (status) 2589 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", 2590 &hclge_ppu_pf_abnormal_int[0], 2591 status, reset_requests); 2592 2593 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK; 2594 if (status) 2595 hclge_handle_over_8bd_err(hdev, reset_requests); 2596 2597 /* clear all PF MSIx errors */ 2598 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); 2599 if (ret) 2600 dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret); 2601 2602 return ret; 2603 } 2604 2605 static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, 2606 unsigned long *reset_requests) 2607 { 2608 u32 mpf_bd_num, pf_bd_num, bd_num; 2609 struct hclge_desc *desc; 2610 int ret; 2611 2612 /* query the number of bds for the MSIx int status */ 2613 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); 2614 if (ret) 2615 goto out; 2616 2617 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2618 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2619 if (!desc) 2620 return -ENOMEM; 2621 2622 ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num, 2623 reset_requests); 2624 if (ret) 2625 goto msi_error; 2626 2627 memset(desc, 0, bd_num * sizeof(struct hclge_desc)); 2628 ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests); 2629 if (ret) 2630 goto msi_error; 2631 2632 ret = hclge_handle_mac_tnl(hdev); 2633 2634 msi_error: 2635 kfree(desc); 2636 out: 2637 return ret; 2638 } 2639 2640 int hclge_handle_hw_msix_error(struct hclge_dev *hdev, 2641 unsigned long *reset_requests) 2642 { 2643 struct device *dev = &hdev->pdev->dev; 2644 2645 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { 2646 dev_err(dev, 2647 "failed to handle msix error during dev init\n"); 2648 return -EAGAIN; 2649 } 2650 2651 return hclge_handle_all_hw_msix_error(hdev, reset_requests); 2652 } 2653 2654 int hclge_handle_mac_tnl(struct hclge_dev *hdev) 2655 { 2656 struct hclge_mac_tnl_stats mac_tnl_stats; 2657 struct device *dev = &hdev->pdev->dev; 2658 struct hclge_desc desc; 2659 u32 status; 2660 int ret; 2661 2662 /* query and clear mac tnl interruptions */ 2663 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true); 2664 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2665 if (ret) { 2666 dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret); 2667 return ret; 2668 } 2669 2670 status = le32_to_cpu(desc.data[0]); 2671 if (status) { 2672 /* When mac tnl interrupt occurs, we record current time and 2673 * register status here in a fifo, then clear the status. So 2674 * that if link status changes suddenly at some time, we can 2675 * query them by debugfs. 2676 */ 2677 mac_tnl_stats.time = local_clock(); 2678 mac_tnl_stats.status = status; 2679 kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats); 2680 ret = hclge_clear_mac_tnl_int(hdev); 2681 if (ret) 2682 dev_err(dev, "failed to clear mac tnl int, ret = %d.\n", 2683 ret); 2684 } 2685 2686 return ret; 2687 } 2688 2689 void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev) 2690 { 2691 struct hclge_dev *hdev = ae_dev->priv; 2692 struct device *dev = &hdev->pdev->dev; 2693 u32 mpf_bd_num, pf_bd_num, bd_num; 2694 struct hclge_desc *desc; 2695 u32 status; 2696 int ret; 2697 2698 ae_dev->hw_err_reset_req = 0; 2699 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); 2700 2701 /* query the number of bds for the MSIx int status */ 2702 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); 2703 if (ret) 2704 return; 2705 2706 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2707 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2708 if (!desc) 2709 return; 2710 2711 /* Clear HNS hw errors reported through msix */ 2712 memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) - 2713 HCLGE_DESC_NO_DATA_LEN); 2714 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); 2715 if (ret) { 2716 dev_err(dev, "fail(%d) to clear mpf msix int during init\n", 2717 ret); 2718 goto msi_error; 2719 } 2720 2721 memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) - 2722 HCLGE_DESC_NO_DATA_LEN); 2723 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); 2724 if (ret) { 2725 dev_err(dev, "fail(%d) to clear pf msix int during init\n", 2726 ret); 2727 goto msi_error; 2728 } 2729 2730 /* Handle Non-fatal HNS RAS errors */ 2731 if (status & HCLGE_RAS_REG_NFE_MASK) { 2732 dev_err(dev, "HNS hw error(RAS) identified during init\n"); 2733 hclge_handle_all_ras_errors(hdev); 2734 } 2735 2736 msi_error: 2737 kfree(desc); 2738 } 2739 2740 bool hclge_find_error_source(struct hclge_dev *hdev) 2741 { 2742 u32 msix_src_flag, hw_err_src_flag; 2743 2744 msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) & 2745 HCLGE_VECTOR0_REG_MSIX_MASK; 2746 2747 hw_err_src_flag = hclge_read_dev(&hdev->hw, 2748 HCLGE_RAS_PF_OTHER_INT_STS_REG) & 2749 HCLGE_RAS_REG_ERR_MASK; 2750 2751 return msix_src_flag || hw_err_src_flag; 2752 } 2753 2754 void hclge_handle_occurred_error(struct hclge_dev *hdev) 2755 { 2756 struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); 2757 2758 if (hclge_find_error_source(hdev)) 2759 hclge_handle_error_info_log(ae_dev); 2760 } 2761 2762 static void 2763 hclge_handle_error_type_reg_log(struct device *dev, 2764 struct hclge_mod_err_info *mod_info, 2765 struct hclge_type_reg_err_info *type_reg_info) 2766 { 2767 #define HCLGE_ERR_TYPE_MASK 0x7F 2768 #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7 2769 2770 u8 mod_id, total_module, type_id, total_type, i, is_ras; 2771 u8 index_module = MODULE_NONE; 2772 u8 index_type = NONE_ERROR; 2773 2774 mod_id = mod_info->mod_id; 2775 type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK; 2776 is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET; 2777 2778 total_module = ARRAY_SIZE(hclge_hw_module_id_st); 2779 total_type = ARRAY_SIZE(hclge_hw_type_id_st); 2780 2781 for (i = 0; i < total_module; i++) { 2782 if (mod_id == hclge_hw_module_id_st[i].module_id) { 2783 index_module = i; 2784 break; 2785 } 2786 } 2787 2788 for (i = 0; i < total_type; i++) { 2789 if (type_id == hclge_hw_type_id_st[i].type_id) { 2790 index_type = i; 2791 break; 2792 } 2793 } 2794 2795 if (index_module != MODULE_NONE && index_type != NONE_ERROR) 2796 dev_err(dev, 2797 "found %s %s, is %s error.\n", 2798 hclge_hw_module_id_st[index_module].msg, 2799 hclge_hw_type_id_st[index_type].msg, 2800 is_ras ? "ras" : "msix"); 2801 else 2802 dev_err(dev, 2803 "unknown module[%u] or type[%u].\n", mod_id, type_id); 2804 2805 dev_err(dev, "reg_value:\n"); 2806 for (i = 0; i < type_reg_info->reg_num; i++) 2807 dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]); 2808 } 2809 2810 static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev, 2811 const u32 *buf, u32 buf_size) 2812 { 2813 struct hclge_type_reg_err_info *type_reg_info; 2814 struct hclge_dev *hdev = ae_dev->priv; 2815 struct device *dev = &hdev->pdev->dev; 2816 struct hclge_mod_err_info *mod_info; 2817 struct hclge_sum_err_info *sum_info; 2818 u8 mod_num, err_num, i; 2819 u32 offset = 0; 2820 2821 sum_info = (struct hclge_sum_err_info *)&buf[offset++]; 2822 if (sum_info->reset_type && 2823 sum_info->reset_type != HNAE3_NONE_RESET) 2824 set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req); 2825 mod_num = sum_info->mod_num; 2826 2827 while (mod_num--) { 2828 if (offset >= buf_size) { 2829 dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n", 2830 offset, buf_size); 2831 return; 2832 } 2833 mod_info = (struct hclge_mod_err_info *)&buf[offset++]; 2834 err_num = mod_info->err_num; 2835 2836 for (i = 0; i < err_num; i++) { 2837 if (offset >= buf_size) { 2838 dev_err(dev, 2839 "The offset(%u) exceeds buf size(%u).\n", 2840 offset, buf_size); 2841 return; 2842 } 2843 2844 type_reg_info = (struct hclge_type_reg_err_info *) 2845 &buf[offset++]; 2846 hclge_handle_error_type_reg_log(dev, mod_info, 2847 type_reg_info); 2848 2849 offset += type_reg_info->reg_num; 2850 } 2851 } 2852 } 2853 2854 static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num) 2855 { 2856 struct device *dev = &hdev->pdev->dev; 2857 struct hclge_desc desc_bd; 2858 int ret; 2859 2860 hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true); 2861 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); 2862 if (ret) { 2863 dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret); 2864 return ret; 2865 } 2866 2867 *bd_num = le32_to_cpu(desc_bd.data[0]); 2868 if (!(*bd_num)) { 2869 dev_err(dev, "The value of bd_num is 0!\n"); 2870 return -EINVAL; 2871 } 2872 2873 return 0; 2874 } 2875 2876 static int hclge_query_all_err_info(struct hclge_dev *hdev, 2877 struct hclge_desc *desc, u32 bd_num) 2878 { 2879 struct device *dev = &hdev->pdev->dev; 2880 int ret; 2881 2882 hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true); 2883 ret = hclge_cmd_send(&hdev->hw, desc, bd_num); 2884 if (ret) 2885 dev_err(dev, "failed to query error info, ret = %d.\n", ret); 2886 2887 return ret; 2888 } 2889 2890 int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev) 2891 { 2892 u32 bd_num, desc_len, buf_len, buf_size, i; 2893 struct hclge_dev *hdev = ae_dev->priv; 2894 struct hclge_desc *desc; 2895 __le32 *desc_data; 2896 u32 *buf; 2897 int ret; 2898 2899 ret = hclge_query_all_err_bd_num(hdev, &bd_num); 2900 if (ret) 2901 goto out; 2902 2903 desc_len = bd_num * sizeof(struct hclge_desc); 2904 desc = kzalloc(desc_len, GFP_KERNEL); 2905 if (!desc) { 2906 ret = -ENOMEM; 2907 goto out; 2908 } 2909 2910 ret = hclge_query_all_err_info(hdev, desc, bd_num); 2911 if (ret) 2912 goto err_desc; 2913 2914 buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN; 2915 buf_size = buf_len / sizeof(u32); 2916 2917 desc_data = kzalloc(buf_len, GFP_KERNEL); 2918 if (!desc_data) { 2919 ret = -ENOMEM; 2920 goto err_desc; 2921 } 2922 2923 buf = kzalloc(buf_len, GFP_KERNEL); 2924 if (!buf) { 2925 ret = -ENOMEM; 2926 goto err_buf_alloc; 2927 } 2928 2929 memcpy(desc_data, &desc[0].data[0], buf_len); 2930 for (i = 0; i < buf_size; i++) 2931 buf[i] = le32_to_cpu(desc_data[i]); 2932 2933 hclge_handle_error_module_log(ae_dev, buf, buf_size); 2934 kfree(buf); 2935 2936 err_buf_alloc: 2937 kfree(desc_data); 2938 err_desc: 2939 kfree(desc); 2940 out: 2941 return ret; 2942 } 2943