// SPDX-License-Identifier: GPL-2.0+
/* Copyright (c) 2016-2017 Hisilicon Limited. */

#include "hclge_err.h"

static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
	{ .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};
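
/*
 * Note on the tables in this file: each is a sentinel-terminated list that
 * hclge_log_error() below walks until it hits the zeroed sentinel entry.
 * .int_msk selects the bit to test in the hardware status word, .msg names
 * the fault for the log, and .reset_level is the recovery action requested
 * from the reset framework (HNAE3_NONE_RESET means log only).
 */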

static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_igu_int[] = {
	{ .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
	{ .int_msk = BIT(0), .msg = "rx_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "rx_stp_fifo_underflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "tx_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "tx_buf_underrun",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "rx_stp_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ncsi_err_int[] = {
	{ .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
	{ .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(27), .msg = "flow_director_ad_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(28), .msg = "flow_director_ad_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(29), .msg = "rx_vlan_tag_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(30), .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
	{ .int_msk = BIT(0), .msg = "tx_vlan_tag_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
	{ .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_tm_sch_rint[] = {
	{ .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "tm_sch_port_shap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "tm_sch_port_shap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(18), .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(20), .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};
"qcn_shap_gp2_offset_fifo_rd_err", 300 .reset_level = HNAE3_GLOBAL_RESET }, 301 { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err", 302 .reset_level = HNAE3_GLOBAL_RESET }, 303 { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err", 304 .reset_level = HNAE3_GLOBAL_RESET }, 305 { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err", 306 .reset_level = HNAE3_GLOBAL_RESET }, 307 { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err", 308 .reset_level = HNAE3_GLOBAL_RESET }, 309 { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err", 310 .reset_level = HNAE3_GLOBAL_RESET }, 311 { /* sentinel */ } 312 }; 313 314 static const struct hclge_hw_error hclge_qcn_ecc_rint[] = { 315 { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err", 316 .reset_level = HNAE3_GLOBAL_RESET }, 317 { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err", 318 .reset_level = HNAE3_GLOBAL_RESET }, 319 { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err", 320 .reset_level = HNAE3_GLOBAL_RESET }, 321 { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err", 322 .reset_level = HNAE3_GLOBAL_RESET }, 323 { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err", 324 .reset_level = HNAE3_GLOBAL_RESET }, 325 { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err", 326 .reset_level = HNAE3_GLOBAL_RESET }, 327 { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err", 328 .reset_level = HNAE3_GLOBAL_RESET }, 329 { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err", 330 .reset_level = HNAE3_GLOBAL_RESET }, 331 { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err", 332 .reset_level = HNAE3_GLOBAL_RESET }, 333 { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err", 334 .reset_level = HNAE3_GLOBAL_RESET }, 335 { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err", 336 .reset_level = HNAE3_GLOBAL_RESET }, 337 { /* sentinel */ } 338 }; 339 340 static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = { 341 { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err", 342 .reset_level = HNAE3_NONE_RESET }, 343 { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err", 344 .reset_level = HNAE3_GLOBAL_RESET }, 345 { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err", 346 .reset_level = HNAE3_NONE_RESET }, 347 { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err", 348 .reset_level = HNAE3_GLOBAL_RESET }, 349 { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err", 350 .reset_level = HNAE3_NONE_RESET }, 351 { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err", 352 .reset_level = HNAE3_GLOBAL_RESET }, 353 { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err", 354 .reset_level = HNAE3_NONE_RESET }, 355 { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err", 356 .reset_level = HNAE3_GLOBAL_RESET }, 357 { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err", 358 .reset_level = HNAE3_GLOBAL_RESET }, 359 { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err", 360 .reset_level = HNAE3_GLOBAL_RESET }, 361 { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err", 362 .reset_level = HNAE3_GLOBAL_RESET }, 363 { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err", 364 .reset_level = HNAE3_GLOBAL_RESET }, 365 { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err", 366 .reset_level = HNAE3_GLOBAL_RESET }, 367 { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err", 368 .reset_level = HNAE3_GLOBAL_RESET }, 369 { /* sentinel */ } 370 }; 371 372 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { 373 { .int_msk = BIT(13), .msg = 
"rpu_rx_pkt_bit32_ecc_mbit_err", 374 .reset_level = HNAE3_GLOBAL_RESET }, 375 { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err", 376 .reset_level = HNAE3_GLOBAL_RESET }, 377 { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err", 378 .reset_level = HNAE3_GLOBAL_RESET }, 379 { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err", 380 .reset_level = HNAE3_GLOBAL_RESET }, 381 { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err", 382 .reset_level = HNAE3_GLOBAL_RESET }, 383 { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err", 384 .reset_level = HNAE3_GLOBAL_RESET }, 385 { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err", 386 .reset_level = HNAE3_GLOBAL_RESET }, 387 { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err", 388 .reset_level = HNAE3_GLOBAL_RESET }, 389 { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err", 390 .reset_level = HNAE3_GLOBAL_RESET }, 391 { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err", 392 .reset_level = HNAE3_GLOBAL_RESET }, 393 { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err", 394 .reset_level = HNAE3_GLOBAL_RESET }, 395 { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err", 396 .reset_level = HNAE3_GLOBAL_RESET }, 397 { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err", 398 .reset_level = HNAE3_GLOBAL_RESET }, 399 { .int_msk = BIT(26), .msg = "rd_bus_err", 400 .reset_level = HNAE3_GLOBAL_RESET }, 401 { .int_msk = BIT(27), .msg = "wr_bus_err", 402 .reset_level = HNAE3_GLOBAL_RESET }, 403 { .int_msk = BIT(28), .msg = "reg_search_miss", 404 .reset_level = HNAE3_GLOBAL_RESET }, 405 { .int_msk = BIT(29), .msg = "rx_q_search_miss", 406 .reset_level = HNAE3_NONE_RESET }, 407 { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect", 408 .reset_level = HNAE3_NONE_RESET }, 409 { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl", 410 .reset_level = HNAE3_GLOBAL_RESET }, 411 { /* sentinel */ } 412 }; 413 414 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { 415 { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err", 416 .reset_level = HNAE3_GLOBAL_RESET }, 417 { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err", 418 .reset_level = HNAE3_GLOBAL_RESET }, 419 { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err", 420 .reset_level = HNAE3_GLOBAL_RESET }, 421 { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err", 422 .reset_level = HNAE3_GLOBAL_RESET }, 423 { /* sentinel */ } 424 }; 425 426 static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = { 427 { .int_msk = BIT(0), .msg = "over_8bd_no_fe", 428 .reset_level = HNAE3_FUNC_RESET }, 429 { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err", 430 .reset_level = HNAE3_NONE_RESET }, 431 { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err", 432 .reset_level = HNAE3_NONE_RESET }, 433 { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison", 434 .reset_level = HNAE3_FUNC_RESET }, 435 { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison", 436 .reset_level = HNAE3_FUNC_RESET }, 437 { .int_msk = BIT(5), .msg = "buf_wait_timeout", 438 .reset_level = HNAE3_NONE_RESET }, 439 { /* sentinel */ } 440 }; 441 442 static const struct hclge_hw_error hclge_ssu_com_err_int[] = { 443 { .int_msk = BIT(0), .msg = "buf_sum_err", 444 .reset_level = HNAE3_NONE_RESET }, 445 { .int_msk = BIT(1), .msg = "ppp_mb_num_err", 446 .reset_level = HNAE3_NONE_RESET }, 447 { .int_msk = BIT(2), .msg = "ppp_mbid_err", 448 .reset_level = HNAE3_GLOBAL_RESET }, 449 { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err", 450 .reset_level = HNAE3_GLOBAL_RESET }, 451 { .int_msk = BIT(4), .msg = "ppp_rlt_host_err", 452 

static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
	{ .int_msk = BIT(0), .msg = "buf_sum_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(1), .msg = "ppp_mb_num_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(2), .msg = "ppp_mbid_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "ppp_rlt_mac_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "ppp_rlt_host_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "cks_edit_position_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "cks_edit_condition_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "vlan_edit_condition_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "vlan_num_ot_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "vlan_num_in_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

#define HCLGE_SSU_MEM_ECC_ERR(x) \
	{ .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
	  .reset_level = HNAE3_GLOBAL_RESET }

static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
	HCLGE_SSU_MEM_ECC_ERR(0),
	HCLGE_SSU_MEM_ECC_ERR(1),
	HCLGE_SSU_MEM_ECC_ERR(2),
	HCLGE_SSU_MEM_ECC_ERR(3),
	HCLGE_SSU_MEM_ECC_ERR(4),
	HCLGE_SSU_MEM_ECC_ERR(5),
	HCLGE_SSU_MEM_ECC_ERR(6),
	HCLGE_SSU_MEM_ECC_ERR(7),
	HCLGE_SSU_MEM_ECC_ERR(8),
	HCLGE_SSU_MEM_ECC_ERR(9),
	HCLGE_SSU_MEM_ECC_ERR(10),
	HCLGE_SSU_MEM_ECC_ERR(11),
	HCLGE_SSU_MEM_ECC_ERR(12),
	HCLGE_SSU_MEM_ECC_ERR(13),
	HCLGE_SSU_MEM_ECC_ERR(14),
	HCLGE_SSU_MEM_ECC_ERR(15),
	HCLGE_SSU_MEM_ECC_ERR(16),
	HCLGE_SSU_MEM_ECC_ERR(17),
	HCLGE_SSU_MEM_ECC_ERR(18),
	HCLGE_SSU_MEM_ECC_ERR(19),
	HCLGE_SSU_MEM_ECC_ERR(20),
	HCLGE_SSU_MEM_ECC_ERR(21),
	HCLGE_SSU_MEM_ECC_ERR(22),
	HCLGE_SSU_MEM_ECC_ERR(23),
	HCLGE_SSU_MEM_ECC_ERR(24),
	HCLGE_SSU_MEM_ECC_ERR(25),
	HCLGE_SSU_MEM_ECC_ERR(26),
	HCLGE_SSU_MEM_ECC_ERR(27),
	HCLGE_SSU_MEM_ECC_ERR(28),
	HCLGE_SSU_MEM_ECC_ERR(29),
	HCLGE_SSU_MEM_ECC_ERR(30),
	HCLGE_SSU_MEM_ECC_ERR(31),
	{ /* sentinel */ }
};
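
/*
 * HCLGE_SSU_MEM_ECC_ERR(x) stringizes the bit index with the preprocessor
 * ("ssu_mem" #x "_ecc_mbit_err"), so for example HCLGE_SSU_MEM_ECC_ERR(7)
 * expands to:
 *
 *	{ .int_msk = BIT(7), .msg = "ssu_mem7_ecc_mbit_err",
 *	  .reset_level = HNAE3_GLOBAL_RESET }
 */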
"ig_host_data_fifo_int", 544 .reset_level = HNAE3_GLOBAL_RESET }, 545 { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int", 546 .reset_level = HNAE3_GLOBAL_RESET }, 547 { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int", 548 .reset_level = HNAE3_GLOBAL_RESET }, 549 { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int", 550 .reset_level = HNAE3_GLOBAL_RESET }, 551 { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int", 552 .reset_level = HNAE3_GLOBAL_RESET }, 553 { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int", 554 .reset_level = HNAE3_GLOBAL_RESET }, 555 { .int_msk = BIT(9), .msg = "qm_eof_fifo_int", 556 .reset_level = HNAE3_GLOBAL_RESET }, 557 { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int", 558 .reset_level = HNAE3_GLOBAL_RESET }, 559 { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int", 560 .reset_level = HNAE3_GLOBAL_RESET }, 561 { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int", 562 .reset_level = HNAE3_GLOBAL_RESET }, 563 { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int", 564 .reset_level = HNAE3_GLOBAL_RESET }, 565 { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int", 566 .reset_level = HNAE3_GLOBAL_RESET }, 567 { .int_msk = BIT(15), .msg = "host_cmd_fifo_int", 568 .reset_level = HNAE3_GLOBAL_RESET }, 569 { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int", 570 .reset_level = HNAE3_GLOBAL_RESET }, 571 { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int", 572 .reset_level = HNAE3_GLOBAL_RESET }, 573 { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int", 574 .reset_level = HNAE3_GLOBAL_RESET }, 575 { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int", 576 .reset_level = HNAE3_GLOBAL_RESET }, 577 { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int", 578 .reset_level = HNAE3_GLOBAL_RESET }, 579 { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int", 580 .reset_level = HNAE3_GLOBAL_RESET }, 581 { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int", 582 .reset_level = HNAE3_GLOBAL_RESET }, 583 { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int", 584 .reset_level = HNAE3_GLOBAL_RESET }, 585 { /* sentinel */ } 586 }; 587 588 static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { 589 { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg", 590 .reset_level = HNAE3_GLOBAL_RESET }, 591 { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg", 592 .reset_level = HNAE3_GLOBAL_RESET }, 593 { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg", 594 .reset_level = HNAE3_GLOBAL_RESET }, 595 { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg", 596 .reset_level = HNAE3_GLOBAL_RESET }, 597 { /* sentinel */ } 598 }; 599 600 static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { 601 { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port", 602 .reset_level = HNAE3_FUNC_RESET }, 603 { .int_msk = BIT(9), .msg = "low_water_line_err_port", 604 .reset_level = HNAE3_NONE_RESET }, 605 { .int_msk = BIT(10), .msg = "hi_water_line_err_port", 606 .reset_level = HNAE3_GLOBAL_RESET }, 607 { /* sentinel */ } 608 }; 609 610 static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { 611 { .int_msk = 0, .msg = "rocee qmm ovf: sgid invalid err" }, 612 { .int_msk = 0x4, .msg = "rocee qmm ovf: sgid ovf err" }, 613 { .int_msk = 0x8, .msg = "rocee qmm ovf: smac invalid err" }, 614 { .int_msk = 0xC, .msg = "rocee qmm ovf: smac ovf err" }, 615 { .int_msk = 0x10, .msg = "rocee qmm ovf: cqc invalid err" }, 616 { .int_msk = 0x11, .msg = "rocee qmm ovf: cqc ovf err" }, 617 { .int_msk = 0x12, .msg = "rocee qmm ovf: cqc hopnum err" }, 618 { .int_msk = 0x13, .msg = "rocee qmm ovf: cqc ba0 err" }, 619 { .int_msk = 0x14, .msg = "rocee qmm ovf: srqc 
invalid err" }, 620 { .int_msk = 0x15, .msg = "rocee qmm ovf: srqc ovf err" }, 621 { .int_msk = 0x16, .msg = "rocee qmm ovf: srqc hopnum err" }, 622 { .int_msk = 0x17, .msg = "rocee qmm ovf: srqc ba0 err" }, 623 { .int_msk = 0x18, .msg = "rocee qmm ovf: mpt invalid err" }, 624 { .int_msk = 0x19, .msg = "rocee qmm ovf: mpt ovf err" }, 625 { .int_msk = 0x1A, .msg = "rocee qmm ovf: mpt hopnum err" }, 626 { .int_msk = 0x1B, .msg = "rocee qmm ovf: mpt ba0 err" }, 627 { .int_msk = 0x1C, .msg = "rocee qmm ovf: qpc invalid err" }, 628 { .int_msk = 0x1D, .msg = "rocee qmm ovf: qpc ovf err" }, 629 { .int_msk = 0x1E, .msg = "rocee qmm ovf: qpc hopnum err" }, 630 { .int_msk = 0x1F, .msg = "rocee qmm ovf: qpc ba0 err" }, 631 { /* sentinel */ } 632 }; 633 634 static const struct hclge_hw_module_id hclge_hw_module_id_st[] = { 635 { 636 .module_id = MODULE_NONE, 637 .msg = "MODULE_NONE" 638 }, { 639 .module_id = MODULE_BIOS_COMMON, 640 .msg = "MODULE_BIOS_COMMON" 641 }, { 642 .module_id = MODULE_GE, 643 .msg = "MODULE_GE" 644 }, { 645 .module_id = MODULE_IGU_EGU, 646 .msg = "MODULE_IGU_EGU" 647 }, { 648 .module_id = MODULE_LGE, 649 .msg = "MODULE_LGE" 650 }, { 651 .module_id = MODULE_NCSI, 652 .msg = "MODULE_NCSI" 653 }, { 654 .module_id = MODULE_PPP, 655 .msg = "MODULE_PPP" 656 }, { 657 .module_id = MODULE_QCN, 658 .msg = "MODULE_QCN" 659 }, { 660 .module_id = MODULE_RCB_RX, 661 .msg = "MODULE_RCB_RX" 662 }, { 663 .module_id = MODULE_RTC, 664 .msg = "MODULE_RTC" 665 }, { 666 .module_id = MODULE_SSU, 667 .msg = "MODULE_SSU" 668 }, { 669 .module_id = MODULE_TM, 670 .msg = "MODULE_TM" 671 }, { 672 .module_id = MODULE_RCB_TX, 673 .msg = "MODULE_RCB_TX" 674 }, { 675 .module_id = MODULE_TXDMA, 676 .msg = "MODULE_TXDMA" 677 }, { 678 .module_id = MODULE_MASTER, 679 .msg = "MODULE_MASTER" 680 }, { 681 .module_id = MODULE_ROCEE_TOP, 682 .msg = "MODULE_ROCEE_TOP" 683 }, { 684 .module_id = MODULE_ROCEE_TIMER, 685 .msg = "MODULE_ROCEE_TIMER" 686 }, { 687 .module_id = MODULE_ROCEE_MDB, 688 .msg = "MODULE_ROCEE_MDB" 689 }, { 690 .module_id = MODULE_ROCEE_TSP, 691 .msg = "MODULE_ROCEE_TSP" 692 }, { 693 .module_id = MODULE_ROCEE_TRP, 694 .msg = "MODULE_ROCEE_TRP" 695 }, { 696 .module_id = MODULE_ROCEE_SCC, 697 .msg = "MODULE_ROCEE_SCC" 698 }, { 699 .module_id = MODULE_ROCEE_CAEP, 700 .msg = "MODULE_ROCEE_CAEP" 701 }, { 702 .module_id = MODULE_ROCEE_GEN_AC, 703 .msg = "MODULE_ROCEE_GEN_AC" 704 }, { 705 .module_id = MODULE_ROCEE_QMM, 706 .msg = "MODULE_ROCEE_QMM" 707 }, { 708 .module_id = MODULE_ROCEE_LSAN, 709 .msg = "MODULE_ROCEE_LSAN" 710 } 711 }; 712 713 static const struct hclge_hw_type_id hclge_hw_type_id_st[] = { 714 { 715 .type_id = NONE_ERROR, 716 .msg = "none_error" 717 }, { 718 .type_id = FIFO_ERROR, 719 .msg = "fifo_error" 720 }, { 721 .type_id = MEMORY_ERROR, 722 .msg = "memory_error" 723 }, { 724 .type_id = POISON_ERROR, 725 .msg = "poison_error" 726 }, { 727 .type_id = MSIX_ECC_ERROR, 728 .msg = "msix_ecc_error" 729 }, { 730 .type_id = TQP_INT_ECC_ERROR, 731 .msg = "tqp_int_ecc_error" 732 }, { 733 .type_id = PF_ABNORMAL_INT_ERROR, 734 .msg = "pf_abnormal_int_error" 735 }, { 736 .type_id = MPF_ABNORMAL_INT_ERROR, 737 .msg = "mpf_abnormal_int_error" 738 }, { 739 .type_id = COMMON_ERROR, 740 .msg = "common_error" 741 }, { 742 .type_id = PORT_ERROR, 743 .msg = "port_error" 744 }, { 745 .type_id = ETS_ERROR, 746 .msg = "ets_error" 747 }, { 748 .type_id = NCSI_ERROR, 749 .msg = "ncsi_error" 750 }, { 751 .type_id = GLB_ERROR, 752 .msg = "glb_error" 753 }, { 754 .type_id = ROCEE_NORMAL_ERR, 755 .msg = 
"rocee_normal_error" 756 }, { 757 .type_id = ROCEE_OVF_ERR, 758 .msg = "rocee_ovf_error" 759 } 760 }; 761 762 static void hclge_log_error(struct device *dev, char *reg, 763 const struct hclge_hw_error *err, 764 u32 err_sts, unsigned long *reset_requests) 765 { 766 while (err->msg) { 767 if (err->int_msk & err_sts) { 768 dev_err(dev, "%s %s found [error status=0x%x]\n", 769 reg, err->msg, err_sts); 770 if (err->reset_level && 771 err->reset_level != HNAE3_NONE_RESET) 772 set_bit(err->reset_level, reset_requests); 773 } 774 err++; 775 } 776 } 777 778 /* hclge_cmd_query_error: read the error information 779 * @hdev: pointer to struct hclge_dev 780 * @desc: descriptor for describing the command 781 * @cmd: command opcode 782 * @flag: flag for extended command structure 783 * 784 * This function query the error info from hw register/s using command 785 */ 786 static int hclge_cmd_query_error(struct hclge_dev *hdev, 787 struct hclge_desc *desc, u32 cmd, u16 flag) 788 { 789 struct device *dev = &hdev->pdev->dev; 790 int desc_num = 1; 791 int ret; 792 793 hclge_cmd_setup_basic_desc(&desc[0], cmd, true); 794 if (flag) { 795 desc[0].flag |= cpu_to_le16(flag); 796 hclge_cmd_setup_basic_desc(&desc[1], cmd, true); 797 desc_num = 2; 798 } 799 800 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 801 if (ret) 802 dev_err(dev, "query error cmd failed (%d)\n", ret); 803 804 return ret; 805 } 806 807 static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev) 808 { 809 struct hclge_desc desc; 810 811 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false); 812 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR); 813 814 return hclge_cmd_send(&hdev->hw, &desc, 1); 815 } 816 817 static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en) 818 { 819 struct device *dev = &hdev->pdev->dev; 820 struct hclge_desc desc[2]; 821 int ret; 822 823 /* configure common error interrupts */ 824 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); 825 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 826 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); 827 828 if (en) { 829 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); 830 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | 831 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); 832 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); 833 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN | 834 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN); 835 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); 836 } 837 838 desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); 839 desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | 840 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); 841 desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); 842 desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK | 843 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK); 844 desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK); 845 846 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 847 if (ret) 848 dev_err(dev, 849 "fail(%d) to configure common err interrupts\n", ret); 850 851 return ret; 852 } 853 854 static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en) 855 { 856 struct device *dev = &hdev->pdev->dev; 857 struct hclge_desc desc; 858 int ret; 859 860 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) 861 return 0; 862 863 /* configure NCSI error interrupts */ 864 hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false); 865 if (en) 866 desc.data[0] = 

static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev)
{
	struct hclge_desc desc;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false);
	desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR);

	return hclge_cmd_send(&hdev->hw, &desc, 1);
}

static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure common error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);

	if (en) {
		desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
		desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
					      HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
		desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
					      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
		desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
	desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
				      HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
	desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
				      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure common err interrupts\n", ret);

	return ret;
}

static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
		return 0;

	/* configure NCSI error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure NCSI error interrupts\n", ret);

	return ret;
}

static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure IGU,EGU error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
	desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE);
	if (en)
		desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure IGU common interrupts\n", ret);
		return ret;
	}

	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure IGU-EGU TNL interrupts\n", ret);
		return ret;
	}

	ret = hclge_config_ncsi_hw_err_int(hdev, en);

	return ret;
}
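
/*
 * Convention in the hclge_config_*_int helpers above and below: the
 * interrupt-enable data words are written only when en is true (the
 * zeroed descriptor disables them otherwise), while the corresponding
 * *_EN_MASK write-enable words are always written, so the command touches
 * the intended bits on both the enable and disable paths.
 */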

static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
					    bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure PPP error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], cmd, false);

	if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN);
			desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK);
		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
			desc[1].data[2] =
				cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK);
	} else if (cmd == HCLGE_PPP_CMD1_INT_CMD) {
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK);
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev, "fail(%d) to configure PPP error intr\n", ret);

	return ret;
}

static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en)
{
	int ret;

	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD,
					       en);
	if (ret)
		return ret;

	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD,
					       en);

	return ret;
}

static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure TM SCH hw errors */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret);
		return ret;
	}

	/* configure TM QCN hw errors */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false);
	if (en)
		desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure TM QCN mem errors\n", ret);

	return ret;
}

static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure MAC common error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure MAC COMMON error intr\n", ret);

	return ret;
}

int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en)
{
	struct hclge_desc desc;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN);
	else
		desc.data[0] = 0;

	desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK);

	return hclge_cmd_send(&hdev->hw, &desc, 1);
}
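
/*
 * hclge_config_mac_tnl_int() is deliberately non-static (presumably
 * declared in hclge_err.h): MAC tunnel interrupts are toggled at runtime
 * from elsewhere in the driver, unlike the init-time helpers above.
 */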

static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd,
					     bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int desc_num = 1;
	int ret;

	/* configure PPU error interrupts */
	if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
		hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN);
			desc[1].data[3] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN);
			desc[1].data[4] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK);
		desc[1].data[2] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK);
		desc[1].data[3] |=
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK);
		desc_num = 2;
	} else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		if (en)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2);

		desc[0].data[2] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK);
	} else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		if (en)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN);

		desc[0].data[2] =
			cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK);
	} else {
		dev_err(dev, "Invalid cmd to configure PPU error interrupts\n");
		return -EINVAL;
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);

	return ret;
}

static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	int ret;

	ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD,
						en);
	if (ret) {
		dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n",
			ret);
		return ret;
	}

	ret = hclge_config_ppu_error_interrupts(hdev,
						HCLGE_PPU_MPF_OTHER_INT_CMD,
						en);
	if (ret) {
		dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret);
		return ret;
	}

	ret = hclge_config_ppu_error_interrupts(hdev,
						HCLGE_PPU_PF_OTHER_INT_CMD, en);
	if (ret)
		dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n",
			ret);
	return ret;
}

static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure SSU ecc error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false);
	if (en) {
		desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN);
		desc[0].data[1] =
			cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN);
		desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN);
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK);
	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK);
	desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure SSU ECC error interrupt\n", ret);
		return ret;
	}

	/* configure SSU common error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false);

	if (en) {
		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN);
		else
			desc[0].data[0] =
				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5));
		desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN);
		desc[0].data[2] =
			cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN);
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK |
				      HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK);
	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure SSU COMMON error intr\n", ret);

	return ret;
}
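
/*
 * The per-block enable helpers above are also reachable generically: the
 * hw_blk table at the bottom of this file binds each block name (IGU_EGU,
 * PPP, SSU, PPU, TM, COMMON, MAC) to its config_err_int callback.
 */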

/* hclge_query_bd_num: query number of buffer descriptors
 * @hdev: pointer to struct hclge_dev
 * @is_ras: true for ras, false for msix
 * @mpf_bd_num: number of main PF interrupt buffer descriptors
 * @pf_bd_num: number of non-main PF interrupt buffer descriptors
 *
 * This function queries the number of mpf and pf buffer descriptors.
 */
static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras,
			      u32 *mpf_bd_num, u32 *pf_bd_num)
{
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_min_bd_num, pf_min_bd_num;
	enum hclge_opcode_type opcode;
	struct hclge_desc desc_bd;
	int ret;

	if (is_ras) {
		opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM;
		mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM;
		pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM;
	} else {
		opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM;
		mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM;
		pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM;
	}

	hclge_cmd_setup_basic_desc(&desc_bd, opcode, true);
	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
	if (ret) {
		dev_err(dev, "fail(%d) to query msix int status bd num\n",
			ret);
		return ret;
	}

	*mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
	*pf_bd_num = le32_to_cpu(desc_bd.data[1]);
	if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) {
		dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n",
			*mpf_bd_num, *pf_bd_num);
		return -EINVAL;
	}

	return 0;
}
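
/*
 * The RAS handlers below rely on the fixed descriptor layout returned by
 * the query commands: hclge_handle_mpf_ras_error() reads SSU status from
 * desc[2], IGU from desc[3], PPP from desc[4], PPU from desc[5], TM from
 * desc[6], QCN from desc[7], and NCSI from desc[9], indexing 32-bit words
 * within each descriptor's data[] array.
 */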

/* hclge_handle_mpf_ras_error: handle all main PF RAS errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @num: number of extended command structures
 *
 * This function handles all the main PF RAS errors in the
 * hw register/s using command.
 */
static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
				      struct hclge_desc *desc,
				      int num)
{
	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all main PF RAS errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret) {
		dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log HNS common errors */
	status = le32_to_cpu(desc[0].data[0]);
	if (status)
		hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
				&hclge_imp_tcm_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[1]);
	if (status)
		hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
				&hclge_cmdq_nic_mem_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	if ((le32_to_cpu(desc[0].data[2])) & BIT(0))
		dev_warn(dev, "imp_rd_data_poison_err found\n");

	status = le32_to_cpu(desc[0].data[3]);
	if (status)
		hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
				&hclge_tqp_int_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[4]);
	if (status)
		hclge_log_error(dev, "MSIX_ECC_INT_STS",
				&hclge_msix_sram_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log SSU(Storage Switch Unit) errors */
	desc_data = (__le32 *)&desc[2];
	status = le32_to_cpu(*(desc_data + 2));
	if (status)
		hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
				&hclge_ssu_mem_ecc_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
	if (status) {
		dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
			status);
		set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
	}

	status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
	if (status)
		hclge_log_error(dev, "SSU_COMMON_ERR_INT",
				&hclge_ssu_com_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log IGU(Ingress Unit) errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
	if (status)
		hclge_log_error(dev, "IGU_INT_STS",
				&hclge_igu_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPP(Programmable Packet Process) errors */
	desc_data = (__le32 *)&desc[4];
	status = le32_to_cpu(*(desc_data + 1));
	if (status)
		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
				&hclge_ppp_mpf_abnormal_int_st1[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
	if (status)
		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
				&hclge_ppp_mpf_abnormal_int_st3[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPU(RCB) errors */
	desc_data = (__le32 *)&desc[5];
	status = le32_to_cpu(*(desc_data + 1));
	if (status) {
		dev_err(dev,
			"PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n");
		set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
	}

	status = le32_to_cpu(*(desc_data + 2));
	if (status)
		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
				&hclge_ppu_mpf_abnormal_int_st2[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
	if (status)
		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
				&hclge_ppu_mpf_abnormal_int_st3[0], status,
				&ae_dev->hw_err_reset_req);

	/* log TM(Traffic Manager) errors */
	desc_data = (__le32 *)&desc[6];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "TM_SCH_RINT",
				&hclge_tm_sch_rint[0], status,
				&ae_dev->hw_err_reset_req);

	/* log QCN(Quantized Congestion Control) errors */
	desc_data = (__le32 *)&desc[7];
	status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
	if (status)
		hclge_log_error(dev, "QCN_FIFO_RINT",
				&hclge_qcn_fifo_rint[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
	if (status)
		hclge_log_error(dev, "QCN_ECC_RINT",
				&hclge_qcn_ecc_rint[0], status,
				&ae_dev->hw_err_reset_req);

	/* log NCSI errors */
	desc_data = (__le32 *)&desc[9];
	status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
	if (status)
		hclge_log_error(dev, "NCSI_ECC_INT_RPT",
				&hclge_ncsi_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* clear all main PF RAS errors */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret)
		dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);

	return ret;
}

/* hclge_handle_pf_ras_error: handle all PF RAS errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @num: number of extended command structures
 *
 * This function handles all the PF RAS errors in the
 * hw register/s using command.
 */
static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
				     struct hclge_desc *desc,
				     int num)
{
	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all PF RAS errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret) {
		dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log SSU(Storage Switch Unit) errors */
	status = le32_to_cpu(desc[0].data[0]);
	if (status)
		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
				&hclge_ssu_port_based_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[1]);
	if (status)
		hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
				&hclge_ssu_fifo_overflow_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[2]);
	if (status)
		hclge_log_error(dev, "SSU_ETS_TCG_INT",
				&hclge_ssu_ets_tcg_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
	desc_data = (__le32 *)&desc[1];
	status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
	if (status)
		hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
				&hclge_igu_egu_tnl_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPU(RCB) errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
	if (status) {
		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
				&hclge_ppu_pf_abnormal_int[0], status,
				&ae_dev->hw_err_reset_req);
		hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR);
	}

	/* clear all PF RAS errors */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret)
		dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);

	return ret;
}

static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
{
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	int ret;

	/* query the number of registers in the RAS int status */
	ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num);
	if (ret)
		return ret;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	/* handle all main PF RAS errors */
	ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
	if (ret) {
		kfree(desc);
		return ret;
	}
	memset(desc, 0, bd_num * sizeof(struct hclge_desc));

	/* handle all PF RAS errors */
	ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
	kfree(desc);

	return ret;
}
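
/*
 * hclge_handle_all_ras_errors() sizes one descriptor buffer at
 * max(mpf_bd_num, pf_bd_num) and reuses it for both queries; the memset
 * between the two passes wipes the stale MPF status words before the PF
 * query refills the buffer.
 */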
&desc[0], 3); 1514 if (ret) { 1515 dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); 1516 return ret; 1517 } 1518 1519 dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", 1520 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 1521 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 1522 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 1523 dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", 1524 le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), 1525 le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), 1526 le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); 1527 dev_err(dev, "AXI3: %08X %08X %08X %08X\n", 1528 le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]), 1529 le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); 1530 1531 return 0; 1532 } 1533 1534 static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) 1535 { 1536 struct device *dev = &hdev->pdev->dev; 1537 struct hclge_desc desc[2]; 1538 int ret; 1539 1540 ret = hclge_cmd_query_error(hdev, &desc[0], 1541 HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, 1542 HCLGE_CMD_FLAG_NEXT); 1543 if (ret) { 1544 dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); 1545 return ret; 1546 } 1547 1548 dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", 1549 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 1550 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 1551 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 1552 dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), 1553 le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); 1554 1555 return 0; 1556 } 1557 1558 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) 1559 { 1560 struct device *dev = &hdev->pdev->dev; 1561 struct hclge_desc desc[2]; 1562 int ret; 1563 1564 /* read overflow error status */ 1565 ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD, 1566 0); 1567 if (ret) { 1568 dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret); 1569 return ret; 1570 } 1571 1572 /* log overflow error */ 1573 if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 1574 const struct hclge_hw_error *err; 1575 u32 err_sts; 1576 1577 err = &hclge_rocee_qmm_ovf_err_int[0]; 1578 err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK & 1579 le32_to_cpu(desc[0].data[0]); 1580 while (err->msg) { 1581 if (err->int_msk == err_sts) { 1582 dev_err(dev, "%s [error status=0x%x] found\n", 1583 err->msg, 1584 le32_to_cpu(desc[0].data[0])); 1585 break; 1586 } 1587 err++; 1588 } 1589 } 1590 1591 if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 1592 dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n", 1593 le32_to_cpu(desc[0].data[1])); 1594 } 1595 1596 if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 1597 dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n", 1598 le32_to_cpu(desc[0].data[2])); 1599 } 1600 1601 return 0; 1602 } 1603 1604 static enum hnae3_reset_type 1605 hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) 1606 { 1607 enum hnae3_reset_type reset_type = HNAE3_NONE_RESET; 1608 struct device *dev = &hdev->pdev->dev; 1609 struct hclge_desc desc[2]; 1610 unsigned int status; 1611 int ret; 1612 1613 /* read RAS error interrupt status */ 1614 ret = hclge_cmd_query_error(hdev, &desc[0], 1615 HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0); 1616 if (ret) { 1617 dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret); 1618 /* reset everything for now */ 1619 return HNAE3_GLOBAL_RESET; 1620 } 
	status = le32_to_cpu(desc[0].data[0]);
	if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
		if (status & HCLGE_ROCEE_RERR_INT_MASK)
			dev_err(dev, "ROCEE RAS AXI rresp error\n");

		if (status & HCLGE_ROCEE_BERR_INT_MASK)
			dev_err(dev, "ROCEE RAS AXI bresp error\n");

		reset_type = HNAE3_FUNC_RESET;

		hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR);

		ret = hclge_log_rocee_axi_error(hdev);
		if (ret)
			return HNAE3_GLOBAL_RESET;
	}

	if (status & HCLGE_ROCEE_ECC_INT_MASK) {
		dev_err(dev, "ROCEE RAS 2bit ECC error\n");
		reset_type = HNAE3_GLOBAL_RESET;

		ret = hclge_log_rocee_ecc_error(hdev);
		if (ret)
			return HNAE3_GLOBAL_RESET;
	}

	if (status & HCLGE_ROCEE_OVF_INT_MASK) {
		ret = hclge_log_rocee_ovf_error(hdev);
		if (ret) {
			dev_err(dev, "failed(%d) to process ovf error\n", ret);
			/* reset everything for now */
			return HNAE3_GLOBAL_RESET;
		}
	}

	/* clear error status */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
	if (ret) {
		dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret);
		/* reset everything for now */
		return HNAE3_GLOBAL_RESET;
	}

	return reset_type;
}

int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 ||
	    !hnae3_dev_roce_supported(hdev))
		return 0;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false);
	if (en) {
		/* enable ROCEE hw error interrupts */
		desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN);
		desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN);

		hclge_log_and_clear_rocee_ras_error(hdev);
	}
	desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK);
	desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret);

	return ret;
}

static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	enum hnae3_reset_type reset_type;

	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
		return;

	reset_type = hclge_log_and_clear_rocee_ras_error(hdev);
	if (reset_type != HNAE3_NONE_RESET)
		set_bit(reset_type, &ae_dev->hw_err_reset_req);
}

static const struct hclge_hw_blk hw_blk[] = {
	{
		.msk = BIT(0), .name = "IGU_EGU",
		.config_err_int = hclge_config_igu_egu_hw_err_int,
	},
	{
		.msk = BIT(1), .name = "PPP",
		.config_err_int = hclge_config_ppp_hw_err_int,
	},
	{
		.msk = BIT(2), .name = "SSU",
		.config_err_int = hclge_config_ssu_hw_err_int,
	},
	{
		.msk = BIT(3), .name = "PPU",
		.config_err_int = hclge_config_ppu_hw_err_int,
	},
	{
		.msk = BIT(4), .name = "TM",
		.config_err_int = hclge_config_tm_hw_err_int,
	},
	{
		.msk = BIT(5), .name = "COMMON",
		.config_err_int = hclge_config_common_hw_err_int,
	},
	{
		.msk = BIT(8), .name = "MAC",
		.config_err_int = hclge_config_mac_err_int,
	},
	{ /* sentinel */ }
};

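/* Enable or disable reporting of all MSI-X errors by toggling the
 * summary bit in the PF other interrupt control register.
 */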
static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable)
{
	u32 reg_val;

	reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);

	if (enable)
		reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
	else
		reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);

	hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val);
}

int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state)
{
	const struct hclge_hw_blk *module = hw_blk;
	int ret = 0;

	hclge_config_all_msix_error(hdev, state);

	while (module->name) {
		if (module->config_err_int) {
			ret = module->config_err_int(hdev, state);
			if (ret)
				return ret;
		}
		module++;
	}

	return ret;
}

pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	u32 status;

	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
		dev_err(dev,
			"Can't recover - RAS error reported during dev init\n");
		return PCI_ERS_RESULT_NONE;
	}

	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
	if (status & HCLGE_RAS_REG_NFE_MASK ||
	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
		ae_dev->hw_err_reset_req = 0;
	else
		goto out;

	/* Handling Non-fatal HNS RAS errors */
	if (status & HCLGE_RAS_REG_NFE_MASK) {
		dev_err(dev,
			"HNS Non-Fatal RAS error(status=0x%x) identified\n",
			status);
		hclge_handle_all_ras_errors(hdev);
	}

	/* Handling Non-fatal Rocee RAS errors */
	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 &&
	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
		dev_err(dev, "ROCEE Non-Fatal RAS error identified\n");
		hclge_handle_rocee_ras_error(ae_dev);
	}

	if (ae_dev->hw_err_reset_req)
		return PCI_ERS_RESULT_NEED_RESET;

out:
	return PCI_ERS_RESULT_RECOVERED;
}

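/* Overwrite the opcode of the caller's descriptors with the matching
 * "query and clear" command and send them, acknowledging either all
 * main PF or all PF MSI-X interrupt sources.
 */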
static int hclge_clear_hw_msix_error(struct hclge_dev *hdev,
				     struct hclge_desc *desc, bool is_mpf,
				     u32 bd_num)
{
	if (is_mpf)
		desc[0].opcode =
			cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT);
	else
		desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT);

	desc[0].flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);

	return hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
}

/* hclge_query_over_8bd_err_info: query information about over_8bd_nfe_err
 * @hdev: pointer to struct hclge_dev
 * @vf_id: Index of the virtual function with error
 * @q_id: Physical index of the queue with error
 *
 * This function gets the specific index of the queue and function which
 * causes over_8bd_nfe_err by using command. If vf_id is 0, it means the
 * error is caused by the PF instead of a VF.
 */
static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id,
					 u16 *q_id)
{
	struct hclge_query_ppu_pf_other_int_dfx_cmd *req;
	struct hclge_desc desc;
	int ret;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true);
	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		return ret;

	req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data;
	*vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id);
	*q_id = le16_to_cpu(req->over_8bd_no_fe_qid);

	return 0;
}

/* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err
 * @hdev: pointer to struct hclge_dev
 * @reset_requests: reset level that we need to trigger later
 *
 * over_8bd_nfe_err is a special MSI-X error because it may be caused by
 * a VF; in that case, we need to trigger a VF reset. Otherwise, a PF
 * reset is needed.
 */
static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
				      unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	u16 vf_id;
	u16 q_id;
	int ret;

	ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id);
	if (ret) {
		dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n",
			ret);
		return;
	}

	dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%u), queue_id(%u)\n",
		vf_id, q_id);

	if (vf_id) {
		if (vf_id >= hdev->num_alloc_vport) {
			dev_err(dev, "invalid vf id(%u)\n", vf_id);
			return;
		}

		/* If we need to trigger another reset whose level is higher
		 * than HNAE3_VF_FUNC_RESET, there is no need to trigger a
		 * VF reset here.
		 */
		if (*reset_requests != 0)
			return;

		ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
		if (ret)
			dev_err(dev, "inform reset to vf(%u) failed %d!\n",
				vf_id, ret);
	} else {
		set_bit(HNAE3_FUNC_RESET, reset_requests);
	}
}

/* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @mpf_bd_num: number of extended command structures
 * @reset_requests: record of the reset level that we need
 *
 * This function handles all the main PF MSI-X errors in the hw
 * registers using command.
 */
static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev,
				       struct hclge_desc *desc,
				       int mpf_bd_num,
				       unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all main PF MSIx errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
	if (ret) {
		dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log MAC errors */
	desc_data = (__le32 *)&desc[1];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
				&hclge_mac_afifo_tnl_int[0], status,
				reset_requests);

	/* log PPU(RCB) MPF errors */
	desc_data = (__le32 *)&desc[5];
	status = le32_to_cpu(*(desc_data + 2)) &
		 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
	if (status)
		dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x]\n",
			status);

	/* clear all main PF MSIx errors */
	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
	if (ret)
		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret);

	return ret;
}

/* hclge_handle_pf_msix_error: handle all PF MSI-X errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @pf_bd_num: number of extended command structures
 * @reset_requests: record of the reset level that we need
 *
 * This function handles all the PF MSI-X errors in the hw registers
 * using command.
 */
static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
				      struct hclge_desc *desc,
				      int pf_bd_num,
				      unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all PF MSIx errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num);
	if (ret) {
		dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log SSU PF errors */
	status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
	if (status)
		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
				&hclge_ssu_port_based_pf_int[0],
				status, reset_requests);

	/* read and log PPP PF errors */
	desc_data = (__le32 *)&desc[2];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
				&hclge_ppp_pf_abnormal_int[0],
				status, reset_requests);

	/* log PPU(RCB) PF errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
	if (status)
		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
				&hclge_ppu_pf_abnormal_int[0],
				status, reset_requests);

	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK;
	if (status)
		hclge_handle_over_8bd_err(hdev, reset_requests);

	/* clear all PF MSIx errors */
	ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
	if (ret)
		dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret);

	return ret;
}

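/* Query how many descriptors the main PF and PF MSI-X status commands
 * need, then process both error sets with a single buffer sized for
 * the larger of the two.
 */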
static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
					  unsigned long *reset_requests)
{
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	int ret;

	/* query the number of bds for the MSIx int status */
	ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
	if (ret)
		goto out;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num,
					  reset_requests);
	if (ret)
		goto msi_error;

	memset(desc, 0, bd_num * sizeof(struct hclge_desc));
	ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests);
	if (ret)
		goto msi_error;

	ret = hclge_handle_mac_tnl(hdev);

msi_error:
	kfree(desc);
out:
	return ret;
}

int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
			       unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;

	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
		dev_err(dev,
			"failed to handle msix error during dev init\n");
		return -EAGAIN;
	}

	return hclge_handle_all_hw_msix_error(hdev, reset_requests);
}

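/* Query the MAC tunnel interrupt status and, if any bit is set, record
 * it with a timestamp in the mac_tnl_log kfifo before clearing it.
 */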
int hclge_handle_mac_tnl(struct hclge_dev *hdev)
{
	struct hclge_mac_tnl_stats mac_tnl_stats;
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	u32 status;
	int ret;

	/* query and clear mac tnl interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true);
	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret);
		return ret;
	}

	status = le32_to_cpu(desc.data[0]);
	if (status) {
		/* When a mac tnl interrupt occurs, we record the current
		 * time and register status here in a fifo, then clear the
		 * status, so that if the link status changes suddenly at
		 * some time, we can query them via debugfs.
		 */
		mac_tnl_stats.time = local_clock();
		mac_tnl_stats.status = status;
		kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
		ret = hclge_clear_mac_tnl_int(hdev);
		if (ret)
			dev_err(dev, "failed to clear mac tnl int, ret = %d.\n",
				ret);
	}

	return ret;
}

void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	u32 status;
	int ret;

	ae_dev->hw_err_reset_req = 0;
	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);

	/* query the number of bds for the MSIx int status */
	ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
	if (ret)
		return;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return;

	/* Clear HNS hw errors reported through msix */
	memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) -
	       HCLGE_DESC_NO_DATA_LEN);
	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
	if (ret) {
		dev_err(dev, "fail(%d) to clear mpf msix int during init\n",
			ret);
		goto msi_error;
	}

	memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) -
	       HCLGE_DESC_NO_DATA_LEN);
	ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
	if (ret) {
		dev_err(dev, "fail(%d) to clear pf msix int during init\n",
			ret);
		goto msi_error;
	}

	/* Handle Non-fatal HNS RAS errors */
	if (status & HCLGE_RAS_REG_NFE_MASK) {
		dev_err(dev, "HNS hw error(RAS) identified during init\n");
		hclge_handle_all_ras_errors(hdev);
	}

msi_error:
	kfree(desc);
}

bool hclge_find_error_source(struct hclge_dev *hdev)
{
	u32 msix_src_flag, hw_err_src_flag;

	msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
			HCLGE_VECTOR0_REG_MSIX_MASK;

	hw_err_src_flag = hclge_read_dev(&hdev->hw,
					 HCLGE_RAS_PF_OTHER_INT_STS_REG) &
			  HCLGE_RAS_REG_ERR_MASK;

	return msix_src_flag || hw_err_src_flag;
}

void hclge_handle_occurred_error(struct hclge_dev *hdev)
{
	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);

	if (hclge_find_error_source(hdev))
		hclge_handle_error_info_log(ae_dev);
}

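/* Translate the module id and type id reported in the error info log
 * into readable names via the module and type lookup tables, then dump
 * the raw registers that follow them.
 */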
static void
hclge_handle_error_type_reg_log(struct device *dev,
				struct hclge_mod_err_info *mod_info,
				struct hclge_type_reg_err_info *type_reg_info)
{
#define HCLGE_ERR_TYPE_MASK 0x7F
#define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7

	u8 mod_id, total_module, type_id, total_type, i, is_ras;
	u8 index_module = MODULE_NONE;
	u8 index_type = NONE_ERROR;

	mod_id = mod_info->mod_id;
	type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
	is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET;

	total_module = ARRAY_SIZE(hclge_hw_module_id_st);
	total_type = ARRAY_SIZE(hclge_hw_type_id_st);

	for (i = 0; i < total_module; i++) {
		if (mod_id == hclge_hw_module_id_st[i].module_id) {
			index_module = i;
			break;
		}
	}

	for (i = 0; i < total_type; i++) {
		if (type_id == hclge_hw_type_id_st[i].type_id) {
			index_type = i;
			break;
		}
	}

	if (index_module != MODULE_NONE && index_type != NONE_ERROR)
		dev_err(dev,
			"found %s %s, is %s error.\n",
			hclge_hw_module_id_st[index_module].msg,
			hclge_hw_type_id_st[index_type].msg,
			is_ras ? "ras" : "msix");
	else
		dev_err(dev,
			"unknown module[%u] or type[%u].\n", mod_id, type_id);

	dev_err(dev, "reg_value:\n");
	for (i = 0; i < type_reg_info->reg_num; i++)
		dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
}

static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
					  const u32 *buf, u32 buf_size)
{
	struct hclge_type_reg_err_info *type_reg_info;
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	struct hclge_mod_err_info *mod_info;
	struct hclge_sum_err_info *sum_info;
	u8 mod_num, err_num, i;
	u32 offset = 0;

	sum_info = (struct hclge_sum_err_info *)&buf[offset++];
	if (sum_info->reset_type &&
	    sum_info->reset_type != HNAE3_NONE_RESET)
		set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req);
	mod_num = sum_info->mod_num;

	while (mod_num--) {
		if (offset >= buf_size) {
			dev_err(dev, "The offset(%u) exceeds buf size(%u).\n",
				offset, buf_size);
			return;
		}
		mod_info = (struct hclge_mod_err_info *)&buf[offset++];
		err_num = mod_info->err_num;

		for (i = 0; i < err_num; i++) {
			if (offset >= buf_size) {
				dev_err(dev,
					"The offset(%u) exceeds buf size(%u).\n",
					offset, buf_size);
				return;
			}

			type_reg_info = (struct hclge_type_reg_err_info *)
					&buf[offset++];
			hclge_handle_error_type_reg_log(dev, mod_info,
							type_reg_info);

			offset += type_reg_info->reg_num;
		}
	}
}

static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc_bd;
	int ret;

	hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true);
	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
	if (ret) {
		dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret);
		return ret;
	}

	*bd_num = le32_to_cpu(desc_bd.data[0]);
	if (!(*bd_num)) {
		dev_err(dev, "The value of bd_num is 0!\n");
		return -EINVAL;
	}

	return 0;
}

static int hclge_query_all_err_info(struct hclge_dev *hdev,
				    struct hclge_desc *desc, u32 bd_num)
{
	struct device *dev = &hdev->pdev->dev;
	int ret;

	hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true);
	ret = hclge_cmd_send(&hdev->hw, desc, bd_num);
	if (ret)
		dev_err(dev, "failed to query error info, ret = %d.\n", ret);

	return ret;
}

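/* Fetch the complete error info log from firmware, convert it to CPU
 * endianness, and walk it module by module; any reset level it
 * requests is recorded in hw_err_reset_req.
 */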
int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
{
	u32 bd_num, desc_len, buf_len, buf_size, i;
	struct hclge_dev *hdev = ae_dev->priv;
	struct hclge_desc *desc;
	__le32 *desc_data;
	u32 *buf;
	int ret;

	ret = hclge_query_all_err_bd_num(hdev, &bd_num);
	if (ret)
		goto out;

	desc_len = bd_num * sizeof(struct hclge_desc);
	desc = kzalloc(desc_len, GFP_KERNEL);
	if (!desc) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hclge_query_all_err_info(hdev, desc, bd_num);
	if (ret)
		goto err_desc;

	buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN;
	buf_size = buf_len / sizeof(u32);

	desc_data = kzalloc(buf_len, GFP_KERNEL);
	if (!desc_data) {
		ret = -ENOMEM;
		goto err_desc;
	}

	buf = kzalloc(buf_len, GFP_KERNEL);
	if (!buf) {
		ret = -ENOMEM;
		goto err_buf_alloc;
	}

	memcpy(desc_data, &desc[0].data[0], buf_len);
	for (i = 0; i < buf_size; i++)
		buf[i] = le32_to_cpu(desc_data[i]);

	hclge_handle_error_module_log(ae_dev, buf, buf_size);
	kfree(buf);

err_buf_alloc:
	kfree(desc_data);
err_desc:
	kfree(desc);
out:
	return ret;
}