/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "umc_v6_7.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"

#include "umc/umc_6_7_0_offset.h"
#include "umc/umc_6_7_0_sh_mask.h"

const uint32_t
	umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
		{28, 20, 24, 16, 12, 4, 8, 0},
		{6, 30, 2, 26, 22, 14, 18, 10},
		{19, 11, 15, 7, 3, 27, 31, 23},
		{9, 1, 5, 29, 25, 17, 21, 13}
};
const uint32_t
	umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
		{19, 11, 15, 7, 3, 27, 31, 23},
		{9, 1, 5, 29, 25, 17, 21, 13},
		{28, 20, 24, 16, 12, 4, 8, 0},
		{6, 30, 2, 26, 22, 14, 18, 10},
};

static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev,
					       uint32_t umc_inst,
					       uint32_t ch_inst)
{
	uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst;

	/* adjust umc and channel index offset,
	 * the register address is not linear on each umc instance */
	umc_inst = index / 4;
	ch_inst = index % 4;

	return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst;
}
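
/*
 * Illustrative walk-through of the offset math in get_umc_v6_7_reg_offset()
 * above (an example, not functional code): assuming adev->umc.channel_inst_num
 * is 8, matching the 4 x 8 channel index tables in this file, umc_inst = 1 and
 * ch_inst = 2 give a flat index of 10, which is re-linearized to register
 * block 10 / 4 = 2 and channel 10 % 4 = 2, i.e. an offset of
 * adev->umc.channel_offs * 2 + UMC_V6_7_INST_DIST * 2. The actual instance
 * counts are programmed by the SoC init code, so treat these numbers as an
 * example only.
 */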

static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
						uint64_t mc_umc_status, uint32_t umc_reg_offset)
{
	uint32_t mc_umc_addr;
	uint64_t reg_value;

	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
		dev_info(adev->dev, "Deferred error, no user action is needed.\n");

	if (mc_umc_status)
		dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);

	/* print IPID registers value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);

	/* print SYND registers value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);

	/* print MISC0 registers value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
}

static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
							    uint32_t umc_inst, uint32_t ch_inst,
							    unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t eccinfo_table_idx;
	uint32_t umc_reg_offset;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	umc_reg_offset = get_umc_v6_7_reg_offset(adev,
						umc_inst, ch_inst);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	/* check for SRAM correctable error
	 * MCUMC_STATUS is a 64 bit register */
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);

		if (ras->umc_ecc.record_ce_addr_supported) {
			uint64_t err_addr, soc_pa;
			uint32_t channel_index =
				adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

			err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
			err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
			/* translate umc channel address to soc pa, 3 parts are included */
			soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
					ADDR_OF_256B_BLOCK(channel_index) |
					OFFSET_IN_256B_BLOCK(err_addr);

			/* The umc channel bits are not original values, they are hashed */
			SET_CHANNEL_HASH(channel_index, soc_pa);

			dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
		}
	}
}

static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
							       uint32_t umc_inst, uint32_t ch_inst,
							       unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t eccinfo_table_idx;
	uint32_t umc_reg_offset;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	umc_reg_offset = get_umc_v6_7_reg_offset(adev,
						umc_inst, ch_inst);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	/* check the MCUMC_STATUS */
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

static int umc_v6_7_ecc_info_querry_ecc_error_count(struct amdgpu_device *adev,
					uint32_t node_inst, uint32_t umc_inst,
					uint32_t ch_inst, void *data)
{
	struct ras_err_data *err_data = (struct ras_err_data *)data;

	umc_v6_7_ecc_info_query_correctable_error_count(adev,
					umc_inst, ch_inst,
					&(err_data->ce_count));

	umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev,
					umc_inst, ch_inst,
					&(err_data->ue_count));

	return 0;
}

static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
						    void *ras_error_status)
{
	amdgpu_umc_loop_channels(adev,
		umc_v6_7_ecc_info_querry_ecc_error_count, ras_error_status);
}
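
/*
 * Note on the address conversion below: umc_v6_7_convert_error_address()
 * first builds a SoC physical address from the normalized error address and
 * the hashed channel index, then clears the [C4 C3 C2] column bits and
 * enumerates every combination of them, additionally flipping R14 for each
 * one. Assuming UMC_V6_7_NA_MAP_PA_NUM covers the eight [C4 C3 C2] values,
 * a single reported error therefore retires 2 * UMC_V6_7_NA_MAP_PA_NUM
 * candidate pages.
 */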

void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
				    struct ras_err_data *err_data, uint64_t err_addr,
				    uint32_t ch_inst, uint32_t umc_inst)
{
	uint32_t channel_index;
	uint64_t soc_pa, retired_page, column;

	channel_index =
		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
	/* translate umc channel address to soc pa, 3 parts are included */
	soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
			ADDR_OF_256B_BLOCK(channel_index) |
			OFFSET_IN_256B_BLOCK(err_addr);

	/* The umc channel bits are not original values, they are hashed */
	SET_CHANNEL_HASH(channel_index, soc_pa);

	/* clear [C4 C3 C2] in soc physical address */
	soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);

	/* loop for all possibilities of [C4 C3 C2] */
	for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
		retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
		dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
		amdgpu_umc_fill_error_record(err_data, err_addr,
			retired_page, channel_index, umc_inst);

		/* shift R14 bit */
		retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
		dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
		amdgpu_umc_fill_error_record(err_data, err_addr,
			retired_page, channel_index, umc_inst);
	}
}

static int umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
					uint32_t node_inst, uint32_t umc_inst,
					uint32_t ch_inst, void *data)
{
	uint64_t mc_umc_status, err_addr;
	uint32_t eccinfo_table_idx;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	struct ras_err_data *err_data = (struct ras_err_data *)data;

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;

	if (mc_umc_status == 0)
		return 0;

	if (!err_data->err_addr)
		return 0;

	/* calculate error address if ue error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {

		err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

		umc_v6_7_convert_error_address(adev, err_data, err_addr,
					ch_inst, umc_inst);
	}

	return 0;
}

static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
						      void *ras_error_status)
{
	amdgpu_umc_loop_channels(adev,
		umc_v6_7_ecc_info_query_error_address, ras_error_status);
}
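
/*
 * The ecc_info_* helpers above work from ECC status that is already cached
 * in the RAS context (ras->umc_ecc), while the functions below query the
 * UMC MCA and EccErrCnt registers directly through the PCIE indirect
 * accessors. The register offsets returned by SOC15_REG_OFFSET() are dword
 * offsets, which is why every RREG/WREG call below multiplies by 4 to form
 * a byte address.
 */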

static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset,
						   unsigned long *error_count,
						   uint32_t ch_inst,
						   uint32_t umc_inst)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	/* UMC 6_7 registers */
	ecc_err_cnt_sel_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCntSel);
	ecc_err_cnt_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCnt);
	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_7_CE_CNT_INIT);

	/* select the higher chip and check the error count */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_7_CE_CNT_INIT);

	/* check for SRAM correctable error
	 * MCUMC_STATUS is a 64 bit register */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);

		{
			uint64_t err_addr, soc_pa;
			uint32_t mc_umc_addrt0;
			uint32_t channel_index;

			mc_umc_addrt0 =
				SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

			channel_index =
				adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

			err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
			err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

			/* translate umc channel address to soc pa, 3 parts are included */
			soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
					ADDR_OF_256B_BLOCK(channel_index) |
					OFFSET_IN_256B_BLOCK(err_addr);

			/* The umc channel bits are not original values, they are hashed */
			SET_CHANNEL_HASH(channel_index, soc_pa);

			dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
		}
	}
}

static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
						      uint32_t umc_reg_offset,
						      unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);

	/* check the MCUMC_STATUS */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}
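
/*
 * The correctable error count read above is reported as a delta against
 * UMC_V6_7_CE_CNT_INIT: umc_v6_7_reset_error_count_per_channel() below
 * re-arms EccErrCnt for both chip selects by writing that seed value back,
 * so the next query again measures only newly accumulated errors.
 */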

static int umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
					uint32_t node_inst, uint32_t umc_inst,
					uint32_t ch_inst, void *data)
{
	uint32_t ecc_err_cnt_addr;
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t umc_reg_offset =
		get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);

	ecc_err_cnt_sel_addr =
		SOC15_REG_OFFSET(UMC, 0,
				regUMCCH0_0_EccErrCntSel);
	ecc_err_cnt_addr =
		SOC15_REG_OFFSET(UMC, 0,
				regUMCCH0_0_EccErrCnt);

	/* select the lower chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear lower chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_7_CE_CNT_INIT);

	/* select the higher chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear higher chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_7_CE_CNT_INIT);

	return 0;
}

static void umc_v6_7_reset_error_count(struct amdgpu_device *adev)
{
	amdgpu_umc_loop_channels(adev,
		umc_v6_7_reset_error_count_per_channel, NULL);
}

static int umc_v6_7_query_ecc_error_count(struct amdgpu_device *adev,
					uint32_t node_inst, uint32_t umc_inst,
					uint32_t ch_inst, void *data)
{
	struct ras_err_data *err_data = (struct ras_err_data *)data;
	uint32_t umc_reg_offset =
		get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);

	umc_v6_7_query_correctable_error_count(adev,
					umc_reg_offset,
					&(err_data->ce_count),
					ch_inst, umc_inst);

	umc_v6_7_querry_uncorrectable_error_count(adev,
					umc_reg_offset,
					&(err_data->ue_count));

	return 0;
}

static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	amdgpu_umc_loop_channels(adev,
		umc_v6_7_query_ecc_error_count, ras_error_status);

	umc_v6_7_reset_error_count(adev);
}

static int umc_v6_7_query_error_address(struct amdgpu_device *adev,
					uint32_t node_inst, uint32_t umc_inst,
					uint32_t ch_inst, void *data)
{
	uint32_t mc_umc_status_addr;
	uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr;
	struct ras_err_data *err_data = (struct ras_err_data *)data;
	uint32_t umc_reg_offset =
		get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);

	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
	mc_umc_addrt0 =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);

	if (mc_umc_status == 0)
		return 0;

	if (!err_data->err_addr) {
		/* clear umc status */
		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
		return 0;
	}

	/* calculate error address if ue error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
		err_addr =
			REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

		umc_v6_7_convert_error_address(adev, err_data, err_addr,
					ch_inst, umc_inst);
	}

	/* clear umc status */
	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);

	return 0;
}

static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	amdgpu_umc_loop_channels(adev,
		umc_v6_7_query_error_address, ras_error_status);
}
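
/*
 * Poison mode detection below is inferred from the ECC control setup of a
 * single representative channel: if UCFatalEn is clear on UMC instance 0
 * channel 0, uncorrectable errors are assumed to be handled as poison
 * rather than as fatal errors.
 */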

static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
						struct amdgpu_device *adev,
						uint32_t umc_reg_offset)
{
	uint32_t ecc_ctrl_addr, ecc_ctrl;

	ecc_ctrl_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccCtrl);
	ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr +
					umc_reg_offset) * 4);

	return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_EccCtrl, UCFatalEn);
}

static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
{
	uint32_t umc_reg_offset = 0;

	/* Enabling fatal error in umc instance0 channel0 will be
	 * considered as fatal error mode
	 */
	umc_reg_offset = get_umc_v6_7_reg_offset(adev, 0, 0);
	return !umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset);
}

const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
	.query_ras_error_count = umc_v6_7_query_ras_error_count,
	.query_ras_error_address = umc_v6_7_query_ras_error_address,
};

struct amdgpu_umc_ras umc_v6_7_ras = {
	.ras_block = {
		.hw_ops = &umc_v6_7_ras_hw_ops,
	},
	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
	.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
	.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
};
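
/*
 * Usage note (a hedged sketch, not part of the logic in this file): the
 * SoC/GMC init code is expected to point adev->umc.channel_idx_tbl at one of
 * the two channel index tables exported above and to hook umc_v6_7_ras into
 * the RAS framework; which of the "first" and "second" tables applies depends
 * on the board configuration and is decided outside this file.
 */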