19884c2b1SHawking Zhang /* 29884c2b1SHawking Zhang * Copyright 2019 Advanced Micro Devices, Inc. 39884c2b1SHawking Zhang * 49884c2b1SHawking Zhang * Permission is hereby granted, free of charge, to any person obtaining a 59884c2b1SHawking Zhang * copy of this software and associated documentation files (the "Software"), 69884c2b1SHawking Zhang * to deal in the Software without restriction, including without limitation 79884c2b1SHawking Zhang * the rights to use, copy, modify, merge, publish, distribute, sublicense, 89884c2b1SHawking Zhang * and/or sell copies of the Software, and to permit persons to whom the 99884c2b1SHawking Zhang * Software is furnished to do so, subject to the following conditions: 109884c2b1SHawking Zhang * 119884c2b1SHawking Zhang * The above copyright notice and this permission notice shall be included in 129884c2b1SHawking Zhang * all copies or substantial portions of the Software. 139884c2b1SHawking Zhang * 149884c2b1SHawking Zhang * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 159884c2b1SHawking Zhang * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 169884c2b1SHawking Zhang * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 179884c2b1SHawking Zhang * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 189884c2b1SHawking Zhang * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 199884c2b1SHawking Zhang * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 209884c2b1SHawking Zhang * OTHER DEALINGS IN THE SOFTWARE. 219884c2b1SHawking Zhang * 229884c2b1SHawking Zhang */ 239884c2b1SHawking Zhang #include "umc_v6_1.h" 249884c2b1SHawking Zhang #include "amdgpu_ras.h" 259884c2b1SHawking Zhang #include "amdgpu.h" 269884c2b1SHawking Zhang 279884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_offset.h" 289884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_sh_mask.h" 299884c2b1SHawking Zhang #include "umc/umc_6_1_1_offset.h" 309884c2b1SHawking Zhang #include "umc/umc_6_1_1_sh_mask.h" 31fb71a336SGuchun Chen #include "umc/umc_6_1_2_offset.h" 329884c2b1SHawking Zhang 33bd68fb94SJohn Clements #define UMC_6_INST_DIST 0x40000 34bd68fb94SJohn Clements 358c948103STao Zhou /* 368c948103STao Zhou * (addr / 256) * 8192, the higher 26 bits in ErrorAddr 378c948103STao Zhou * is the index of 8KB block 388c948103STao Zhou */ 398c948103STao Zhou #define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) 408c948103STao Zhou /* channel index is the index of 256B block */ 418c948103STao Zhou #define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) 428c948103STao Zhou /* offset in 256B block */ 438c948103STao Zhou #define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) 448c948103STao Zhou 45c8aa6ae3SJohn Clements #define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++) 46c8aa6ae3SJohn Clements #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) 47c8aa6ae3SJohn Clements #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) 48c8aa6ae3SJohn Clements 493aacf4eaSTao Zhou const uint32_t 50c2742aefSTao Zhou umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { 51c2742aefSTao Zhou {2, 18, 11, 27}, {4, 20, 13, 29}, 52c2742aefSTao Zhou {1, 17, 8, 24}, {7, 23, 14, 30}, 53c2742aefSTao Zhou {10, 26, 3, 19}, {12, 28, 5, 21}, 54c2742aefSTao Zhou {9, 25, 0, 16}, {15, 31, 6, 22} 55c2742aefSTao Zhou }; 56c2742aefSTao Zhou 57eee2eabaSJohn Clements static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev) 58eee2eabaSJohn Clements { 59eee2eabaSJohn Clements WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, 60eee2eabaSJohn Clements RSMU_UMC_INDEX_MODE_EN, 1); 61eee2eabaSJohn Clements } 62eee2eabaSJohn Clements 630ee51f1dSJohn Clements static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) 640ee51f1dSJohn Clements { 650ee51f1dSJohn Clements WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, 660ee51f1dSJohn Clements RSMU_UMC_INDEX_MODE_EN, 0); 670ee51f1dSJohn Clements } 680ee51f1dSJohn Clements 69eee2eabaSJohn Clements static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev) 70eee2eabaSJohn Clements { 71eee2eabaSJohn Clements uint32_t rsmu_umc_index; 72eee2eabaSJohn Clements 73eee2eabaSJohn Clements rsmu_umc_index = RREG32_SOC15(RSMU, 0, 74eee2eabaSJohn Clements mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); 75eee2eabaSJohn Clements 76eee2eabaSJohn Clements return REG_GET_FIELD(rsmu_umc_index, 77eee2eabaSJohn Clements RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, 78eee2eabaSJohn Clements RSMU_UMC_INDEX_MODE_EN); 79eee2eabaSJohn Clements } 80eee2eabaSJohn Clements 81bd68fb94SJohn Clements static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, 82bd68fb94SJohn Clements uint32_t umc_inst, 83bd68fb94SJohn Clements uint32_t ch_inst) 849884c2b1SHawking Zhang { 85bd68fb94SJohn Clements return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst; 8687d2b92fSTao Zhou } 8787d2b92fSTao Zhou 889884c2b1SHawking Zhang static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, 899884c2b1SHawking Zhang uint32_t umc_reg_offset, 909884c2b1SHawking Zhang unsigned long *error_count) 919884c2b1SHawking Zhang { 929884c2b1SHawking Zhang uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; 939884c2b1SHawking Zhang uint32_t ecc_err_cnt, ecc_err_cnt_addr; 949884c2b1SHawking Zhang uint64_t mc_umc_status; 959884c2b1SHawking Zhang uint32_t mc_umc_status_addr; 969884c2b1SHawking Zhang 974cf781c2SJohn Clements if (adev->asic_type == CHIP_ARCTURUS) { 984cf781c2SJohn Clements /* UMC 6_1_2 registers */ 994cf781c2SJohn Clements ecc_err_cnt_sel_addr = 1004cf781c2SJohn Clements SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT); 1014cf781c2SJohn Clements ecc_err_cnt_addr = 1024cf781c2SJohn Clements SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT); 1034cf781c2SJohn Clements mc_umc_status_addr = 1044cf781c2SJohn Clements SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); 1054cf781c2SJohn Clements } else { 1064cf781c2SJohn Clements /* UMC 6_1_1 registers */ 1079884c2b1SHawking Zhang ecc_err_cnt_sel_addr = 1089884c2b1SHawking Zhang SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); 1099884c2b1SHawking Zhang ecc_err_cnt_addr = 1109884c2b1SHawking Zhang SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); 1119884c2b1SHawking Zhang mc_umc_status_addr = 1129884c2b1SHawking Zhang SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); 1134cf781c2SJohn Clements } 1149884c2b1SHawking Zhang 1159884c2b1SHawking Zhang /* select the lower chip and check the error count */ 1160ee51f1dSJohn Clements ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); 1179884c2b1SHawking Zhang ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, 1189884c2b1SHawking Zhang EccErrCntCsSel, 0); 1190ee51f1dSJohn Clements WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); 1200ee51f1dSJohn Clements ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); 1219884c2b1SHawking Zhang *error_count += 122b1a58953STao Zhou (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - 123b1a58953STao Zhou UMC_V6_1_CE_CNT_INIT); 1249884c2b1SHawking Zhang /* clear the lower chip err count */ 1250ee51f1dSJohn Clements WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); 1269884c2b1SHawking Zhang 1279884c2b1SHawking Zhang /* select the higher chip and check the err counter */ 1289884c2b1SHawking Zhang ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, 1299884c2b1SHawking Zhang EccErrCntCsSel, 1); 1300ee51f1dSJohn Clements WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); 1310ee51f1dSJohn Clements ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); 1329884c2b1SHawking Zhang *error_count += 133b1a58953STao Zhou (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - 134b1a58953STao Zhou UMC_V6_1_CE_CNT_INIT); 1359884c2b1SHawking Zhang /* clear the higher chip err count */ 1360ee51f1dSJohn Clements WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); 1379884c2b1SHawking Zhang 1389884c2b1SHawking Zhang /* check for SRAM correctable error 1399884c2b1SHawking Zhang MCUMC_STATUS is a 64 bit register */ 140955c7120SJohn Clements mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); 1419884c2b1SHawking Zhang if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && 1429884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 1439884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) 1449884c2b1SHawking Zhang *error_count += 1; 1459884c2b1SHawking Zhang } 1469884c2b1SHawking Zhang 1479884c2b1SHawking Zhang static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev, 1489884c2b1SHawking Zhang uint32_t umc_reg_offset, 1499884c2b1SHawking Zhang unsigned long *error_count) 1509884c2b1SHawking Zhang { 1519884c2b1SHawking Zhang uint64_t mc_umc_status; 1529884c2b1SHawking Zhang uint32_t mc_umc_status_addr; 1539884c2b1SHawking Zhang 1544cf781c2SJohn Clements if (adev->asic_type == CHIP_ARCTURUS) { 1554cf781c2SJohn Clements /* UMC 6_1_2 registers */ 1564cf781c2SJohn Clements mc_umc_status_addr = 1574cf781c2SJohn Clements SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); 1584cf781c2SJohn Clements } else { 1594cf781c2SJohn Clements /* UMC 6_1_1 registers */ 1609884c2b1SHawking Zhang mc_umc_status_addr = 1619884c2b1SHawking Zhang SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); 1624cf781c2SJohn Clements } 1639884c2b1SHawking Zhang 1649884c2b1SHawking Zhang /* check the MCUMC_STATUS */ 165955c7120SJohn Clements mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); 166f1ed4afaSTao Zhou if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && 167f1ed4afaSTao Zhou (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || 168f1ed4afaSTao Zhou REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 1699884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || 1709884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || 1719884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) 1729884c2b1SHawking Zhang *error_count += 1; 1739884c2b1SHawking Zhang } 1749884c2b1SHawking Zhang 1752b671b60STao Zhou static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, 1762b671b60STao Zhou void *ras_error_status) 1772b671b60STao Zhou { 178bd68fb94SJohn Clements struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; 179bd68fb94SJohn Clements 180bd68fb94SJohn Clements uint32_t umc_inst = 0; 181bd68fb94SJohn Clements uint32_t ch_inst = 0; 182bd68fb94SJohn Clements uint32_t umc_reg_offset = 0; 183bd68fb94SJohn Clements 184eee2eabaSJohn Clements uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); 185eee2eabaSJohn Clements 186eee2eabaSJohn Clements if (rsmu_umc_index_state) 187eee2eabaSJohn Clements umc_v6_1_disable_umc_index_mode(adev); 188eee2eabaSJohn Clements 189d38c3ac7SGuchun Chen if ((adev->asic_type == CHIP_ARCTURUS) && 190d38c3ac7SGuchun Chen amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) 191d38c3ac7SGuchun Chen DRM_WARN("Fail to disable DF-Cstate.\n"); 192d38c3ac7SGuchun Chen 193c8aa6ae3SJohn Clements LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { 194bd68fb94SJohn Clements umc_reg_offset = get_umc_6_reg_offset(adev, 195bd68fb94SJohn Clements umc_inst, 196bd68fb94SJohn Clements ch_inst); 197bd68fb94SJohn Clements 198bd68fb94SJohn Clements umc_v6_1_query_correctable_error_count(adev, 199bd68fb94SJohn Clements umc_reg_offset, 200bd68fb94SJohn Clements &(err_data->ce_count)); 201bd68fb94SJohn Clements umc_v6_1_querry_uncorrectable_error_count(adev, 202bd68fb94SJohn Clements umc_reg_offset, 203bd68fb94SJohn Clements &(err_data->ue_count)); 204bd68fb94SJohn Clements } 205eee2eabaSJohn Clements 206d38c3ac7SGuchun Chen if ((adev->asic_type == CHIP_ARCTURUS) && 207d38c3ac7SGuchun Chen amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) 208d38c3ac7SGuchun Chen DRM_WARN("Fail to enable DF-Cstate\n"); 209d38c3ac7SGuchun Chen 210eee2eabaSJohn Clements if (rsmu_umc_index_state) 211eee2eabaSJohn Clements umc_v6_1_enable_umc_index_mode(adev); 212bd68fb94SJohn Clements } 2139884c2b1SHawking Zhang 2148c948103STao Zhou static void umc_v6_1_query_error_address(struct amdgpu_device *adev, 2152b671b60STao Zhou struct ras_err_data *err_data, 216bd68fb94SJohn Clements uint32_t umc_reg_offset, 217c8aa6ae3SJohn Clements uint32_t ch_inst, 218bd68fb94SJohn Clements uint32_t umc_inst) 2198c948103STao Zhou { 2202b671b60STao Zhou uint32_t lsb, mc_umc_status_addr; 2215d4667ecSGuchun Chen uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; 22287d2b92fSTao Zhou struct eeprom_table_record *err_rec; 223c8aa6ae3SJohn Clements uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; 2248c948103STao Zhou 2254cf781c2SJohn Clements if (adev->asic_type == CHIP_ARCTURUS) { 2264cf781c2SJohn Clements /* UMC 6_1_2 registers */ 2274cf781c2SJohn Clements mc_umc_status_addr = 2284cf781c2SJohn Clements SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); 2295d4667ecSGuchun Chen mc_umc_addrt0 = 2305d4667ecSGuchun Chen SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT); 2314cf781c2SJohn Clements } else { 2324cf781c2SJohn Clements /* UMC 6_1_1 registers */ 2338c948103STao Zhou mc_umc_status_addr = 2348c948103STao Zhou SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); 2355d4667ecSGuchun Chen mc_umc_addrt0 = 2365d4667ecSGuchun Chen SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); 2374cf781c2SJohn Clements } 2382b671b60STao Zhou 2391a2172b5SJohn Clements mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); 2401a2172b5SJohn Clements 2411a2172b5SJohn Clements if (mc_umc_status == 0) 2421a2172b5SJohn Clements return; 2431a2172b5SJohn Clements 2442b671b60STao Zhou if (!err_data->err_addr) { 2452b671b60STao Zhou /* clear umc status */ 246955c7120SJohn Clements WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); 2472b671b60STao Zhou return; 2482b671b60STao Zhou } 2492b671b60STao Zhou 25087d2b92fSTao Zhou err_rec = &err_data->err_addr[err_data->err_addr_cnt]; 2518c948103STao Zhou 2528c948103STao Zhou /* calculate error address if ue/ce error is detected */ 2538c948103STao Zhou if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 2548c948103STao Zhou (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 2558c948103STao Zhou REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { 2568c948103STao Zhou 257eee2eabaSJohn Clements err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); 2588c948103STao Zhou /* the lowest lsb bits should be ignored */ 2598c948103STao Zhou lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); 2608c948103STao Zhou err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); 2618c948103STao Zhou err_addr &= ~((0x1ULL << lsb) - 1); 2628c948103STao Zhou 2638c948103STao Zhou /* translate umc channel address to soc pa, 3 parts are included */ 26487d2b92fSTao Zhou retired_page = ADDR_OF_8KB_BLOCK(err_addr) | 2652b671b60STao Zhou ADDR_OF_256B_BLOCK(channel_index) | 2662b671b60STao Zhou OFFSET_IN_256B_BLOCK(err_addr); 2678c948103STao Zhou 26887d2b92fSTao Zhou /* we only save ue error information currently, ce is skipped */ 26987d2b92fSTao Zhou if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) 27087d2b92fSTao Zhou == 1) { 27187d2b92fSTao Zhou err_rec->address = err_addr; 27287d2b92fSTao Zhou /* page frame address is saved */ 273afa44809STao Zhou err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; 27487d2b92fSTao Zhou err_rec->ts = (uint64_t)ktime_get_real_seconds(); 27587d2b92fSTao Zhou err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; 27687d2b92fSTao Zhou err_rec->cu = 0; 27787d2b92fSTao Zhou err_rec->mem_channel = channel_index; 278bd68fb94SJohn Clements err_rec->mcumc_id = umc_inst; 27987d2b92fSTao Zhou 2808c948103STao Zhou err_data->err_addr_cnt++; 2818c948103STao Zhou } 28287d2b92fSTao Zhou } 2832b671b60STao Zhou 2842b671b60STao Zhou /* clear umc status */ 285955c7120SJohn Clements WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); 2868c948103STao Zhou } 2878c948103STao Zhou 2888c948103STao Zhou static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, 2898c948103STao Zhou void *ras_error_status) 2908c948103STao Zhou { 291bd68fb94SJohn Clements struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; 292bd68fb94SJohn Clements 293bd68fb94SJohn Clements uint32_t umc_inst = 0; 294bd68fb94SJohn Clements uint32_t ch_inst = 0; 295bd68fb94SJohn Clements uint32_t umc_reg_offset = 0; 296bd68fb94SJohn Clements 297eee2eabaSJohn Clements uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); 298eee2eabaSJohn Clements 299eee2eabaSJohn Clements if (rsmu_umc_index_state) 300eee2eabaSJohn Clements umc_v6_1_disable_umc_index_mode(adev); 301eee2eabaSJohn Clements 302d38c3ac7SGuchun Chen if ((adev->asic_type == CHIP_ARCTURUS) && 303d38c3ac7SGuchun Chen amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) 304d38c3ac7SGuchun Chen DRM_WARN("Fail to disable DF-Cstate.\n"); 305d38c3ac7SGuchun Chen 306c8aa6ae3SJohn Clements LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { 307bd68fb94SJohn Clements umc_reg_offset = get_umc_6_reg_offset(adev, 308bd68fb94SJohn Clements umc_inst, 309bd68fb94SJohn Clements ch_inst); 310bd68fb94SJohn Clements 311bd68fb94SJohn Clements umc_v6_1_query_error_address(adev, 312bd68fb94SJohn Clements err_data, 313bd68fb94SJohn Clements umc_reg_offset, 314bd68fb94SJohn Clements ch_inst, 315bd68fb94SJohn Clements umc_inst); 316bd68fb94SJohn Clements } 317bd68fb94SJohn Clements 318d38c3ac7SGuchun Chen if ((adev->asic_type == CHIP_ARCTURUS) && 319d38c3ac7SGuchun Chen amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) 320d38c3ac7SGuchun Chen DRM_WARN("Fail to enable DF-Cstate\n"); 321d38c3ac7SGuchun Chen 322eee2eabaSJohn Clements if (rsmu_umc_index_state) 323eee2eabaSJohn Clements umc_v6_1_enable_umc_index_mode(adev); 3248c948103STao Zhou } 3258c948103STao Zhou 326d99659a0STao Zhou static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, 327bd68fb94SJohn Clements uint32_t umc_reg_offset) 328b7f92097STao Zhou { 329b7f92097STao Zhou uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; 330b7f92097STao Zhou uint32_t ecc_err_cnt_addr; 331b7f92097STao Zhou 3324cf781c2SJohn Clements if (adev->asic_type == CHIP_ARCTURUS) { 3334cf781c2SJohn Clements /* UMC 6_1_2 registers */ 3344cf781c2SJohn Clements ecc_err_cnt_sel_addr = 3354cf781c2SJohn Clements SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT); 3364cf781c2SJohn Clements ecc_err_cnt_addr = 3374cf781c2SJohn Clements SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT); 3384cf781c2SJohn Clements } else { 3394cf781c2SJohn Clements /* UMC 6_1_1 registers */ 340b7f92097STao Zhou ecc_err_cnt_sel_addr = 341b7f92097STao Zhou SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); 342b7f92097STao Zhou ecc_err_cnt_addr = 343b7f92097STao Zhou SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); 3444cf781c2SJohn Clements } 345b7f92097STao Zhou 346b7f92097STao Zhou /* select the lower chip and check the error count */ 3470ee51f1dSJohn Clements ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); 348b7f92097STao Zhou ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, 349b7f92097STao Zhou EccErrCntCsSel, 0); 350b7f92097STao Zhou /* set ce error interrupt type to APIC based interrupt */ 351b7f92097STao Zhou ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, 352b7f92097STao Zhou EccErrInt, 0x1); 3530ee51f1dSJohn Clements WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); 354b7f92097STao Zhou /* set error count to initial value */ 3550ee51f1dSJohn Clements WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); 356b7f92097STao Zhou 357b7f92097STao Zhou /* select the higher chip and check the err counter */ 358b7f92097STao Zhou ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, 359b7f92097STao Zhou EccErrCntCsSel, 1); 3600ee51f1dSJohn Clements WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); 3610ee51f1dSJohn Clements WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); 362b7f92097STao Zhou } 363b7f92097STao Zhou 364d99659a0STao Zhou static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) 3653aacf4eaSTao Zhou { 366bd68fb94SJohn Clements uint32_t umc_inst = 0; 367bd68fb94SJohn Clements uint32_t ch_inst = 0; 368bd68fb94SJohn Clements uint32_t umc_reg_offset = 0; 3693aacf4eaSTao Zhou 370eee2eabaSJohn Clements uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); 371eee2eabaSJohn Clements 372eee2eabaSJohn Clements if (rsmu_umc_index_state) 3730ee51f1dSJohn Clements umc_v6_1_disable_umc_index_mode(adev); 3740ee51f1dSJohn Clements 375c8aa6ae3SJohn Clements LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { 376bd68fb94SJohn Clements umc_reg_offset = get_umc_6_reg_offset(adev, 377bd68fb94SJohn Clements umc_inst, 378bd68fb94SJohn Clements ch_inst); 379bd68fb94SJohn Clements 380bd68fb94SJohn Clements umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset); 381bd68fb94SJohn Clements } 382eee2eabaSJohn Clements 383eee2eabaSJohn Clements if (rsmu_umc_index_state) 384eee2eabaSJohn Clements umc_v6_1_enable_umc_index_mode(adev); 385bd68fb94SJohn Clements } 3863aacf4eaSTao Zhou 3879884c2b1SHawking Zhang const struct amdgpu_umc_funcs umc_v6_1_funcs = { 388d99659a0STao Zhou .err_cnt_init = umc_v6_1_err_cnt_init, 38986edcc7dSTao Zhou .ras_late_init = amdgpu_umc_ras_late_init, 3909884c2b1SHawking Zhang .query_ras_error_count = umc_v6_1_query_ras_error_count, 3918c948103STao Zhou .query_ras_error_address = umc_v6_1_query_ras_error_address, 3929884c2b1SHawking Zhang }; 393