19884c2b1SHawking Zhang /* 29884c2b1SHawking Zhang * Copyright 2019 Advanced Micro Devices, Inc. 39884c2b1SHawking Zhang * 49884c2b1SHawking Zhang * Permission is hereby granted, free of charge, to any person obtaining a 59884c2b1SHawking Zhang * copy of this software and associated documentation files (the "Software"), 69884c2b1SHawking Zhang * to deal in the Software without restriction, including without limitation 79884c2b1SHawking Zhang * the rights to use, copy, modify, merge, publish, distribute, sublicense, 89884c2b1SHawking Zhang * and/or sell copies of the Software, and to permit persons to whom the 99884c2b1SHawking Zhang * Software is furnished to do so, subject to the following conditions: 109884c2b1SHawking Zhang * 119884c2b1SHawking Zhang * The above copyright notice and this permission notice shall be included in 129884c2b1SHawking Zhang * all copies or substantial portions of the Software. 139884c2b1SHawking Zhang * 149884c2b1SHawking Zhang * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 159884c2b1SHawking Zhang * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 169884c2b1SHawking Zhang * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 179884c2b1SHawking Zhang * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 189884c2b1SHawking Zhang * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 199884c2b1SHawking Zhang * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 209884c2b1SHawking Zhang * OTHER DEALINGS IN THE SOFTWARE. 219884c2b1SHawking Zhang * 229884c2b1SHawking Zhang */ 239884c2b1SHawking Zhang #include "umc_v6_1.h" 249884c2b1SHawking Zhang #include "amdgpu_ras.h" 259884c2b1SHawking Zhang #include "amdgpu.h" 269884c2b1SHawking Zhang 279884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_offset.h" 289884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_sh_mask.h" 299884c2b1SHawking Zhang #include "umc/umc_6_1_1_offset.h" 309884c2b1SHawking Zhang #include "umc/umc_6_1_1_sh_mask.h" 319884c2b1SHawking Zhang 329884c2b1SHawking Zhang static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, 339884c2b1SHawking Zhang uint32_t umc_instance) 349884c2b1SHawking Zhang { 359884c2b1SHawking Zhang uint32_t rsmu_umc_index; 369884c2b1SHawking Zhang 379884c2b1SHawking Zhang rsmu_umc_index = RREG32_SOC15(RSMU, 0, 389884c2b1SHawking Zhang mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); 399884c2b1SHawking Zhang rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, 409884c2b1SHawking Zhang RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, 419884c2b1SHawking Zhang RSMU_UMC_INDEX_MODE_EN, 1); 429884c2b1SHawking Zhang rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, 439884c2b1SHawking Zhang RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, 449884c2b1SHawking Zhang RSMU_UMC_INDEX_INSTANCE, umc_instance); 459884c2b1SHawking Zhang rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, 469884c2b1SHawking Zhang RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, 479884c2b1SHawking Zhang RSMU_UMC_INDEX_WREN, 1 << umc_instance); 489884c2b1SHawking Zhang WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, 499884c2b1SHawking Zhang rsmu_umc_index); 509884c2b1SHawking Zhang } 519884c2b1SHawking Zhang 529884c2b1SHawking Zhang static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) 539884c2b1SHawking Zhang { 549884c2b1SHawking Zhang WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, 559884c2b1SHawking Zhang RSMU_UMC_INDEX_MODE_EN, 0); 569884c2b1SHawking Zhang } 579884c2b1SHawking Zhang 589884c2b1SHawking Zhang static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, 599884c2b1SHawking Zhang uint32_t umc_reg_offset, 609884c2b1SHawking Zhang unsigned long *error_count) 619884c2b1SHawking Zhang { 629884c2b1SHawking Zhang uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; 639884c2b1SHawking Zhang uint32_t ecc_err_cnt, ecc_err_cnt_addr; 649884c2b1SHawking Zhang uint64_t mc_umc_status; 659884c2b1SHawking Zhang uint32_t mc_umc_status_addr; 669884c2b1SHawking Zhang 679884c2b1SHawking Zhang ecc_err_cnt_sel_addr = 689884c2b1SHawking Zhang SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); 699884c2b1SHawking Zhang ecc_err_cnt_addr = 709884c2b1SHawking Zhang SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); 719884c2b1SHawking Zhang mc_umc_status_addr = 729884c2b1SHawking Zhang SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); 739884c2b1SHawking Zhang 749884c2b1SHawking Zhang /* select the lower chip and check the error count */ 759884c2b1SHawking Zhang ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); 769884c2b1SHawking Zhang ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, 779884c2b1SHawking Zhang EccErrCntCsSel, 0); 789884c2b1SHawking Zhang WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); 799884c2b1SHawking Zhang ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); 809884c2b1SHawking Zhang *error_count += 819884c2b1SHawking Zhang REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt); 829884c2b1SHawking Zhang /* clear the lower chip err count */ 839884c2b1SHawking Zhang WREG32(ecc_err_cnt_addr + umc_reg_offset, 0); 849884c2b1SHawking Zhang 859884c2b1SHawking Zhang /* select the higher chip and check the err counter */ 869884c2b1SHawking Zhang ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, 879884c2b1SHawking Zhang EccErrCntCsSel, 1); 889884c2b1SHawking Zhang WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); 899884c2b1SHawking Zhang ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); 909884c2b1SHawking Zhang *error_count += 919884c2b1SHawking Zhang REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt); 929884c2b1SHawking Zhang /* clear the higher chip err count */ 939884c2b1SHawking Zhang WREG32(ecc_err_cnt_addr + umc_reg_offset, 0); 949884c2b1SHawking Zhang 959884c2b1SHawking Zhang /* check for SRAM correctable error 969884c2b1SHawking Zhang MCUMC_STATUS is a 64 bit register */ 975bbfb64aSTao Zhou mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); 989884c2b1SHawking Zhang if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && 999884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 1009884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) 1019884c2b1SHawking Zhang *error_count += 1; 1029884c2b1SHawking Zhang } 1039884c2b1SHawking Zhang 1049884c2b1SHawking Zhang static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev, 1059884c2b1SHawking Zhang uint32_t umc_reg_offset, 1069884c2b1SHawking Zhang unsigned long *error_count) 1079884c2b1SHawking Zhang { 1089884c2b1SHawking Zhang uint64_t mc_umc_status; 1099884c2b1SHawking Zhang uint32_t mc_umc_status_addr; 1109884c2b1SHawking Zhang 1119884c2b1SHawking Zhang mc_umc_status_addr = 1129884c2b1SHawking Zhang SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); 1139884c2b1SHawking Zhang 1149884c2b1SHawking Zhang /* check the MCUMC_STATUS */ 1155bbfb64aSTao Zhou mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset); 1169884c2b1SHawking Zhang if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 1179884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && 1189884c2b1SHawking Zhang (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 1199884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || 1209884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || 1219884c2b1SHawking Zhang REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) 1229884c2b1SHawking Zhang *error_count += 1; 1239884c2b1SHawking Zhang } 1249884c2b1SHawking Zhang 1259884c2b1SHawking Zhang static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, 1269884c2b1SHawking Zhang void *ras_error_status) 1279884c2b1SHawking Zhang { 1289884c2b1SHawking Zhang struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 1295bbfb64aSTao Zhou uint32_t umc_inst, channel_inst, umc_reg_offset, mc_umc_status_addr; 1305bbfb64aSTao Zhou 1315bbfb64aSTao Zhou mc_umc_status_addr = 1325bbfb64aSTao Zhou SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); 1339884c2b1SHawking Zhang 1349884c2b1SHawking Zhang for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) { 1359884c2b1SHawking Zhang /* enable the index mode to query eror count per channel */ 1369884c2b1SHawking Zhang umc_v6_1_enable_umc_index_mode(adev, umc_inst); 1379884c2b1SHawking Zhang for (channel_inst = 0; channel_inst < UMC_V6_1_CHANNEL_INSTANCE_NUM; channel_inst++) { 1389884c2b1SHawking Zhang /* calc the register offset according to channel instance */ 1399884c2b1SHawking Zhang umc_reg_offset = UMC_V6_1_PER_CHANNEL_OFFSET * channel_inst; 1409884c2b1SHawking Zhang umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, 1419884c2b1SHawking Zhang &(err_data->ce_count)); 1429884c2b1SHawking Zhang umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, 1439884c2b1SHawking Zhang &(err_data->ue_count)); 1445bbfb64aSTao Zhou /* clear umc status */ 1455bbfb64aSTao Zhou WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL); 1469884c2b1SHawking Zhang } 1479884c2b1SHawking Zhang } 1489884c2b1SHawking Zhang umc_v6_1_disable_umc_index_mode(adev); 1499884c2b1SHawking Zhang } 1509884c2b1SHawking Zhang 1519884c2b1SHawking Zhang const struct amdgpu_umc_funcs umc_v6_1_funcs = { 1529884c2b1SHawking Zhang .query_ras_error_count = umc_v6_1_query_ras_error_count, 1539884c2b1SHawking Zhang }; 154