xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c (revision 5bbfb64a)
19884c2b1SHawking Zhang /*
29884c2b1SHawking Zhang  * Copyright 2019 Advanced Micro Devices, Inc.
39884c2b1SHawking Zhang  *
49884c2b1SHawking Zhang  * Permission is hereby granted, free of charge, to any person obtaining a
59884c2b1SHawking Zhang  * copy of this software and associated documentation files (the "Software"),
69884c2b1SHawking Zhang  * to deal in the Software without restriction, including without limitation
79884c2b1SHawking Zhang  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
89884c2b1SHawking Zhang  * and/or sell copies of the Software, and to permit persons to whom the
99884c2b1SHawking Zhang  * Software is furnished to do so, subject to the following conditions:
109884c2b1SHawking Zhang  *
119884c2b1SHawking Zhang  * The above copyright notice and this permission notice shall be included in
129884c2b1SHawking Zhang  * all copies or substantial portions of the Software.
139884c2b1SHawking Zhang  *
149884c2b1SHawking Zhang  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
159884c2b1SHawking Zhang  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
169884c2b1SHawking Zhang  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
179884c2b1SHawking Zhang  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
189884c2b1SHawking Zhang  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
199884c2b1SHawking Zhang  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
209884c2b1SHawking Zhang  * OTHER DEALINGS IN THE SOFTWARE.
219884c2b1SHawking Zhang  *
229884c2b1SHawking Zhang  */
239884c2b1SHawking Zhang #include "umc_v6_1.h"
249884c2b1SHawking Zhang #include "amdgpu_ras.h"
259884c2b1SHawking Zhang #include "amdgpu.h"
269884c2b1SHawking Zhang 
279884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_offset.h"
289884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_sh_mask.h"
299884c2b1SHawking Zhang #include "umc/umc_6_1_1_offset.h"
309884c2b1SHawking Zhang #include "umc/umc_6_1_1_sh_mask.h"
319884c2b1SHawking Zhang 
329884c2b1SHawking Zhang static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev,
339884c2b1SHawking Zhang 					   uint32_t umc_instance)
349884c2b1SHawking Zhang {
359884c2b1SHawking Zhang 	uint32_t rsmu_umc_index;
369884c2b1SHawking Zhang 
379884c2b1SHawking Zhang 	rsmu_umc_index = RREG32_SOC15(RSMU, 0,
389884c2b1SHawking Zhang 			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
399884c2b1SHawking Zhang 	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
409884c2b1SHawking Zhang 			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
419884c2b1SHawking Zhang 			RSMU_UMC_INDEX_MODE_EN, 1);
429884c2b1SHawking Zhang 	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
439884c2b1SHawking Zhang 			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
449884c2b1SHawking Zhang 			RSMU_UMC_INDEX_INSTANCE, umc_instance);
459884c2b1SHawking Zhang 	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
469884c2b1SHawking Zhang 			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
479884c2b1SHawking Zhang 			RSMU_UMC_INDEX_WREN, 1 << umc_instance);
489884c2b1SHawking Zhang 	WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
499884c2b1SHawking Zhang 				rsmu_umc_index);
509884c2b1SHawking Zhang }
519884c2b1SHawking Zhang 
529884c2b1SHawking Zhang static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
539884c2b1SHawking Zhang {
549884c2b1SHawking Zhang 	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
559884c2b1SHawking Zhang 			RSMU_UMC_INDEX_MODE_EN, 0);
569884c2b1SHawking Zhang }
579884c2b1SHawking Zhang 
589884c2b1SHawking Zhang static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
599884c2b1SHawking Zhang 						   uint32_t umc_reg_offset,
609884c2b1SHawking Zhang 						   unsigned long *error_count)
619884c2b1SHawking Zhang {
629884c2b1SHawking Zhang 	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
639884c2b1SHawking Zhang 	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
649884c2b1SHawking Zhang 	uint64_t mc_umc_status;
659884c2b1SHawking Zhang 	uint32_t mc_umc_status_addr;
669884c2b1SHawking Zhang 
679884c2b1SHawking Zhang 	ecc_err_cnt_sel_addr =
689884c2b1SHawking Zhang 		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
699884c2b1SHawking Zhang 	ecc_err_cnt_addr =
709884c2b1SHawking Zhang 		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
719884c2b1SHawking Zhang 	mc_umc_status_addr =
729884c2b1SHawking Zhang 		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
739884c2b1SHawking Zhang 
749884c2b1SHawking Zhang 	/* select the lower chip and check the error count */
759884c2b1SHawking Zhang 	ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
769884c2b1SHawking Zhang 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
779884c2b1SHawking Zhang 					EccErrCntCsSel, 0);
789884c2b1SHawking Zhang 	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
799884c2b1SHawking Zhang 	ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
809884c2b1SHawking Zhang 	*error_count +=
819884c2b1SHawking Zhang 		REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt);
829884c2b1SHawking Zhang 	/* clear the lower chip err count */
839884c2b1SHawking Zhang 	WREG32(ecc_err_cnt_addr + umc_reg_offset, 0);
849884c2b1SHawking Zhang 
859884c2b1SHawking Zhang 	/* select the higher chip and check the err counter */
869884c2b1SHawking Zhang 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
879884c2b1SHawking Zhang 					EccErrCntCsSel, 1);
889884c2b1SHawking Zhang 	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
899884c2b1SHawking Zhang 	ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
909884c2b1SHawking Zhang 	*error_count +=
919884c2b1SHawking Zhang 		REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt);
929884c2b1SHawking Zhang 	/* clear the higher chip err count */
939884c2b1SHawking Zhang 	WREG32(ecc_err_cnt_addr + umc_reg_offset, 0);
949884c2b1SHawking Zhang 
959884c2b1SHawking Zhang 	/* check for SRAM correctable error
969884c2b1SHawking Zhang 	  MCUMC_STATUS is a 64 bit register */
975bbfb64aSTao Zhou 	mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset);
989884c2b1SHawking Zhang 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
999884c2b1SHawking Zhang 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
1009884c2b1SHawking Zhang 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
1019884c2b1SHawking Zhang 		*error_count += 1;
1029884c2b1SHawking Zhang }
1039884c2b1SHawking Zhang 
1049884c2b1SHawking Zhang static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
1059884c2b1SHawking Zhang 						      uint32_t umc_reg_offset,
1069884c2b1SHawking Zhang 						      unsigned long *error_count)
1079884c2b1SHawking Zhang {
1089884c2b1SHawking Zhang 	uint64_t mc_umc_status;
1099884c2b1SHawking Zhang 	uint32_t mc_umc_status_addr;
1109884c2b1SHawking Zhang 
1119884c2b1SHawking Zhang 	mc_umc_status_addr =
1129884c2b1SHawking Zhang                 SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
1139884c2b1SHawking Zhang 
1149884c2b1SHawking Zhang 	/* check the MCUMC_STATUS */
1155bbfb64aSTao Zhou 	mc_umc_status = RREG64(mc_umc_status_addr + umc_reg_offset);
1169884c2b1SHawking Zhang 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
1179884c2b1SHawking Zhang 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
1189884c2b1SHawking Zhang 		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
1199884c2b1SHawking Zhang 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
1209884c2b1SHawking Zhang 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
1219884c2b1SHawking Zhang 		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
1229884c2b1SHawking Zhang 		*error_count += 1;
1239884c2b1SHawking Zhang }
1249884c2b1SHawking Zhang 
1259884c2b1SHawking Zhang static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
1269884c2b1SHawking Zhang 					   void *ras_error_status)
1279884c2b1SHawking Zhang {
1289884c2b1SHawking Zhang 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
1295bbfb64aSTao Zhou 	uint32_t umc_inst, channel_inst, umc_reg_offset, mc_umc_status_addr;
1305bbfb64aSTao Zhou 
1315bbfb64aSTao Zhou 	mc_umc_status_addr =
1325bbfb64aSTao Zhou 		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
1339884c2b1SHawking Zhang 
1349884c2b1SHawking Zhang 	for (umc_inst = 0; umc_inst < UMC_V6_1_UMC_INSTANCE_NUM; umc_inst++) {
1359884c2b1SHawking Zhang 		/* enable the index mode to query eror count per channel */
1369884c2b1SHawking Zhang 		umc_v6_1_enable_umc_index_mode(adev, umc_inst);
1379884c2b1SHawking Zhang 		for (channel_inst = 0; channel_inst < UMC_V6_1_CHANNEL_INSTANCE_NUM; channel_inst++) {
1389884c2b1SHawking Zhang 			/* calc the register offset according to channel instance */
1399884c2b1SHawking Zhang 			umc_reg_offset = UMC_V6_1_PER_CHANNEL_OFFSET * channel_inst;
1409884c2b1SHawking Zhang 			umc_v6_1_query_correctable_error_count(adev, umc_reg_offset,
1419884c2b1SHawking Zhang 							       &(err_data->ce_count));
1429884c2b1SHawking Zhang 			umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset,
1439884c2b1SHawking Zhang 								  &(err_data->ue_count));
1445bbfb64aSTao Zhou 			/* clear umc status */
1455bbfb64aSTao Zhou 			WREG64(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
1469884c2b1SHawking Zhang 		}
1479884c2b1SHawking Zhang 	}
1489884c2b1SHawking Zhang 	umc_v6_1_disable_umc_index_mode(adev);
1499884c2b1SHawking Zhang }
1509884c2b1SHawking Zhang 
1519884c2b1SHawking Zhang const struct amdgpu_umc_funcs umc_v6_1_funcs = {
1529884c2b1SHawking Zhang 	.query_ras_error_count = umc_v6_1_query_ras_error_count,
1539884c2b1SHawking Zhang };
154