xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c (revision 1a2172b5)
19884c2b1SHawking Zhang /*
29884c2b1SHawking Zhang  * Copyright 2019 Advanced Micro Devices, Inc.
39884c2b1SHawking Zhang  *
49884c2b1SHawking Zhang  * Permission is hereby granted, free of charge, to any person obtaining a
59884c2b1SHawking Zhang  * copy of this software and associated documentation files (the "Software"),
69884c2b1SHawking Zhang  * to deal in the Software without restriction, including without limitation
79884c2b1SHawking Zhang  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
89884c2b1SHawking Zhang  * and/or sell copies of the Software, and to permit persons to whom the
99884c2b1SHawking Zhang  * Software is furnished to do so, subject to the following conditions:
109884c2b1SHawking Zhang  *
119884c2b1SHawking Zhang  * The above copyright notice and this permission notice shall be included in
129884c2b1SHawking Zhang  * all copies or substantial portions of the Software.
139884c2b1SHawking Zhang  *
149884c2b1SHawking Zhang  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
159884c2b1SHawking Zhang  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
169884c2b1SHawking Zhang  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
179884c2b1SHawking Zhang  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
189884c2b1SHawking Zhang  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
199884c2b1SHawking Zhang  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
209884c2b1SHawking Zhang  * OTHER DEALINGS IN THE SOFTWARE.
219884c2b1SHawking Zhang  *
229884c2b1SHawking Zhang  */
239884c2b1SHawking Zhang #include "umc_v6_1.h"
249884c2b1SHawking Zhang #include "amdgpu_ras.h"
259884c2b1SHawking Zhang #include "amdgpu.h"
269884c2b1SHawking Zhang 
279884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_offset.h"
289884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_sh_mask.h"
299884c2b1SHawking Zhang #include "umc/umc_6_1_1_offset.h"
309884c2b1SHawking Zhang #include "umc/umc_6_1_1_sh_mask.h"
31fb71a336SGuchun Chen #include "umc/umc_6_1_2_offset.h"
329884c2b1SHawking Zhang 
/* per-UMC-instance stride used by get_umc_6_reg_offset() */
#define UMC_6_INST_DIST			0x40000

/*
 * (addr / 256) * 8192: the higher 26 bits in ErrorAddr are the index of
 * the 8KB block, so drop the low 8 bits and scale the index by 8KB.
 */
#define ADDR_OF_8KB_BLOCK(addr)			((((addr) & ~0xffULL) >> 8) << 13)
/* the channel index is the index of the 256B block */
#define ADDR_OF_256B_BLOCK(channel_index)	((channel_index) << 8)
/* byte offset inside the 256B block */
#define OFFSET_IN_256B_BLOCK(addr)		((addr) & 0xffULL)

/*
 * Iteration helpers. They read the instance counts through a variable named
 * 'adev', which therefore has to be in scope wherever they are expanded.
 */
#define LOOP_UMC_INST(umc_inst) \
	for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
#define LOOP_UMC_CH_INST(ch_inst) \
	for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
/* every channel of every UMC instance: the channel loop is the inner one */
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) \
	LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
48c8aa6ae3SJohn Clements 
493aacf4eaSTao Zhou const uint32_t
50c2742aefSTao Zhou 	umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
51c2742aefSTao Zhou 		{2, 18, 11, 27},	{4, 20, 13, 29},
52c2742aefSTao Zhou 		{1, 17, 8, 24},		{7, 23, 14, 30},
53c2742aefSTao Zhou 		{10, 26, 3, 19},	{12, 28, 5, 21},
54c2742aefSTao Zhou 		{9, 25, 0, 16},		{15, 31, 6, 22}
55c2742aefSTao Zhou };
56c2742aefSTao Zhou 
57eee2eabaSJohn Clements static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
58eee2eabaSJohn Clements {
59eee2eabaSJohn Clements 	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
60eee2eabaSJohn Clements 			RSMU_UMC_INDEX_MODE_EN, 1);
61eee2eabaSJohn Clements }
62eee2eabaSJohn Clements 
630ee51f1dSJohn Clements static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
640ee51f1dSJohn Clements {
650ee51f1dSJohn Clements 	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
660ee51f1dSJohn Clements 			RSMU_UMC_INDEX_MODE_EN, 0);
670ee51f1dSJohn Clements }
680ee51f1dSJohn Clements 
69eee2eabaSJohn Clements static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
70eee2eabaSJohn Clements {
71eee2eabaSJohn Clements 	uint32_t rsmu_umc_index;
72eee2eabaSJohn Clements 
73eee2eabaSJohn Clements 	rsmu_umc_index = RREG32_SOC15(RSMU, 0,
74eee2eabaSJohn Clements 			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
75eee2eabaSJohn Clements 
76eee2eabaSJohn Clements 	return REG_GET_FIELD(rsmu_umc_index,
77eee2eabaSJohn Clements 			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
78eee2eabaSJohn Clements 			RSMU_UMC_INDEX_MODE_EN);
79eee2eabaSJohn Clements }
80eee2eabaSJohn Clements 
81bd68fb94SJohn Clements static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
82bd68fb94SJohn Clements 					    uint32_t umc_inst,
83bd68fb94SJohn Clements 					    uint32_t ch_inst)
849884c2b1SHawking Zhang {
85bd68fb94SJohn Clements 	return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst;
8687d2b92fSTao Zhou }
8787d2b92fSTao Zhou 
889884c2b1SHawking Zhang static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
899884c2b1SHawking Zhang 						   uint32_t umc_reg_offset,
909884c2b1SHawking Zhang 						   unsigned long *error_count)
919884c2b1SHawking Zhang {
929884c2b1SHawking Zhang 	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
939884c2b1SHawking Zhang 	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
949884c2b1SHawking Zhang 	uint64_t mc_umc_status;
959884c2b1SHawking Zhang 	uint32_t mc_umc_status_addr;
969884c2b1SHawking Zhang 
974cf781c2SJohn Clements 	if (adev->asic_type == CHIP_ARCTURUS) {
984cf781c2SJohn Clements 		/* UMC 6_1_2 registers */
994cf781c2SJohn Clements 		ecc_err_cnt_sel_addr =
1004cf781c2SJohn Clements 			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
1014cf781c2SJohn Clements 		ecc_err_cnt_addr =
1024cf781c2SJohn Clements 			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
1034cf781c2SJohn Clements 		mc_umc_status_addr =
1044cf781c2SJohn Clements 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
1054cf781c2SJohn Clements 	} else {
1064cf781c2SJohn Clements 		/* UMC 6_1_1 registers */
1079884c2b1SHawking Zhang 		ecc_err_cnt_sel_addr =
1089884c2b1SHawking Zhang 			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
1099884c2b1SHawking Zhang 		ecc_err_cnt_addr =
1109884c2b1SHawking Zhang 			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
1119884c2b1SHawking Zhang 		mc_umc_status_addr =
1129884c2b1SHawking Zhang 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
1134cf781c2SJohn Clements 	}
1149884c2b1SHawking Zhang 
1159884c2b1SHawking Zhang 	/* select the lower chip and check the error count */
1160ee51f1dSJohn Clements 	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
1179884c2b1SHawking Zhang 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
1189884c2b1SHawking Zhang 					EccErrCntCsSel, 0);
1190ee51f1dSJohn Clements 	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
1200ee51f1dSJohn Clements 	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
1219884c2b1SHawking Zhang 	*error_count +=
122b1a58953STao Zhou 		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
123b1a58953STao Zhou 		 UMC_V6_1_CE_CNT_INIT);
1249884c2b1SHawking Zhang 	/* clear the lower chip err count */
1250ee51f1dSJohn Clements 	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
1269884c2b1SHawking Zhang 
1279884c2b1SHawking Zhang 	/* select the higher chip and check the err counter */
1289884c2b1SHawking Zhang 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
1299884c2b1SHawking Zhang 					EccErrCntCsSel, 1);
1300ee51f1dSJohn Clements 	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
1310ee51f1dSJohn Clements 	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
1329884c2b1SHawking Zhang 	*error_count +=
133b1a58953STao Zhou 		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
134b1a58953STao Zhou 		 UMC_V6_1_CE_CNT_INIT);
1359884c2b1SHawking Zhang 	/* clear the higher chip err count */
1360ee51f1dSJohn Clements 	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
1379884c2b1SHawking Zhang 
1389884c2b1SHawking Zhang 	/* check for SRAM correctable error
1399884c2b1SHawking Zhang 	  MCUMC_STATUS is a 64 bit register */
140955c7120SJohn Clements 	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
1419884c2b1SHawking Zhang 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
1429884c2b1SHawking Zhang 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
1439884c2b1SHawking Zhang 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
1449884c2b1SHawking Zhang 		*error_count += 1;
1459884c2b1SHawking Zhang }
1469884c2b1SHawking Zhang 
1479884c2b1SHawking Zhang static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
1489884c2b1SHawking Zhang 						      uint32_t umc_reg_offset,
1499884c2b1SHawking Zhang 						      unsigned long *error_count)
1509884c2b1SHawking Zhang {
1519884c2b1SHawking Zhang 	uint64_t mc_umc_status;
1529884c2b1SHawking Zhang 	uint32_t mc_umc_status_addr;
1539884c2b1SHawking Zhang 
1544cf781c2SJohn Clements 	if (adev->asic_type == CHIP_ARCTURUS) {
1554cf781c2SJohn Clements 		/* UMC 6_1_2 registers */
1564cf781c2SJohn Clements 		mc_umc_status_addr =
1574cf781c2SJohn Clements 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
1584cf781c2SJohn Clements 	} else {
1594cf781c2SJohn Clements 		/* UMC 6_1_1 registers */
1609884c2b1SHawking Zhang 		mc_umc_status_addr =
1619884c2b1SHawking Zhang 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
1624cf781c2SJohn Clements 	}
1639884c2b1SHawking Zhang 
1649884c2b1SHawking Zhang 	/* check the MCUMC_STATUS */
165955c7120SJohn Clements 	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
166f1ed4afaSTao Zhou 	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
167f1ed4afaSTao Zhou 	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
168f1ed4afaSTao Zhou 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
1699884c2b1SHawking Zhang 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
1709884c2b1SHawking Zhang 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
1719884c2b1SHawking Zhang 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
1729884c2b1SHawking Zhang 		*error_count += 1;
1739884c2b1SHawking Zhang }
1749884c2b1SHawking Zhang 
1752b671b60STao Zhou static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
1762b671b60STao Zhou 					   void *ras_error_status)
1772b671b60STao Zhou {
178bd68fb94SJohn Clements 	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
179bd68fb94SJohn Clements 
180bd68fb94SJohn Clements 	uint32_t umc_inst        = 0;
181bd68fb94SJohn Clements 	uint32_t ch_inst         = 0;
182bd68fb94SJohn Clements 	uint32_t umc_reg_offset  = 0;
183bd68fb94SJohn Clements 
184eee2eabaSJohn Clements 	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
185eee2eabaSJohn Clements 
186eee2eabaSJohn Clements 	if (rsmu_umc_index_state)
187eee2eabaSJohn Clements 		umc_v6_1_disable_umc_index_mode(adev);
188eee2eabaSJohn Clements 
189d38c3ac7SGuchun Chen 	if ((adev->asic_type == CHIP_ARCTURUS) &&
190d38c3ac7SGuchun Chen 		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
191d38c3ac7SGuchun Chen 		DRM_WARN("Fail to disable DF-Cstate.\n");
192d38c3ac7SGuchun Chen 
193c8aa6ae3SJohn Clements 	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
194bd68fb94SJohn Clements 		umc_reg_offset = get_umc_6_reg_offset(adev,
195bd68fb94SJohn Clements 						      umc_inst,
196bd68fb94SJohn Clements 						      ch_inst);
197bd68fb94SJohn Clements 
198bd68fb94SJohn Clements 		umc_v6_1_query_correctable_error_count(adev,
199bd68fb94SJohn Clements 						       umc_reg_offset,
200bd68fb94SJohn Clements 						       &(err_data->ce_count));
201bd68fb94SJohn Clements 		umc_v6_1_querry_uncorrectable_error_count(adev,
202bd68fb94SJohn Clements 							  umc_reg_offset,
203bd68fb94SJohn Clements 							  &(err_data->ue_count));
204bd68fb94SJohn Clements 	}
205eee2eabaSJohn Clements 
206d38c3ac7SGuchun Chen 	if ((adev->asic_type == CHIP_ARCTURUS) &&
207d38c3ac7SGuchun Chen 		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
208d38c3ac7SGuchun Chen 		DRM_WARN("Fail to enable DF-Cstate\n");
209d38c3ac7SGuchun Chen 
210eee2eabaSJohn Clements 	if (rsmu_umc_index_state)
211eee2eabaSJohn Clements 		umc_v6_1_enable_umc_index_mode(adev);
212bd68fb94SJohn Clements }
2139884c2b1SHawking Zhang 
2148c948103STao Zhou static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
2152b671b60STao Zhou 					 struct ras_err_data *err_data,
216bd68fb94SJohn Clements 					 uint32_t umc_reg_offset,
217c8aa6ae3SJohn Clements 					 uint32_t ch_inst,
218bd68fb94SJohn Clements 					 uint32_t umc_inst)
2198c948103STao Zhou {
2202b671b60STao Zhou 	uint32_t lsb, mc_umc_status_addr;
2215d4667ecSGuchun Chen 	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
22287d2b92fSTao Zhou 	struct eeprom_table_record *err_rec;
223c8aa6ae3SJohn Clements 	uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
2248c948103STao Zhou 
2254cf781c2SJohn Clements 	if (adev->asic_type == CHIP_ARCTURUS) {
2264cf781c2SJohn Clements 		/* UMC 6_1_2 registers */
2274cf781c2SJohn Clements 		mc_umc_status_addr =
2284cf781c2SJohn Clements 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
2295d4667ecSGuchun Chen 		mc_umc_addrt0 =
2305d4667ecSGuchun Chen 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
2314cf781c2SJohn Clements 	} else {
2324cf781c2SJohn Clements 		/* UMC 6_1_1 registers */
2338c948103STao Zhou 		mc_umc_status_addr =
2348c948103STao Zhou 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
2355d4667ecSGuchun Chen 		mc_umc_addrt0 =
2365d4667ecSGuchun Chen 			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
2374cf781c2SJohn Clements 	}
2382b671b60STao Zhou 
2391a2172b5SJohn Clements 	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
2401a2172b5SJohn Clements 
2411a2172b5SJohn Clements 	if (mc_umc_status == 0)
2421a2172b5SJohn Clements 		return;
2431a2172b5SJohn Clements 
2442b671b60STao Zhou 	if (!err_data->err_addr) {
2452b671b60STao Zhou 		/* clear umc status */
246955c7120SJohn Clements 		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
2472b671b60STao Zhou 		return;
2482b671b60STao Zhou 	}
2492b671b60STao Zhou 
25087d2b92fSTao Zhou 	err_rec = &err_data->err_addr[err_data->err_addr_cnt];
2518c948103STao Zhou 
2528c948103STao Zhou 	/* calculate error address if ue/ce error is detected */
2538c948103STao Zhou 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
2548c948103STao Zhou 	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
2558c948103STao Zhou 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
2568c948103STao Zhou 
257eee2eabaSJohn Clements 		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
2588c948103STao Zhou 		/* the lowest lsb bits should be ignored */
2598c948103STao Zhou 		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
2608c948103STao Zhou 		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
2618c948103STao Zhou 		err_addr &= ~((0x1ULL << lsb) - 1);
2628c948103STao Zhou 
2638c948103STao Zhou 		/* translate umc channel address to soc pa, 3 parts are included */
26487d2b92fSTao Zhou 		retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
2652b671b60STao Zhou 				ADDR_OF_256B_BLOCK(channel_index) |
2662b671b60STao Zhou 				OFFSET_IN_256B_BLOCK(err_addr);
2678c948103STao Zhou 
26887d2b92fSTao Zhou 		/* we only save ue error information currently, ce is skipped */
26987d2b92fSTao Zhou 		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
27087d2b92fSTao Zhou 				== 1) {
27187d2b92fSTao Zhou 			err_rec->address = err_addr;
27287d2b92fSTao Zhou 			/* page frame address is saved */
273afa44809STao Zhou 			err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
27487d2b92fSTao Zhou 			err_rec->ts = (uint64_t)ktime_get_real_seconds();
27587d2b92fSTao Zhou 			err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
27687d2b92fSTao Zhou 			err_rec->cu = 0;
27787d2b92fSTao Zhou 			err_rec->mem_channel = channel_index;
278bd68fb94SJohn Clements 			err_rec->mcumc_id = umc_inst;
27987d2b92fSTao Zhou 
2808c948103STao Zhou 			err_data->err_addr_cnt++;
2818c948103STao Zhou 		}
28287d2b92fSTao Zhou 	}
2832b671b60STao Zhou 
2842b671b60STao Zhou 	/* clear umc status */
285955c7120SJohn Clements 	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
2868c948103STao Zhou }
2878c948103STao Zhou 
2888c948103STao Zhou static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
2898c948103STao Zhou 					     void *ras_error_status)
2908c948103STao Zhou {
291bd68fb94SJohn Clements 	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
292bd68fb94SJohn Clements 
293bd68fb94SJohn Clements 	uint32_t umc_inst        = 0;
294bd68fb94SJohn Clements 	uint32_t ch_inst         = 0;
295bd68fb94SJohn Clements 	uint32_t umc_reg_offset  = 0;
296bd68fb94SJohn Clements 
297eee2eabaSJohn Clements 	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
298eee2eabaSJohn Clements 
299eee2eabaSJohn Clements 	if (rsmu_umc_index_state)
300eee2eabaSJohn Clements 		umc_v6_1_disable_umc_index_mode(adev);
301eee2eabaSJohn Clements 
302d38c3ac7SGuchun Chen 	if ((adev->asic_type == CHIP_ARCTURUS) &&
303d38c3ac7SGuchun Chen 		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
304d38c3ac7SGuchun Chen 		DRM_WARN("Fail to disable DF-Cstate.\n");
305d38c3ac7SGuchun Chen 
306c8aa6ae3SJohn Clements 	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
307bd68fb94SJohn Clements 		umc_reg_offset = get_umc_6_reg_offset(adev,
308bd68fb94SJohn Clements 						      umc_inst,
309bd68fb94SJohn Clements 						      ch_inst);
310bd68fb94SJohn Clements 
311bd68fb94SJohn Clements 		umc_v6_1_query_error_address(adev,
312bd68fb94SJohn Clements 					     err_data,
313bd68fb94SJohn Clements 					     umc_reg_offset,
314bd68fb94SJohn Clements 					     ch_inst,
315bd68fb94SJohn Clements 					     umc_inst);
316bd68fb94SJohn Clements 	}
317bd68fb94SJohn Clements 
318d38c3ac7SGuchun Chen 	if ((adev->asic_type == CHIP_ARCTURUS) &&
319d38c3ac7SGuchun Chen 		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
320d38c3ac7SGuchun Chen 		DRM_WARN("Fail to enable DF-Cstate\n");
321d38c3ac7SGuchun Chen 
322eee2eabaSJohn Clements 	if (rsmu_umc_index_state)
323eee2eabaSJohn Clements 		umc_v6_1_enable_umc_index_mode(adev);
3248c948103STao Zhou }
3258c948103STao Zhou 
326d99659a0STao Zhou static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
327bd68fb94SJohn Clements 					      uint32_t umc_reg_offset)
328b7f92097STao Zhou {
329b7f92097STao Zhou 	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
330b7f92097STao Zhou 	uint32_t ecc_err_cnt_addr;
331b7f92097STao Zhou 
3324cf781c2SJohn Clements 	if (adev->asic_type == CHIP_ARCTURUS) {
3334cf781c2SJohn Clements 		/* UMC 6_1_2 registers */
3344cf781c2SJohn Clements 		ecc_err_cnt_sel_addr =
3354cf781c2SJohn Clements 			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
3364cf781c2SJohn Clements 		ecc_err_cnt_addr =
3374cf781c2SJohn Clements 			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
3384cf781c2SJohn Clements 	} else {
3394cf781c2SJohn Clements 		/* UMC 6_1_1 registers */
340b7f92097STao Zhou 		ecc_err_cnt_sel_addr =
341b7f92097STao Zhou 			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
342b7f92097STao Zhou 		ecc_err_cnt_addr =
343b7f92097STao Zhou 			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
3444cf781c2SJohn Clements 	}
345b7f92097STao Zhou 
346b7f92097STao Zhou 	/* select the lower chip and check the error count */
3470ee51f1dSJohn Clements 	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
348b7f92097STao Zhou 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
349b7f92097STao Zhou 					EccErrCntCsSel, 0);
350b7f92097STao Zhou 	/* set ce error interrupt type to APIC based interrupt */
351b7f92097STao Zhou 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
352b7f92097STao Zhou 					EccErrInt, 0x1);
3530ee51f1dSJohn Clements 	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
354b7f92097STao Zhou 	/* set error count to initial value */
3550ee51f1dSJohn Clements 	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
356b7f92097STao Zhou 
357b7f92097STao Zhou 	/* select the higher chip and check the err counter */
358b7f92097STao Zhou 	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
359b7f92097STao Zhou 					EccErrCntCsSel, 1);
3600ee51f1dSJohn Clements 	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
3610ee51f1dSJohn Clements 	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
362b7f92097STao Zhou }
363b7f92097STao Zhou 
364d99659a0STao Zhou static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
3653aacf4eaSTao Zhou {
366bd68fb94SJohn Clements 	uint32_t umc_inst        = 0;
367bd68fb94SJohn Clements 	uint32_t ch_inst         = 0;
368bd68fb94SJohn Clements 	uint32_t umc_reg_offset  = 0;
3693aacf4eaSTao Zhou 
370eee2eabaSJohn Clements 	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
371eee2eabaSJohn Clements 
372eee2eabaSJohn Clements 	if (rsmu_umc_index_state)
3730ee51f1dSJohn Clements 		umc_v6_1_disable_umc_index_mode(adev);
3740ee51f1dSJohn Clements 
375c8aa6ae3SJohn Clements 	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
376bd68fb94SJohn Clements 		umc_reg_offset = get_umc_6_reg_offset(adev,
377bd68fb94SJohn Clements 						      umc_inst,
378bd68fb94SJohn Clements 						      ch_inst);
379bd68fb94SJohn Clements 
380bd68fb94SJohn Clements 		umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
381bd68fb94SJohn Clements 	}
382eee2eabaSJohn Clements 
383eee2eabaSJohn Clements 	if (rsmu_umc_index_state)
384eee2eabaSJohn Clements 		umc_v6_1_enable_umc_index_mode(adev);
385bd68fb94SJohn Clements }
3863aacf4eaSTao Zhou 
3879884c2b1SHawking Zhang const struct amdgpu_umc_funcs umc_v6_1_funcs = {
388d99659a0STao Zhou 	.err_cnt_init = umc_v6_1_err_cnt_init,
38986edcc7dSTao Zhou 	.ras_late_init = amdgpu_umc_ras_late_init,
3909884c2b1SHawking Zhang 	.query_ras_error_count = umc_v6_1_query_ras_error_count,
3918c948103STao Zhou 	.query_ras_error_address = umc_v6_1_query_ras_error_address,
3929884c2b1SHawking Zhang };
393