/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "umc_v6_1.h"
#include "amdgpu_ras.h"
#include "amdgpu.h"

#include "rsmu/rsmu_0_0_2_offset.h"
#include "rsmu/rsmu_0_0_2_sh_mask.h"
#include "umc/umc_6_1_1_offset.h"
#include "umc/umc_6_1_1_sh_mask.h"
#include "umc/umc_6_1_2_offset.h"

#define UMC_6_INST_DIST			0x40000

/*
 * (addr / 256) * 8192: the higher 26 bits of ErrorAddr
 * are the index of the 8KB block
 */
#define ADDR_OF_8KB_BLOCK(addr)			(((addr) & ~0xffULL) << 5)
/* channel index is the index of 256B block */
#define ADDR_OF_256B_BLOCK(channel_index)	((channel_index) << 8)
/* offset in 256B block */
#define OFFSET_IN_256B_BLOCK(addr)		((addr) & 0xffULL)
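/*
 * Worked example (illustrative values): for err_addr = 0x12345 and
 * channel_index = 3, the retired page address is assembled from
 *   ADDR_OF_8KB_BLOCK(0x12345)    = (0x12300 << 5) = 0x246000
 *   ADDR_OF_256B_BLOCK(3)         = 0x300
 *   OFFSET_IN_256B_BLOCK(0x12345) = 0x45
 * OR'ed together: 0x246345, as done in umc_v6_1_query_error_address().
 */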

#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))

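/*
 * Physical channel index for each (UMC instance, channel instance) pair,
 * used to translate a UMC channel address into a SoC physical address
 * (see ADDR_OF_256B_BLOCK() above).
 */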
const uint32_t
	umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
		{2, 18, 11, 27},	{4, 20, 13, 29},
		{1, 17, 8, 24},		{7, 23, 14, 30},
		{10, 26, 3, 19},	{12, 28, 5, 21},
		{9, 25, 0, 16},		{15, 31, 6, 22}
};

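/*
 * RSMU UMC index mode helpers. Callers below save the current index mode
 * state, disable it while accessing per-channel registers through the
 * offsets computed by get_umc_6_reg_offset(), and then restore it.
 */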
static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
{
	uint32_t rsmu_umc_addr, rsmu_umc_val;

	rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
	rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);

	rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
			RSMU_UMC_INDEX_MODE_EN, 1);

	WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
}

static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
{
	uint32_t rsmu_umc_addr, rsmu_umc_val;

	rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
	rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);

	rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
			RSMU_UMC_INDEX_MODE_EN, 0);

	WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
}

static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
{
	uint32_t rsmu_umc_addr, rsmu_umc_val;

	rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
	rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);

	return REG_GET_FIELD(rsmu_umc_val,
			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
			RSMU_UMC_INDEX_MODE_EN);
}

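/*
 * Register offset of a given channel: the channel stride within a UMC
 * instance plus the fixed distance between UMC instances
 * (UMC_6_INST_DIST).
 */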
static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
					    uint32_t umc_inst,
					    uint32_t ch_inst)
{
	return adev->umc.channel_offs * ch_inst + UMC_6_INST_DIST * umc_inst;
}

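/*
 * Reset the ECC error counters of both chip selects of one UMC channel
 * to their initial value (UMC_V6_1_CE_CNT_INIT).
 */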
static void umc_v6_1_clear_error_count_per_channel(struct amdgpu_device *adev,
					uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_addr;
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCnt_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCnt);
	}

	/* select the lower chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear lower chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear higher chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_1_CE_CNT_INIT);
}

static void umc_v6_1_clear_error_count(struct amdgpu_device *adev)
{
	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;
	uint32_t rsmu_umc_index_state =
				umc_v6_1_get_umc_index_mode_state(adev);

	if (rsmu_umc_index_state)
		umc_v6_1_disable_umc_index_mode(adev);

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_6_reg_offset(adev,
						umc_inst,
						ch_inst);

		umc_v6_1_clear_error_count_per_channel(adev,
						umc_reg_offset);
	}

	if (rsmu_umc_index_state)
		umc_v6_1_enable_umc_index_mode(adev);
}

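/*
 * Accumulate the correctable (CE) error count of one UMC channel: the
 * EccErrCnt counters of both chip selects, plus one for an SRAM CE
 * flagged in MCUMC_STATUS.
 */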
static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset,
						   unsigned long *error_count)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
	}

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip and check the error counter */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_1_CE_CNT_INIT);

	/* check for SRAM correctable error;
	 * MCUMC_STATUS is a 64 bit register
	 */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
		*error_count += 1;
}

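/*
 * Count one uncorrectable error for this channel if MCUMC_STATUS is
 * valid and any of the Deferred, UECC, PCC, UC or TCC bits is set.
 */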
static void umc_v6_1_query_uncorrectable_error_count(struct amdgpu_device *adev,
						      uint32_t umc_reg_offset,
						      unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
	}

	/* check the MCUMC_STATUS */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
		*error_count += 1;
}

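/*
 * Walk all UMC instances and channels, accumulating CE and UE counts into
 * the RAS error data. Index mode is disabled and, on Arcturus, DF C-state
 * is disallowed while the per-channel registers are read.
 */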
static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

	if (rsmu_umc_index_state)
		umc_v6_1_disable_umc_index_mode(adev);

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
		DRM_WARN("Failed to disable DF-Cstate.\n");

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_6_reg_offset(adev,
						      umc_inst,
						      ch_inst);

		umc_v6_1_query_correctable_error_count(adev,
						       umc_reg_offset,
						       &(err_data->ce_count));
		umc_v6_1_query_uncorrectable_error_count(adev,
							 umc_reg_offset,
							 &(err_data->ue_count));
	}

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
		DRM_WARN("Failed to enable DF-Cstate.\n");

	if (rsmu_umc_index_state)
		umc_v6_1_enable_umc_index_mode(adev);

	umc_v6_1_clear_error_count(adev);
}

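/*
 * Read MCUMC_ADDRT0 for one channel, translate the UMC channel address
 * into a SoC physical address using the channel index table, and record
 * a page-retirement entry for uncorrectable errors.
 */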
static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t umc_reg_offset,
					 uint32_t ch_inst,
					 uint32_t umc_inst)
{
	uint32_t lsb, mc_umc_status_addr;
	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
	struct eeprom_table_record *err_rec;
	uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
		mc_umc_addrt0 =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
		mc_umc_addrt0 =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
	}

	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);

	if (mc_umc_status == 0)
		return;

	if (!err_data->err_addr) {
		/* clear umc status */
		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
		return;
	}

	err_rec = &err_data->err_addr[err_data->err_addr_cnt];

	/* calculate error address if ue/ce error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {

		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
		/* the lowest lsb bits should be ignored */
		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
		err_addr &= ~((0x1ULL << lsb) - 1);

		/* translate umc channel address to soc pa, 3 parts are included */
		retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
				ADDR_OF_256B_BLOCK(channel_index) |
				OFFSET_IN_256B_BLOCK(err_addr);

		/* we only save ue error information currently, ce is skipped */
		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
				== 1) {
			err_rec->address = err_addr;
			/* page frame address is saved */
			err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
			err_rec->ts = (uint64_t)ktime_get_real_seconds();
			err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
			err_rec->cu = 0;
			err_rec->mem_channel = channel_index;
			err_rec->mcumc_id = umc_inst;

			err_data->err_addr_cnt++;
		}
	}

	/* clear umc status */
	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}

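/*
 * Walk all UMC instances and channels collecting error addresses, with
 * the same index mode and DF C-state handling as the error count query.
 */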
static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

	if (rsmu_umc_index_state)
		umc_v6_1_disable_umc_index_mode(adev);

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
		DRM_WARN("Failed to disable DF-Cstate.\n");

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_6_reg_offset(adev,
						      umc_inst,
						      ch_inst);

		umc_v6_1_query_error_address(adev,
					     err_data,
					     umc_reg_offset,
					     ch_inst,
					     umc_inst);
	}

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
		DRM_WARN("Failed to enable DF-Cstate.\n");

	if (rsmu_umc_index_state)
		umc_v6_1_enable_umc_index_mode(adev);
}

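/*
 * Program one UMC channel for error counting: route CE error interrupts
 * to the APIC and reset both chip selects' counters to
 * UMC_V6_1_CE_CNT_INIT.
 */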
static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
					      uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
	}

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	/* set ce error interrupt type to APIC based interrupt */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrInt, 0x1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
	/* set error count to initial value */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip and check the error counter */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
}

static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
{
	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

	if (rsmu_umc_index_state)
		umc_v6_1_disable_umc_index_mode(adev);

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_6_reg_offset(adev,
						      umc_inst,
						      ch_inst);

		umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
	}

	if (rsmu_umc_index_state)
		umc_v6_1_enable_umc_index_mode(adev);
}

const struct amdgpu_umc_funcs umc_v6_1_funcs = {
	.err_cnt_init = umc_v6_1_err_cnt_init,
	.ras_late_init = amdgpu_umc_ras_late_init,
	.query_ras_error_count = umc_v6_1_query_ras_error_count,
	.query_ras_error_address = umc_v6_1_query_ras_error_address,
};