19884c2b1SHawking Zhang /*
29884c2b1SHawking Zhang * Copyright 2019 Advanced Micro Devices, Inc.
39884c2b1SHawking Zhang *
49884c2b1SHawking Zhang * Permission is hereby granted, free of charge, to any person obtaining a
59884c2b1SHawking Zhang * copy of this software and associated documentation files (the "Software"),
69884c2b1SHawking Zhang * to deal in the Software without restriction, including without limitation
79884c2b1SHawking Zhang * the rights to use, copy, modify, merge, publish, distribute, sublicense,
89884c2b1SHawking Zhang * and/or sell copies of the Software, and to permit persons to whom the
99884c2b1SHawking Zhang * Software is furnished to do so, subject to the following conditions:
109884c2b1SHawking Zhang *
119884c2b1SHawking Zhang * The above copyright notice and this permission notice shall be included in
129884c2b1SHawking Zhang * all copies or substantial portions of the Software.
139884c2b1SHawking Zhang *
149884c2b1SHawking Zhang * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
159884c2b1SHawking Zhang * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
169884c2b1SHawking Zhang * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
179884c2b1SHawking Zhang * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
189884c2b1SHawking Zhang * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
199884c2b1SHawking Zhang * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
209884c2b1SHawking Zhang * OTHER DEALINGS IN THE SOFTWARE.
219884c2b1SHawking Zhang *
229884c2b1SHawking Zhang */
239884c2b1SHawking Zhang #include "umc_v6_1.h"
249884c2b1SHawking Zhang #include "amdgpu_ras.h"
2549070c4eSHawking Zhang #include "amdgpu_umc.h"
269884c2b1SHawking Zhang #include "amdgpu.h"
279884c2b1SHawking Zhang
289884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_offset.h"
299884c2b1SHawking Zhang #include "rsmu/rsmu_0_0_2_sh_mask.h"
309884c2b1SHawking Zhang #include "umc/umc_6_1_1_offset.h"
319884c2b1SHawking Zhang #include "umc/umc_6_1_1_sh_mask.h"
32fb71a336SGuchun Chen #include "umc/umc_6_1_2_offset.h"
339884c2b1SHawking Zhang
34bd68fb94SJohn Clements #define UMC_6_INST_DIST 0x40000
35bd68fb94SJohn Clements
363aacf4eaSTao Zhou const uint32_t
37c2742aefSTao Zhou umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
38c2742aefSTao Zhou {2, 18, 11, 27}, {4, 20, 13, 29},
39c2742aefSTao Zhou {1, 17, 8, 24}, {7, 23, 14, 30},
40c2742aefSTao Zhou {10, 26, 3, 19}, {12, 28, 5, 21},
41c2742aefSTao Zhou {9, 25, 0, 16}, {15, 31, 6, 22}
42c2742aefSTao Zhou };
43c2742aefSTao Zhou
umc_v6_1_enable_umc_index_mode(struct amdgpu_device * adev)44eee2eabaSJohn Clements static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
45eee2eabaSJohn Clements {
4640e73314SGuchun Chen uint32_t rsmu_umc_addr, rsmu_umc_val;
4740e73314SGuchun Chen
4840e73314SGuchun Chen rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
4940e73314SGuchun Chen mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
5040e73314SGuchun Chen rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
5140e73314SGuchun Chen
5240e73314SGuchun Chen rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
5340e73314SGuchun Chen RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
54eee2eabaSJohn Clements RSMU_UMC_INDEX_MODE_EN, 1);
5540e73314SGuchun Chen
5640e73314SGuchun Chen WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
57eee2eabaSJohn Clements }
58eee2eabaSJohn Clements
umc_v6_1_disable_umc_index_mode(struct amdgpu_device * adev)590ee51f1dSJohn Clements static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
600ee51f1dSJohn Clements {
6140e73314SGuchun Chen uint32_t rsmu_umc_addr, rsmu_umc_val;
6240e73314SGuchun Chen
6340e73314SGuchun Chen rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
6440e73314SGuchun Chen mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
6540e73314SGuchun Chen rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
6640e73314SGuchun Chen
6740e73314SGuchun Chen rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val,
6840e73314SGuchun Chen RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
690ee51f1dSJohn Clements RSMU_UMC_INDEX_MODE_EN, 0);
7040e73314SGuchun Chen
7140e73314SGuchun Chen WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val);
720ee51f1dSJohn Clements }
730ee51f1dSJohn Clements
umc_v6_1_get_umc_index_mode_state(struct amdgpu_device * adev)74eee2eabaSJohn Clements static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
75eee2eabaSJohn Clements {
7640e73314SGuchun Chen uint32_t rsmu_umc_addr, rsmu_umc_val;
77eee2eabaSJohn Clements
7840e73314SGuchun Chen rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0,
79eee2eabaSJohn Clements mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
8040e73314SGuchun Chen rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4);
81eee2eabaSJohn Clements
8240e73314SGuchun Chen return REG_GET_FIELD(rsmu_umc_val,
83eee2eabaSJohn Clements RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
84eee2eabaSJohn Clements RSMU_UMC_INDEX_MODE_EN);
85eee2eabaSJohn Clements }
86eee2eabaSJohn Clements
/*
 * Compute the register offset of one UMC channel: channels are spaced
 * by adev->umc.channel_offs and UMC instances by UMC_6_INST_DIST.
 */
static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
					    uint32_t umc_inst,
					    uint32_t ch_inst)
{
	return UMC_6_INST_DIST * umc_inst + adev->umc.channel_offs * ch_inst;
}
9387d2b92fSTao Zhou
/*
 * Reset both per-chip ECC error counters of one UMC channel back to
 * UMC_V6_1_CE_CNT_INIT.  The chip is chosen via the EccErrCntCsSel
 * field (0 = lower chip, 1 = higher chip) before each counter write.
 * Caller must have UMC index mode disabled.
 */
static void umc_v6_1_clear_error_count_per_channel(struct amdgpu_device *adev,
					uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_addr;
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCnt_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCnt);
	}

	/* select the lower chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear lower chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear higher chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_1_CE_CNT_INIT);
}
144fd90456cSGuchun Chen
umc_v6_1_clear_error_count(struct amdgpu_device * adev)145fd90456cSGuchun Chen static void umc_v6_1_clear_error_count(struct amdgpu_device *adev)
146fd90456cSGuchun Chen {
147fd90456cSGuchun Chen uint32_t umc_inst = 0;
148fd90456cSGuchun Chen uint32_t ch_inst = 0;
149fd90456cSGuchun Chen uint32_t umc_reg_offset = 0;
150fd90456cSGuchun Chen uint32_t rsmu_umc_index_state =
151fd90456cSGuchun Chen umc_v6_1_get_umc_index_mode_state(adev);
152fd90456cSGuchun Chen
153fd90456cSGuchun Chen if (rsmu_umc_index_state)
154fd90456cSGuchun Chen umc_v6_1_disable_umc_index_mode(adev);
155fd90456cSGuchun Chen
156fd90456cSGuchun Chen LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
157fd90456cSGuchun Chen umc_reg_offset = get_umc_6_reg_offset(adev,
158fd90456cSGuchun Chen umc_inst,
159fd90456cSGuchun Chen ch_inst);
160fd90456cSGuchun Chen
161fd90456cSGuchun Chen umc_v6_1_clear_error_count_per_channel(adev,
162fd90456cSGuchun Chen umc_reg_offset);
163fd90456cSGuchun Chen }
164fd90456cSGuchun Chen
165fd90456cSGuchun Chen if (rsmu_umc_index_state)
166fd90456cSGuchun Chen umc_v6_1_enable_umc_index_mode(adev);
167fd90456cSGuchun Chen }
168fd90456cSGuchun Chen
/*
 * Accumulate this channel's correctable (CE) errors into *error_count:
 * the DRAM ECC counters of both chips (lower and higher chip select),
 * plus one if MCUMC_STATUS reports an SRAM correctable error.
 * UMC_V6_1_CE_CNT_INIT is subtracted because the counters are preloaded
 * with that value by umc_v6_1_err_cnt_init_per_channel().
 */
static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset,
						   unsigned long *error_count)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
	}

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip and check the err counter */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_1_CE_CNT_INIT);

	/* check for SRAM correctable error
	  MCUMC_STATUS is a 64 bit register */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	/* ErrorCodeExt == 6 with Val and CECC set marks one SRAM CE */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
		*error_count += 1;
}
2259884c2b1SHawking Zhang
/*
 * Bump *error_count by one if this channel's MCUMC_STATUS reports a
 * valid uncorrectable error (Deferred, UECC, PCC, UC or TCC set).
 * NOTE(review): "querry" is a long-standing typo in the function name;
 * it is kept because the name is referenced elsewhere in this file.
 */
static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
						      uint32_t umc_reg_offset,
						      unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
	}

	/* check the MCUMC_STATUS */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
		*error_count += 1;
}
2539884c2b1SHawking Zhang
/*
 * RAS hw_ops callback: sum CE and UE error counts over all UMC
 * channels into the caller-supplied ras_err_data, then reset the
 * hardware counters.  Index mode is disabled for the duration, and on
 * Arcturus DF C-state is disallowed around the register accesses.
 */
static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst = 0;
	uint32_t ch_inst = 0;
	uint32_t umc_reg_offset = 0;

	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

	if (rsmu_umc_index_state)
		umc_v6_1_disable_umc_index_mode(adev);

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
		DRM_WARN("Fail to disable DF-Cstate.\n");

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_6_reg_offset(adev,
						      umc_inst,
						      ch_inst);

		umc_v6_1_query_correctable_error_count(adev,
						       umc_reg_offset,
						       &(err_data->ce_count));
		umc_v6_1_querry_uncorrectable_error_count(adev,
							  umc_reg_offset,
							  &(err_data->ue_count));
	}

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
		DRM_WARN("Fail to enable DF-Cstate\n");

	if (rsmu_umc_index_state)
		umc_v6_1_enable_umc_index_mode(adev);

	/* start the next query interval from a clean counter baseline */
	umc_v6_1_clear_error_count(adev);
}
2949884c2b1SHawking Zhang
/*
 * If this channel logged a valid uncorrectable error, translate the
 * error address in MCUMC_ADDRT0 into a SoC physical page and record it
 * in err_data for page retirement.  MCUMC_STATUS is cleared on every
 * path except when it reads as zero (nothing logged).
 * Caller must have UMC index mode disabled.
 */
static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t umc_reg_offset,
					 uint32_t ch_inst,
					 uint32_t umc_inst)
{
	uint32_t lsb, mc_umc_status_addr;
	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
	/* map (umc_inst, ch_inst) to the DRAM channel index via the
	 * per-asic table (umc_v6_1_channel_idx_tbl for this IP) */
	uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
		mc_umc_addrt0 =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
		mc_umc_addrt0 =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
	}

	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);

	/* zero status: no error was logged on this channel */
	if (mc_umc_status == 0)
		return;

	if (!err_data->err_addr) {
		/* clear umc status */
		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
		return;
	}

	/* calculate error address if ue error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {

		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
		/* the lowest lsb bits should be ignored */
		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
		err_addr &= ~((0x1ULL << lsb) - 1);

		/* translate umc channel address to soc pa, 3 parts are included */
		retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
				ADDR_OF_256B_BLOCK(channel_index) |
				OFFSET_IN_256B_BLOCK(err_addr);

		amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);
	}

	/* clear umc status */
	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}
3528c948103STao Zhou
umc_v6_1_query_ras_error_address(struct amdgpu_device * adev,void * ras_error_status)3538c948103STao Zhou static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
3548c948103STao Zhou void *ras_error_status)
3558c948103STao Zhou {
356bd68fb94SJohn Clements struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
357bd68fb94SJohn Clements
358bd68fb94SJohn Clements uint32_t umc_inst = 0;
359bd68fb94SJohn Clements uint32_t ch_inst = 0;
360bd68fb94SJohn Clements uint32_t umc_reg_offset = 0;
361bd68fb94SJohn Clements
362eee2eabaSJohn Clements uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
363eee2eabaSJohn Clements
364eee2eabaSJohn Clements if (rsmu_umc_index_state)
365eee2eabaSJohn Clements umc_v6_1_disable_umc_index_mode(adev);
366eee2eabaSJohn Clements
367d38c3ac7SGuchun Chen if ((adev->asic_type == CHIP_ARCTURUS) &&
368d38c3ac7SGuchun Chen amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
369d38c3ac7SGuchun Chen DRM_WARN("Fail to disable DF-Cstate.\n");
370d38c3ac7SGuchun Chen
371c8aa6ae3SJohn Clements LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
372bd68fb94SJohn Clements umc_reg_offset = get_umc_6_reg_offset(adev,
373bd68fb94SJohn Clements umc_inst,
374bd68fb94SJohn Clements ch_inst);
375bd68fb94SJohn Clements
376bd68fb94SJohn Clements umc_v6_1_query_error_address(adev,
377bd68fb94SJohn Clements err_data,
378bd68fb94SJohn Clements umc_reg_offset,
379bd68fb94SJohn Clements ch_inst,
380bd68fb94SJohn Clements umc_inst);
381bd68fb94SJohn Clements }
382bd68fb94SJohn Clements
383d38c3ac7SGuchun Chen if ((adev->asic_type == CHIP_ARCTURUS) &&
384d38c3ac7SGuchun Chen amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
385d38c3ac7SGuchun Chen DRM_WARN("Fail to enable DF-Cstate\n");
386d38c3ac7SGuchun Chen
387eee2eabaSJohn Clements if (rsmu_umc_index_state)
388eee2eabaSJohn Clements umc_v6_1_enable_umc_index_mode(adev);
3898c948103STao Zhou }
3908c948103STao Zhou
/*
 * Prepare one UMC channel for RAS counting: set the CE error interrupt
 * type to APIC and preload both chip-select error counters with
 * UMC_V6_1_CE_CNT_INIT (later queries subtract this baseline).
 * Caller must have UMC index mode disabled.
 */
static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
					      uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
	}

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	/* set ce error interrupt type to APIC based interrupt */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrInt, 0x1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
	/* set error count to initial value */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip and check the err counter */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
}
428b7f92097STao Zhou
umc_v6_1_err_cnt_init(struct amdgpu_device * adev)429d99659a0STao Zhou static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
4303aacf4eaSTao Zhou {
431bd68fb94SJohn Clements uint32_t umc_inst = 0;
432bd68fb94SJohn Clements uint32_t ch_inst = 0;
433bd68fb94SJohn Clements uint32_t umc_reg_offset = 0;
4343aacf4eaSTao Zhou
435eee2eabaSJohn Clements uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
436eee2eabaSJohn Clements
437eee2eabaSJohn Clements if (rsmu_umc_index_state)
4380ee51f1dSJohn Clements umc_v6_1_disable_umc_index_mode(adev);
4390ee51f1dSJohn Clements
440c8aa6ae3SJohn Clements LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
441bd68fb94SJohn Clements umc_reg_offset = get_umc_6_reg_offset(adev,
442bd68fb94SJohn Clements umc_inst,
443bd68fb94SJohn Clements ch_inst);
444bd68fb94SJohn Clements
445bd68fb94SJohn Clements umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
446bd68fb94SJohn Clements }
447eee2eabaSJohn Clements
448eee2eabaSJohn Clements if (rsmu_umc_index_state)
449eee2eabaSJohn Clements umc_v6_1_enable_umc_index_mode(adev);
450bd68fb94SJohn Clements }
4513aacf4eaSTao Zhou
452efe17d5aSyipechai const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = {
4539884c2b1SHawking Zhang .query_ras_error_count = umc_v6_1_query_ras_error_count,
4548c948103STao Zhou .query_ras_error_address = umc_v6_1_query_ras_error_address,
4559884c2b1SHawking Zhang };
456efe17d5aSyipechai
457efe17d5aSyipechai struct amdgpu_umc_ras umc_v6_1_ras = {
458efe17d5aSyipechai .ras_block = {
459efe17d5aSyipechai .hw_ops = &umc_v6_1_ras_hw_ops,
460efe17d5aSyipechai },
461efe17d5aSyipechai .err_cnt_init = umc_v6_1_err_cnt_init,
462efe17d5aSyipechai };