/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
233907c492SJohn Clements #include "amdgpu_ras.h"
243907c492SJohn Clements #include "amdgpu.h"
253907c492SJohn Clements #include "amdgpu_mca.h"
263907c492SJohn Clements
273907c492SJohn Clements #include "umc/umc_6_7_0_offset.h"
283907c492SJohn Clements #include "umc/umc_6_7_0_sh_mask.h"
293907c492SJohn Clements
amdgpu_mca_query_correctable_error_count(struct amdgpu_device * adev,uint64_t mc_status_addr,unsigned long * error_count)303907c492SJohn Clements void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
313907c492SJohn Clements uint64_t mc_status_addr,
323907c492SJohn Clements unsigned long *error_count)
333907c492SJohn Clements {
34640ae42eSJohn Clements uint64_t mc_status = RREG64_PCIE(mc_status_addr);
353907c492SJohn Clements
363907c492SJohn Clements if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
373907c492SJohn Clements REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
383907c492SJohn Clements *error_count += 1;
393907c492SJohn Clements }
403907c492SJohn Clements
amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device * adev,uint64_t mc_status_addr,unsigned long * error_count)413907c492SJohn Clements void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
423907c492SJohn Clements uint64_t mc_status_addr,
433907c492SJohn Clements unsigned long *error_count)
443907c492SJohn Clements {
45640ae42eSJohn Clements uint64_t mc_status = RREG64_PCIE(mc_status_addr);
463907c492SJohn Clements
473907c492SJohn Clements if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
483907c492SJohn Clements (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
493907c492SJohn Clements REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
503907c492SJohn Clements REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
513907c492SJohn Clements REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
523907c492SJohn Clements REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
533907c492SJohn Clements *error_count += 1;
543907c492SJohn Clements }
553907c492SJohn Clements
/*
 * amdgpu_mca_reset_error_count - clear the status value at @mc_status_addr
 * @adev: device handle; not referenced directly in this body (presumably
 *        consumed by the WREG64_PCIE accessor macro - confirm)
 * @mc_status_addr: address handed to WREG64_PCIE
 *
 * Writes a 64-bit zero through WREG64_PCIE.
 */
void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
				  uint64_t mc_status_addr)
{
	const uint64_t cleared_status = 0ULL;

	WREG64_PCIE(mc_status_addr, cleared_status);
}
613907c492SJohn Clements
amdgpu_mca_query_ras_error_count(struct amdgpu_device * adev,uint64_t mc_status_addr,void * ras_error_status)623907c492SJohn Clements void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
633907c492SJohn Clements uint64_t mc_status_addr,
643907c492SJohn Clements void *ras_error_status)
653907c492SJohn Clements {
663907c492SJohn Clements struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
673907c492SJohn Clements
683907c492SJohn Clements amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count));
693907c492SJohn Clements amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count));
703907c492SJohn Clements
713907c492SJohn Clements amdgpu_mca_reset_error_count(adev, mc_status_addr);
723907c492SJohn Clements }
73*7f544c54SHawking Zhang
amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device * adev)74*7f544c54SHawking Zhang int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev)
75*7f544c54SHawking Zhang {
76*7f544c54SHawking Zhang int err;
77*7f544c54SHawking Zhang struct amdgpu_mca_ras_block *ras;
78*7f544c54SHawking Zhang
79*7f544c54SHawking Zhang if (!adev->mca.mp0.ras)
80*7f544c54SHawking Zhang return 0;
81*7f544c54SHawking Zhang
82*7f544c54SHawking Zhang ras = adev->mca.mp0.ras;
83*7f544c54SHawking Zhang
84*7f544c54SHawking Zhang err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
85*7f544c54SHawking Zhang if (err) {
86*7f544c54SHawking Zhang dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n");
87*7f544c54SHawking Zhang return err;
88*7f544c54SHawking Zhang }
89*7f544c54SHawking Zhang
90*7f544c54SHawking Zhang strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
91*7f544c54SHawking Zhang ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
92*7f544c54SHawking Zhang ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
93*7f544c54SHawking Zhang adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
94*7f544c54SHawking Zhang
95*7f544c54SHawking Zhang return 0;
96*7f544c54SHawking Zhang }
97*7f544c54SHawking Zhang
amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device * adev)98*7f544c54SHawking Zhang int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev)
99*7f544c54SHawking Zhang {
100*7f544c54SHawking Zhang int err;
101*7f544c54SHawking Zhang struct amdgpu_mca_ras_block *ras;
102*7f544c54SHawking Zhang
103*7f544c54SHawking Zhang if (!adev->mca.mp1.ras)
104*7f544c54SHawking Zhang return 0;
105*7f544c54SHawking Zhang
106*7f544c54SHawking Zhang ras = adev->mca.mp1.ras;
107*7f544c54SHawking Zhang
108*7f544c54SHawking Zhang err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
109*7f544c54SHawking Zhang if (err) {
110*7f544c54SHawking Zhang dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n");
111*7f544c54SHawking Zhang return err;
112*7f544c54SHawking Zhang }
113*7f544c54SHawking Zhang
114*7f544c54SHawking Zhang strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
115*7f544c54SHawking Zhang ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
116*7f544c54SHawking Zhang ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
117*7f544c54SHawking Zhang adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
118*7f544c54SHawking Zhang
119*7f544c54SHawking Zhang return 0;
120*7f544c54SHawking Zhang }
121*7f544c54SHawking Zhang
amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device * adev)122*7f544c54SHawking Zhang int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
123*7f544c54SHawking Zhang {
124*7f544c54SHawking Zhang int err;
125*7f544c54SHawking Zhang struct amdgpu_mca_ras_block *ras;
126*7f544c54SHawking Zhang
127*7f544c54SHawking Zhang if (!adev->mca.mpio.ras)
128*7f544c54SHawking Zhang return 0;
129*7f544c54SHawking Zhang
130*7f544c54SHawking Zhang ras = adev->mca.mpio.ras;
131*7f544c54SHawking Zhang
132*7f544c54SHawking Zhang err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
133*7f544c54SHawking Zhang if (err) {
134*7f544c54SHawking Zhang dev_err(adev->dev, "Failed to register mca.mpio ras block!\n");
135*7f544c54SHawking Zhang return err;
136*7f544c54SHawking Zhang }
137*7f544c54SHawking Zhang
138*7f544c54SHawking Zhang strcpy(ras->ras_block.ras_comm.name, "mca.mpio");
139*7f544c54SHawking Zhang ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
140*7f544c54SHawking Zhang ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
141*7f544c54SHawking Zhang adev->mca.mpio.ras_if = &ras->ras_block.ras_comm;
142*7f544c54SHawking Zhang
143*7f544c54SHawking Zhang return 0;
144*7f544c54SHawking Zhang }
145