/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15_common.h"
#include "vega10_enum.h"

#include "gc/gc_9_4_3_offset.h"
#include "gc/gc_9_4_3_sh_mask.h"

#include "gfx_v9_4_3.h"

#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L

static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	amdgpu_gfx_off_ctrl(adev, true);

	return clock;
}

static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev,
				    u32 se_num,
				    u32 sh_num,
				    u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
				     SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
				     SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data);
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd,
			      uint32_t wave, uint32_t address)
{
	WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}
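/*
 * Illustrative sketch (editorial, not part of the original driver): the
 * SQ_IND_INDEX/SQ_IND_DATA pair is a classic index/data indirect-register
 * window. A caller wanting the raw status of wave 2 on SIMD 1 could do:
 *
 *	uint32_t status = wave_read_ind(adev, 1, 2, ixSQ_WAVE_STATUS);
 *
 * wave_read_regs() below is the bulk variant: with AUTO_INCR set, each read
 * of SQ_IND_DATA advances the internal index, so a run of SGPRs or VGPRs can
 * be streamed without reprogramming SQ_IND_INDEX for every dword.
 */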
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev,
				      uint32_t simd, uint32_t wave,
				      uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}

static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				       uint32_t wave, uint32_t start,
				       uint32_t size, uint32_t *dst)
{
	wave_read_regs(adev, simd, wave, 0,
		       start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				       uint32_t wave, uint32_t thread,
				       uint32_t start, uint32_t size,
				       uint32_t *dst)
{
	wave_read_regs(adev, simd, wave, thread,
		       start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
					u32 me, u32 pipe, u32 q, u32 vm)
{
	soc15_grbm_select(adev, me, pipe, q, vm);
}

static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev)
{
	uint32_t rlc_setting;

	/* report false if the RLC F32 core is not enabled */
	rlc_setting = RREG32_SOC15(GC, 0, regRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return false;

	return true;
}

static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RLC_SAFE_MODE__CMD_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);

	/* wait for the RLC_SAFE_MODE command to complete */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;

	data = RLC_SAFE_MODE__CMD_MASK;
	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
}

static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
{
	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}
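/*
 * Illustrative sketch (editorial, not part of the original driver): the
 * safe-mode helpers above are meant to bracket updates of RLC-owned
 * registers, roughly:
 *
 *	if (adev->gfx.rlc.funcs->is_rlc_enabled(adev)) {
 *		adev->gfx.rlc.funcs->set_safe_mode(adev);
 *		// ... touch clock-gating or other RLC-sensitive registers ...
 *		adev->gfx.rlc.funcs->unset_safe_mode(adev);
 *	}
 *
 * set_safe_mode() polls until the RLC acknowledges the request by clearing
 * the CMD field; unset_safe_mode() issues the exit command without waiting.
 */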
static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v9_4_3_select_se_sh(adev, 0xffffffff,
							0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
	       RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32_SOC15(GC, 0, regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						 bool enable)
{
	u32 tmp;

	/* These interrupts should be enabled to drive DS clock */

	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	if (adev->gfx.num_gfx_rings)
		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
}

static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
	gfx_v9_4_3_enable_gui_idle_interrupt(adev, false);
	gfx_v9_4_3_wait_for_rlc_serdes(adev);
}

static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
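/*
 * Editorial note on sequencing: gfx_v9_4_3_rlc_reset() above pulses
 * GRBM_SOFT_RESET.SOFT_RESET_RLC with a ~50us settle delay on each edge,
 * while gfx_v9_4_3_rlc_start() below re-enables the RLC F32 core and, on
 * dGPUs only, turns the GUI idle interrupts back on (APUs enable the CP
 * interrupt later, once CP init has completed).
 */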
static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
{
#ifdef AMDGPU_RLC_DEBUG_RETRY
	u32 rlc_ucode_ver;
#endif

	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	udelay(50);

	/* APUs (e.g. carrizo) enable the CP interrupt only after CP init */
	if (!(adev->flags & AMD_IS_APU)) {
		gfx_v9_4_3_enable_gui_idle_interrupt(adev, true);
		udelay(50);
	}

#ifdef AMDGPU_RLC_DEBUG_RETRY
	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
	rlc_ucode_ver = RREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_6);
	if (rlc_ucode_ver == 0x108) {
		dev_info(adev->dev,
			 "Using rlc debug ucode. regRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
			 rlc_ucode_ver, adev->gfx.rlc_fw_version);
		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
		 * default is 0x9C4 to create a 100us interval */
		WREG32_SOC15(GC, 0, regRLC_GPM_TIMER_INT_3, 0x9C4);
		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
		 * to disable the page fault retry interrupts, default is
		 * 0x100 (256) */
		WREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_12, 0x100);
	}
#endif
}

static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
				   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
		     RLCG_UCODE_LOADING_START_ADDRESS);
	for (i = 0; i < fw_size; i++) {
		if (amdgpu_emu_mode == 1 && i % 100 == 0) {
			dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i);
			msleep(1);
		}
		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	}
	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	adev->gfx.rlc.funcs->stop(adev);

	/* disable CG */
	WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);

	/* TODO: revisit pg function */
	/* gfx_v9_4_3_init_pg(adev); */

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy rlc firmware loading */
		r = gfx_v9_4_3_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}

static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
{
	u32 reg, data;

	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
	if (amdgpu_sriov_is_pp_one_vf(adev))
		data = RREG32_NO_KIQ(reg);
	else
		data = RREG32(reg);

	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;

	if (amdgpu_sriov_is_pp_one_vf(adev))
		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
	else
		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
}

static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
	{SOC15_REG_ENTRY(GC, 0, regGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, regSQ_IND_INDEX)},
};

static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev,
					uint32_t offset,
					struct soc15_reg_rlcg *entries, int arr_size)
{
	int i;
	uint32_t reg;

	if (!entries)
		return false;

	for (i = 0; i < arr_size; i++) {
		const struct soc15_reg_rlcg *entry;

		entry = &entries[i];
		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
		if (offset == reg)
			return true;
	}

	return false;
}

static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
{
	return gfx_v9_4_3_check_rlcg_range(adev, offset,
					   (void *)rlcg_access_gc_9_4_3,
					   ARRAY_SIZE(rlcg_access_gc_9_4_3));
}
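/*
 * Illustrative sketch (editorial, not part of the original driver):
 * is_rlcg_access_range() lets the generic register-access layer decide
 * whether an MMIO write must instead be routed through the RLCG interface
 * (e.g. under SR-IOV), roughly:
 *
 *	if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, offset))
 *		// ... route the write through the RLCG path ...
 *
 * For GC 9.4.3 only GRBM_GFX_INDEX and SQ_IND_INDEX are in the table above.
 */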
const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
	.get_gpu_clock_counter = gfx_v9_4_3_get_gpu_clock_counter,
	.select_se_sh = gfx_v9_4_3_select_se_sh,
	.read_wave_data = gfx_v9_4_3_read_wave_data,
	.read_wave_sgprs = gfx_v9_4_3_read_wave_sgprs,
	.read_wave_vgprs = gfx_v9_4_3_read_wave_vgprs,
	.select_me_pipe_q = gfx_v9_4_3_select_me_pipe_q,
};

const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
	.is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled,
	.set_safe_mode = gfx_v9_4_3_set_safe_mode,
	.unset_safe_mode = gfx_v9_4_3_unset_safe_mode,
	.init = gfx_v9_4_3_rlc_init,
	.resume = gfx_v9_4_3_rlc_resume,
	.stop = gfx_v9_4_3_rlc_stop,
	.reset = gfx_v9_4_3_rlc_reset,
	.start = gfx_v9_4_3_rlc_start,
	.update_spm_vmid = gfx_v9_4_3_update_spm_vmid,
	.is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
};