/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
#include "mmsch_v4_0_3.h"

#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"

#define mmUVD_DPG_LMA_CTL		regUVD_DPG_LMA_CTL
#define mmUVD_DPG_LMA_CTL_BASE_IDX	regUVD_DPG_LMA_CTL_BASE_IDX
#define mmUVD_DPG_LMA_DATA		regUVD_DPG_LMA_DATA
#define mmUVD_DPG_LMA_DATA_BASE_IDX	regUVD_DPG_LMA_DATA_BASE_IDX

#define VCN_VID_SOC_ADDRESS_2_0		0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0	0x48300

static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_3_set_powergating_state(void *handle,
		enum amd_powergating_state state);
static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
		int inst_idx, struct dpg_pause_state *new_state);
static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
				  int inst_idx, bool indirect);
/**
 * vcn_v4_0_3_early_init - set function pointers
 *
 * @handle: amdgpu_device pointer
 *
 * Set ring and irq function pointers
 */
static int vcn_v4_0_3_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* re-use enc ring as unified ring */
	adev->vcn.num_enc_rings = 1;

	vcn_v4_0_3_set_unified_ring_funcs(adev);
	vcn_v4_0_3_set_irq_funcs(adev);
	vcn_v4_0_3_set_ras_funcs(adev);

	return amdgpu_vcn_early_init(adev);
}

static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
{
	struct amdgpu_vcn4_fw_shared *fw_shared;

	fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
	fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
	fw_shared->sq.is_enabled = 1;

	if (amdgpu_vcnfw_log)
		amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);

	return 0;
}

/**
 * vcn_v4_0_3_sw_init - sw init for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Load firmware and sw initialization
 */
static int vcn_v4_0_3_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, r, vcn_inst;

	r = amdgpu_vcn_sw_init(adev);
	if (r)
		return r;

	amdgpu_vcn_setup_ucode(adev);

	r = amdgpu_vcn_resume(adev);
	if (r)
		return r;

	/* VCN DEC TRAP */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
		VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
	if (r)
		return r;

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		vcn_inst = GET_INST(VCN, i);

		ring = &adev->vcn.inst[i].ring_enc[0];
		ring->use_doorbell = true;

		if (!amdgpu_sriov_vf(adev))
			ring->doorbell_index =
				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
				9 * vcn_inst;
		else
			ring->doorbell_index =
				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
				32 * vcn_inst;

		ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
		sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
				     AMDGPU_RING_PRIO_DEFAULT,
				     &adev->vcn.inst[i].sched_score);
		if (r)
			return r;

		vcn_v4_0_3_fw_shared_init(adev, i);
	}

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_alloc_mm_table(adev);
		if (r)
			return r;
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;

	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
		r = amdgpu_vcn_ras_sw_init(adev);
		if (r) {
			dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
			return r;
		}
	}

	return 0;
}

/**
 * vcn_v4_0_3_sw_fini - sw fini for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * VCN suspend and free up sw allocation
 */
static int vcn_v4_0_3_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, r, idx;

	if (drm_dev_enter(&adev->ddev, &idx)) {
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			volatile struct amdgpu_vcn4_fw_shared *fw_shared;

			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
			fw_shared->present_flag_0 = 0;
			fw_shared->sq.is_enabled = cpu_to_le32(false);
		}
		drm_dev_exit(idx);
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vcn_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vcn_sw_fini(adev);

	return r;
}

/**
 * vcn_v4_0_3_hw_init - start and test VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int vcn_v4_0_3_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, r, vcn_inst;

	if (amdgpu_sriov_vf(adev)) {
		r = vcn_v4_0_3_start_sriov(adev);
		if (r)
			goto done;

		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
			ring = &adev->vcn.inst[i].ring_enc[0];
			ring->wptr = 0;
			ring->wptr_old = 0;
			vcn_v4_0_3_unified_ring_set_wptr(ring);
			ring->sched.ready = true;
		}
	} else {
		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
			struct amdgpu_vcn4_fw_shared *fw_shared;

			vcn_inst = GET_INST(VCN, i);
			ring = &adev->vcn.inst[i].ring_enc[0];

			if (ring->use_doorbell) {
				adev->nbio.funcs->vcn_doorbell_range(
					adev, ring->use_doorbell,
					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						9 * vcn_inst,
					adev->vcn.inst[i].aid_id);

				WREG32_SOC15(
					VCN, GET_INST(VCN, ring->me),
					regVCN_RB1_DB_CTRL,
					ring->doorbell_index
							<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
						VCN_RB1_DB_CTRL__EN_MASK);

				/* Read DB_CTRL to flush the write DB_CTRL command. */
				RREG32_SOC15(
					VCN, GET_INST(VCN, ring->me),
					regVCN_RB1_DB_CTRL);
			}

			/* Re-init fw_shared when RAS fatal error occurred */
			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
			if (!fw_shared->sq.is_enabled)
				vcn_v4_0_3_fw_shared_init(adev, i);

			r = amdgpu_ring_test_helper(ring);
			if (r)
				goto done;
		}
	}

done:
	if (!r)
		DRM_DEV_INFO(adev->dev, "VCN decode initialized successfully(under %s).\n",
			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");

	return r;
}

/**
 * vcn_v4_0_3_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCN block, mark ring as not ready any more
 */
static int vcn_v4_0_3_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
		vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);

	return 0;
}

/**
 * vcn_v4_0_3_suspend - suspend VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * HW fini and suspend VCN block
 */
static int vcn_v4_0_3_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = vcn_v4_0_3_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vcn_suspend(adev);

	return r;
}

/**
 * vcn_v4_0_3_resume - resume VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Resume firmware and hw init VCN block
 */
static int vcn_v4_0_3_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_vcn_resume(adev);
	if (r)
		return r;

	r = vcn_v4_0_3_hw_init(adev);

	return r;
}

/**
 * vcn_v4_0_3_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number
 *
 * Let the VCN memory controller know its offsets
 */
static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
{
	uint32_t offset, size, vcn_inst;
	const struct common_firmware_header *hdr;

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

	vcn_inst = GET_INST(VCN, inst_idx);
	/* cache window 0: fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32_SOC15(
			VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
				 .tmr_mc_addr_lo));
		WREG32_SOC15(
			VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
				 .tmr_mc_addr_hi));
		WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
		offset = 0;
	} else {
		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
			     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
		WREG32_SOC15(VCN, vcn_inst,
			     regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
			     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
		offset = size;
		WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
			     AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
	}
	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);

	/* cache window 1: stack */
	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
		     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
		     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1,
		     AMDGPU_VCN_STACK_SIZE);

	/* cache window 2: context */
	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
		     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
				   AMDGPU_VCN_STACK_SIZE));
	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
		     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
				   AMDGPU_VCN_STACK_SIZE));
	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2,
		     AMDGPU_VCN_CONTEXT_SIZE);

	/* non-cache window */
	WREG32_SOC15(
		VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
	WREG32_SOC15(
		VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
	WREG32_SOC15(
		VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
}

/**
 * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 * @indirect: indirectly write sram
 *
 * Let the VCN memory controller know its offsets with dpg mode
 */
static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	uint32_t offset, size;
	const struct common_firmware_header *hdr;

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

	/* cache window 0: fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		if (!indirect) {
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
					inst_idx].tmr_mc_addr_lo), 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
					inst_idx].tmr_mc_addr_hi), 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
		} else {
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
		}
		offset = 0;
	} else {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
		offset = size;
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
	}

	if (!indirect)
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
	else
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);

	/* cache window 1: stack */
	if (!indirect) {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
	} else {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
	}
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);

	/* cache window 2: context */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
		lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
			AMDGPU_VCN_STACK_SIZE), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
		upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
			AMDGPU_VCN_STACK_SIZE), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);

	/* non-cache window */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);

	/* VCN global tiling registers */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}

/**
 * vcn_v4_0_3_disable_clock_gating - disable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number
 *
 * Disable clock gating for VCN block
 */
static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	uint32_t data;
	int vcn_inst;

	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
		return;

	vcn_inst = GET_INST(VCN, inst_idx);

	/* VCN disable CGC */
	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
	data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);

	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE);
	data &= ~(UVD_CGC_GATE__SYS_MASK
		| UVD_CGC_GATE__MPEG2_MASK
		| UVD_CGC_GATE__REGS_MASK
		| UVD_CGC_GATE__RBC_MASK
		| UVD_CGC_GATE__LMI_MC_MASK
		| UVD_CGC_GATE__LMI_UMC_MASK
		| UVD_CGC_GATE__MPC_MASK
		| UVD_CGC_GATE__LBSI_MASK
		| UVD_CGC_GATE__LRBBM_MASK
		| UVD_CGC_GATE__WCB_MASK
		| UVD_CGC_GATE__VCPU_MASK
		| UVD_CGC_GATE__MMSCH_MASK);

	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, data);
	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);

	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
	data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK
		| UVD_CGC_CTRL__MPEG2_MODE_MASK
		| UVD_CGC_CTRL__REGS_MODE_MASK
		| UVD_CGC_CTRL__RBC_MODE_MASK
		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
		| UVD_CGC_CTRL__MPC_MODE_MASK
		| UVD_CGC_CTRL__LBSI_MODE_MASK
		| UVD_CGC_CTRL__LRBBM_MODE_MASK
		| UVD_CGC_CTRL__WCB_MODE_MASK
		| UVD_CGC_CTRL__VCPU_MODE_MASK
		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);

	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE);
	data |= (UVD_SUVD_CGC_GATE__SRE_MASK
		| UVD_SUVD_CGC_GATE__SIT_MASK
		| UVD_SUVD_CGC_GATE__SMP_MASK
		| UVD_SUVD_CGC_GATE__SCM_MASK
		| UVD_SUVD_CGC_GATE__SDB_MASK
		| UVD_SUVD_CGC_GATE__SRE_H264_MASK
		| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
		| UVD_SUVD_CGC_GATE__SIT_H264_MASK
		| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
		| UVD_SUVD_CGC_GATE__SCM_H264_MASK
		| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
		| UVD_SUVD_CGC_GATE__SDB_H264_MASK
		| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
		| UVD_SUVD_CGC_GATE__ENT_MASK
		| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
		| UVD_SUVD_CGC_GATE__SITE_MASK
		| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
		| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
		| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
		| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, data);

	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
	data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
}

/**
 * vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
 *
 * @adev: amdgpu_device pointer
 * @sram_sel: sram select
 * @inst_idx: instance number index
 * @indirect: indirectly write sram
 *
 * Disable clock gating for VCN block with dpg mode
 */
static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
				int inst_idx, uint8_t indirect)
{
	uint32_t reg_data = 0;

	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
		return;

	/* enable sw clock gating control */
	reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	reg_data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK |
		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
		 UVD_CGC_CTRL__REGS_MODE_MASK |
		 UVD_CGC_CTRL__RBC_MODE_MASK |
		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
		 UVD_CGC_CTRL__IDCT_MODE_MASK |
		 UVD_CGC_CTRL__MPRD_MODE_MASK |
		 UVD_CGC_CTRL__MPC_MODE_MASK |
		 UVD_CGC_CTRL__LBSI_MODE_MASK |
		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
		 UVD_CGC_CTRL__WCB_MODE_MASK |
		 UVD_CGC_CTRL__VCPU_MODE_MASK);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);

	/* turn off clock gating */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_CGC_GATE), 0, sram_sel, indirect);

	/* turn on SUVD clock gating */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);

	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}

/**
 * vcn_v4_0_3_enable_clock_gating - enable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number
 *
 * Enable clock gating for VCN block
 */
static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	uint32_t data;
	int vcn_inst;

	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
		return;

	vcn_inst = GET_INST(VCN, inst_idx);

	/* enable VCN CGC */
	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
	data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);

	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
	data |= (UVD_CGC_CTRL__SYS_MODE_MASK
		| UVD_CGC_CTRL__MPEG2_MODE_MASK
		| UVD_CGC_CTRL__REGS_MODE_MASK
		| UVD_CGC_CTRL__RBC_MODE_MASK
		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
		| UVD_CGC_CTRL__MPC_MODE_MASK
		| UVD_CGC_CTRL__LBSI_MODE_MASK
		| UVD_CGC_CTRL__LRBBM_MODE_MASK
		| UVD_CGC_CTRL__WCB_MODE_MASK
		| UVD_CGC_CTRL__VCPU_MODE_MASK);
	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);

	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
	data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
}

/**
 * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 * @indirect: indirectly write sram
 *
 * Start VCN block with dpg mode
 */
static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	volatile struct amdgpu_vcn4_fw_shared *fw_shared =
		adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
	struct amdgpu_ring *ring;
	int vcn_inst;
	uint32_t tmp;

	vcn_inst = GET_INST(VCN, inst_idx);
	/* disable register anti-hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
		 ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
	/* enable dynamic power gating mode */
	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
	WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);

	if (indirect) {
		DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d",
			inst_idx, adev->vcn.inst[inst_idx].aid_id);
		adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
				(uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
		/* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
		WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF,
			adev->vcn.inst[inst_idx].aid_id, 0, true);
	}

	/* enable clock gating */
	vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);

	/* enable VCPU clock */
	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);

	/* disable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);

	/* setup regUVD_LMI_CTRL */
	tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
		UVD_LMI_CTRL__REQ_MODE_MASK |
		UVD_LMI_CTRL__CRC_RESET_MASK |
		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
		0x00100000L);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_MPC_CNTL),
		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_MPC_SET_MUXA0),
		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_MPC_SET_MUXB0),
		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_MPC_SET_MUX),
		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);

	vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect);

	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);

	/* enable LMI MC and UMC channels */
	tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);

	vcn_v4_0_3_enable_ras(adev, inst_idx, indirect);

	/* enable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, regUVD_MASTINT_EN),
		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);

	if (indirect)
		amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);

	ring = &adev->vcn.inst[inst_idx].ring_enc[0];

	/* program the RB_BASE for ring buffer */
	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
		     lower_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
		     upper_32_bits(ring->gpu_addr));

	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
		     ring->ring_size / sizeof(uint32_t));

	/* resetting ring, fw should not check RB ring */
	tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
	tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
	WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;

	/* Initialize the ring buffer's read and write pointers */
	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
	ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);

	tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
	tmp |= VCN_RB_ENABLE__RB_EN_MASK;
	WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);

	/*resetting done, fw can check RB ring */
	fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);

	return 0;
}

static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
{
	int i, vcn_inst;
	struct amdgpu_ring *ring_enc;
	uint64_t cache_addr;
	uint64_t rb_enc_addr;
	uint64_t ctx_addr;
	uint32_t param, resp, expected;
	uint32_t offset, cache_size;
	uint32_t tmp, timeout;

	struct amdgpu_mm_table *table = &adev->virt.mm_table;
	uint32_t *table_loc;
	uint32_t table_size;
	uint32_t size, size_dw;
	uint32_t init_status;
	uint32_t enabled_vcn;

	struct mmsch_v4_0_cmd_direct_write
		direct_wt = { {0} };
	struct mmsch_v4_0_cmd_direct_read_modify_write
		direct_rd_mod_wt = { {0} };
	struct mmsch_v4_0_cmd_end end = { {0} };
	struct mmsch_v4_0_3_init_header header;

	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;

	direct_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		vcn_inst = GET_INST(VCN, i);

		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
		header.version = MMSCH_VERSION;
		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;

		table_loc = (uint32_t *)table->cpu_addr;
		table_loc += header.total_size;

		table_size = 0;

		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);

		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);

			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);

			offset = 0;
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = cache_size;
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
				regUVD_VCPU_CACHE_OFFSET0),
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_SIZE0),
			cache_size);

		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_OFFSET1), 0);
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);

		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
			AMDGPU_VCN_STACK_SIZE;

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_OFFSET2), 0);

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);

		fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
		rb_setup = &fw_shared->rb_setup;

		ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
		ring_enc->wptr = 0;
		rb_enc_addr = ring_enc->gpu_addr;

		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
		rb_setup->rb_size = ring_enc->ring_size / 4;
		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
			regUVD_VCPU_NONCACHE_SIZE0),
			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
		MMSCH_V4_0_INSERT_END();

		header.vcn0.init_status = 0;
		header.vcn0.table_offset = header.total_size;
		header.vcn0.table_size = table_size;
		header.total_size += table_size;

		/* Send init table to mmsch */
		size = sizeof(struct mmsch_v4_0_3_init_header);
		table_loc = (uint32_t *)table->cpu_addr;
		memcpy((void *)table_loc, &header, size);

		ctx_addr = table->gpu_addr;
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

		tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);

		size = header.total_size;
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);

		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);

		param = 0x00000001;
		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
		tmp = 0;
		timeout = 1000;
		resp = 0;
		expected = MMSCH_VF_MAILBOX_RESP__OK;
		while (resp != expected) {
			resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
			if (resp != 0)
				break;

			udelay(10);
			tmp = tmp + 10;
			if (tmp >= timeout) {
				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
					" waiting for regMMSCH_VF_MAILBOX_RESP "\
					"(expected=0x%08x, readback=0x%08x)\n",
					tmp, expected, resp);
				return -EBUSY;
			}
		}

		enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
		init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
					&& init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
				"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
		}
	}

	return 0;
}

/**
 * vcn_v4_0_3_start - VCN start
 *
 * @adev: amdgpu_device pointer
 *
 * Start VCN block
 */
static int vcn_v4_0_3_start(struct amdgpu_device *adev)
{
	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
	struct amdgpu_ring *ring;
	int i, j, k, r, vcn_inst;
	uint32_t tmp;

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, true);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
			continue;
		}

		vcn_inst = GET_INST(VCN, i);
		/* set VCN status busy */
		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
		      UVD_STATUS__UVD_BUSY;
		WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);

		/*SW clock gating */
		vcn_v4_0_3_disable_clock_gating(adev, i);

		/* enable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
			 UVD_VCPU_CNTL__CLK_EN_MASK,
			 ~UVD_VCPU_CNTL__CLK_EN_MASK);

		/* disable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
			 ~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* enable LMI MC and UMC channels */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
			 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
		tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
		tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);

		/* setup regUVD_LMI_CTRL */
		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
			     tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
				     UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
				     UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
				     UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);

		/* setup regUVD_MPC_CNTL */
		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);

		/* setup UVD_MPC_SET_MUXA0 */
		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
			     ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
			      (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
			      (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
			      (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));

		/* setup UVD_MPC_SET_MUXB0 */
		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
			     ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
			      (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
			      (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
			      (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));

		/* setup UVD_MPC_SET_MUX */
		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
			     ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
			      (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
			      (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));

		vcn_v4_0_3_mc_resume(adev, i);

		/* VCN global tiling registers */
		WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
			     adev->gfx.config.gb_addr_config);
		WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
			     adev->gfx.config.gb_addr_config);

		/* unblock VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
			 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		/* release VCPU reset to boot */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
			 ~UVD_VCPU_CNTL__BLK_RST_MASK);

		for (j = 0; j < 10; ++j) {
			uint32_t status;

			for (k = 0; k < 100; ++k) {
				status = RREG32_SOC15(VCN, vcn_inst,
						      regUVD_STATUS);
				if (status & 2)
					break;
				mdelay(10);
			}
			r = 0;
			if (status & 2)
				break;

			DRM_DEV_ERROR(adev->dev,
				"VCN decode not responding, trying to reset the VCPU!!!\n");
			WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
						  regUVD_VCPU_CNTL),
				 UVD_VCPU_CNTL__BLK_RST_MASK,
				 ~UVD_VCPU_CNTL__BLK_RST_MASK);
			mdelay(10);
			WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
						  regUVD_VCPU_CNTL),
				 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);

			mdelay(10);
			r = -1;
		}

		if (r) {
			DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
			return r;
		}

		/* enable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
			 UVD_MASTINT_EN__VCPU_EN_MASK,
			 ~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* clear the busy bit of VCN_STATUS */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
			 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));

		ring = &adev->vcn.inst[i].ring_enc[0];
		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;

		/* program the RB_BASE for ring buffer */
		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
			     lower_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
			     upper_32_bits(ring->gpu_addr));

		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
			     ring->ring_size / sizeof(uint32_t));

		/* resetting ring, fw should not check RB ring */
		tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
		tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
		WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);

		/* Initialize the ring buffer's read and write pointers */
		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);

		tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
		tmp |= VCN_RB_ENABLE__RB_EN_MASK;
		WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);

		ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
		fw_shared->sq.queue_mode &=
			cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));

	}
	return 0;
}

/**
 * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 *
 * Stop VCN block with dpg mode
 */
static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
	uint32_t tmp;
	int vcn_inst;

	vcn_inst = GET_INST(VCN, inst_idx);

	/* Wait for power status to be 1 */
	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
			   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

	/* wait for read ptr to be equal to write ptr */
	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);

	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
			   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

	/* disable dynamic power gating mode */
	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
		 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
	return 0;
}

/**
 * vcn_v4_0_3_stop - VCN stop
 *
 * @adev: amdgpu_device pointer
 *
 * Stop VCN block
 */
static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
{
	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
	int i, r = 0, vcn_inst;
	uint32_t tmp;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		vcn_inst = GET_INST(VCN, i);

		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			vcn_v4_0_3_stop_dpg_mode(adev, i);
			continue;
		}

		/* wait for vcn idle */
		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
				       UVD_STATUS__IDLE, 0x7);
		if (r)
			goto Done;

		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
		      UVD_LMI_STATUS__READ_CLEAN_MASK |
		      UVD_LMI_STATUS__WRITE_CLEAN_MASK |
		      UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
				       tmp);
		if (r)
			goto Done;

		/* stall UMC channel */
		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
		      UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
				       tmp);
		if (r)
			goto Done;

		/* Unblock VCPU Register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
			 UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
			 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		/* release VCPU reset to boot */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
			 UVD_VCPU_CNTL__BLK_RST_MASK,
			 ~UVD_VCPU_CNTL__BLK_RST_MASK);

		/* disable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
			 ~(UVD_VCPU_CNTL__CLK_EN_MASK));

		/* reset LMI UMC/LMI/VCPU */
		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
		tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);

		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
		tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);

		/* clear VCN status */
		WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);

		/* apply HW clock gating */
		vcn_v4_0_3_enable_clock_gating(adev, i);
	}
Done:
	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, false);

	return 0;
}

/**
 * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number index
 * @new_state: pause state
 *
 * Pause dpg mode for VCN block
 */
static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
				struct dpg_pause_state *new_state)
{

	return 0;
}

/**
 * vcn_v4_0_3_unified_ring_get_rptr - get unified read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware unified read pointer
 */
static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

	return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
}

/**
 * vcn_v4_0_3_unified_ring_get_wptr - get unified write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware unified write pointer
 */
static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

	if (ring->use_doorbell)
		return *ring->wptr_cpu_addr;
	else
		return RREG32_SOC15(VCN, GET_INST(VCN, ring->me),
				    regUVD_RB_WPTR);
}

/**
 * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the enc write pointer to the hardware
 */
static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

	if (ring->use_doorbell) {
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
			     lower_32_bits(ring->wptr));
	}
}

static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_ENC,
	.align_mask = 0x3f,
	.nop = VCN_ENC_CMD_NO_OP,
	.get_rptr = vcn_v4_0_3_unified_ring_get_rptr,
	.get_wptr = vcn_v4_0_3_unified_ring_get_wptr,
	.set_wptr = vcn_v4_0_3_unified_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
		1, /* vcn_v2_0_enc_ring_insert_end */
	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_enc_ring_test_ring,
	.test_ib = amdgpu_vcn_unified_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v2_0_enc_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

/**
 * vcn_v4_0_3_set_unified_ring_funcs - set unified ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Set unified ring functions
 */
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
{
	int i, vcn_inst;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs;
		adev->vcn.inst[i].ring_enc[0].me = i;
		vcn_inst = GET_INST(VCN, i);
		adev->vcn.inst[i].aid_id =
			vcn_inst / adev->vcn.num_inst_per_aid;
	}
	DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n");
}

/**
 * vcn_v4_0_3_is_idle - check VCN block is idle
 *
 * @handle: amdgpu_device pointer
 *
 * Check whether VCN block is idle
 */
static bool vcn_v4_0_3_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, ret = 1;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) ==
			UVD_STATUS__IDLE);
	}

	return ret;
}

/**
 * vcn_v4_0_3_wait_for_idle - wait for VCN block idle
 *
 * @handle: amdgpu_device pointer
 *
 * Wait for VCN block idle
 */
static int vcn_v4_0_3_wait_for_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, ret = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS,
					 UVD_STATUS__IDLE, UVD_STATUS__IDLE);
		if (ret)
			return ret;
	}

	return ret;
}

/* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
 *
 * @handle: amdgpu_device pointer
 * @state: clock gating state
 *
 * Set VCN block clockgating state
 */
static int vcn_v4_0_3_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = state == AMD_CG_STATE_GATE;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (enable) {
			if (RREG32_SOC15(VCN, GET_INST(VCN, i),
					 regUVD_STATUS) != UVD_STATUS__IDLE)
				return -EBUSY;
			vcn_v4_0_3_enable_clock_gating(adev, i);
		} else {
			vcn_v4_0_3_disable_clock_gating(adev, i);
		}
	}
	return 0;
}

/**
 * vcn_v4_0_3_set_powergating_state - set VCN block powergating state
 *
 * @handle: amdgpu_device pointer
 * @state: power gating state
 *
 * Set VCN block powergating state
 */
static int vcn_v4_0_3_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	/* for SRIOV, guest should not control VCN Power-gating
	 * MMSCH FW should control Power-gating and clock-gating
	 * guest should avoid touching CGC and PG
	 */
	if (amdgpu_sriov_vf(adev)) {
		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
		return 0;
	}

	if (state == adev->vcn.cur_state)
		return 0;

	if (state == AMD_PG_STATE_GATE)
		ret = vcn_v4_0_3_stop(adev);
	else
		ret = vcn_v4_0_3_start(adev);

	if (!ret)
		adev->vcn.cur_state = state;

	return ret;
}

/**
 * vcn_v4_0_3_set_interrupt_state - set VCN block interrupt state
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt sources
 * @type: interrupt types
 * @state: interrupt states
 *
 * Set VCN block interrupt state
 */
static int vcn_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned int type,
					enum amdgpu_interrupt_state state)
{
	return 0;
}

/**
 * vcn_v4_0_3_process_interrupt - process VCN block interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt sources
 * @entry: interrupt entry from clients and sources
 *
 * Process VCN block interrupt
 */
static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	uint32_t i, inst;

	i = node_id_to_phys_map[entry->node_id];

	DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");

	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
		if (adev->vcn.inst[inst].aid_id == i)
			break;

	if (inst >= adev->vcn.num_vcn_inst) {
		dev_WARN_ONCE(adev->dev, 1,
			      "Interrupt received for unknown VCN instance %d",
			      entry->node_id);
		return 0;
	}

	switch (entry->src_id) {
	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
		amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
		break;
	default:
		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
	.set = vcn_v4_0_3_set_interrupt_state,
	.process = vcn_v4_0_3_process_interrupt,
};

/**
 * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
 *
 * @adev: amdgpu_device pointer
 *
 * Set VCN block interrupt irq functions
 */
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		adev->vcn.inst->irq.num_types++;
	}
	adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
}

static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
	.name = "vcn_v4_0_3",
	.early_init = vcn_v4_0_3_early_init,
	.late_init = NULL,
	.sw_init = vcn_v4_0_3_sw_init,
	.sw_fini = vcn_v4_0_3_sw_fini,
	.hw_init = vcn_v4_0_3_hw_init,
	.hw_fini = vcn_v4_0_3_hw_fini,
	.suspend = vcn_v4_0_3_suspend,
	.resume = vcn_v4_0_3_resume,
	.is_idle = vcn_v4_0_3_is_idle,
	.wait_for_idle = vcn_v4_0_3_wait_for_idle,
	.check_soft_reset = NULL,
	.pre_soft_reset = NULL,
	.soft_reset = NULL,
	.post_soft_reset = NULL,
	.set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
	.set_powergating_state = vcn_v4_0_3_set_powergating_state,
};

const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_VCN,
	.major = 4,
	.minor = 0,
	.rev = 3,
	.funcs = &vcn_v4_0_3_ip_funcs,
};

static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = {
	{AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, regVCN_UE_ERR_STATUS_HI_VIDD),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"},
	{AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, regVCN_UE_ERR_STATUS_HI_VIDV),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"},
};

static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
						  uint32_t vcn_inst,
						  void *ras_err_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;

	/* vcn v4_0_3 only support query uncorrectable errors */
	amdgpu_ras_inst_query_ras_error_count(adev,
			vcn_v4_0_3_ue_reg_list,
			ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
			NULL, 0, GET_INST(VCN, vcn_inst),
			AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
			&err_data->ue_count);
}

static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
					     void *ras_err_status)
{
	uint32_t i;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
		dev_warn(adev->dev, "VCN RAS is not supported\n");
		return;
	}

	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
}

static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
						  uint32_t vcn_inst)
{
	amdgpu_ras_inst_reset_ras_error_count(adev,
					vcn_v4_0_3_ue_reg_list,
					ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
					GET_INST(VCN, vcn_inst));
}

static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
{
	uint32_t i;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
		dev_warn(adev->dev, "VCN RAS is not supported\n");
		return;
	}

	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
}

static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
	.query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
	.reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
};

static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
	.ras_block = {
		.hw_ops = &vcn_v4_0_3_ras_hw_ops,
	},
};

static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
	adev->vcn.ras = &vcn_v4_0_3_ras;
}

static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
				  int inst_idx, bool indirect)
{
	uint32_t tmp;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
		return;

	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx,
			      SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
			      tmp, 0, indirect);

	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx,
			      SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
			      tmp, 0, indirect);
}