/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v2_0.h"
#include "mmsch_v3_0.h"

#include "vcn/vcn_3_0_0_offset.h"
#include "vcn/vcn_3_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"

#include <drm/drm_drv.h>

#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET			0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET			0x0f
#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET			0x10
#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET			0x11
#define mmUVD_NO_OP_INTERNAL_OFFSET				0x29
#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET			0x66
#define mmUVD_SCRATCH9_INTERNAL_OFFSET				0xc01d

#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431
#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4
#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x3b5
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c

#define VCN_INSTANCES_SIENNA_CICHLID				2
#define DEC_SW_RING_ENABLED					FALSE

#define RDECODE_MSG_CREATE					0x00000000
#define RDECODE_MESSAGE_CREATE					0x00000001

static int amdgpu_ih_clientid_vcns[] = {
	SOC15_IH_CLIENTID_VCN,
	SOC15_IH_CLIENTID_VCN1
};

static int amdgpu_ucode_id_vcns[] = {
	AMDGPU_UCODE_ID_VCN,
	AMDGPU_UCODE_ID_VCN1
};

static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v3_0_set_powergating_state(void *handle,
			enum amd_powergating_state state);
static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
			int inst_idx, struct dpg_pause_state *new_state);

static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);

/**
 * vcn_v3_0_early_init - set function pointers
 *
 * @handle: amdgpu_device pointer
 *
 * Set ring and irq function pointers
 */
static int vcn_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (amdgpu_sriov_vf(adev)) {
		for (i = 0; i < VCN_INSTANCES_SIENNA_CICHLID; i++)
			if (amdgpu_vcn_is_disabled_vcn(adev,
					VCN_DECODE_RING, i))
				adev->vcn.num_vcn_inst++;
		adev->vcn.harvest_config = 0;
		adev->vcn.num_enc_rings = 1;

	} else {
		if (adev->asic_type == CHIP_SIENNA_CICHLID) {
			u32 harvest;

			adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
			for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
				harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
				if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
					adev->vcn.harvest_config |= 1 << i;
			}

			if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
						AMDGPU_VCN_HARVEST_VCN1))
				/* both instances are harvested, disable the block */
				return -ENOENT;
		} else
			adev->vcn.num_vcn_inst = 1;

		if (adev->asic_type == CHIP_BEIGE_GOBY)
			adev->vcn.num_enc_rings = 0;
		else
			adev->vcn.num_enc_rings = 2;
	}

	vcn_v3_0_set_dec_ring_funcs(adev);
	vcn_v3_0_set_enc_ring_funcs(adev);
	vcn_v3_0_set_irq_funcs(adev);

	return 0;
}

/**
 * vcn_v3_0_sw_init - sw init for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Load firmware and sw initialization
 */
static int vcn_v3_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int i, j, r;
	int vcn_doorbell_index = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vcn_sw_init(adev);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);

		if ((adev->vcn.num_vcn_inst == VCN_INSTANCES_SIENNA_CICHLID) ||
		    (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)) {
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1;
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		}
		DRM_INFO("PSP loading VCN firmware\n");
	}

	r = amdgpu_vcn_resume(adev);
	if (r)
		return r;

	/*
	 * Note: doorbell assignment is fixed for SRIOV multiple VCN engines
	 * Formula:
	 *   vcn_db_base  = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
	 *   dec_ring_i   = vcn_db_base + i * (adev->vcn.num_enc_rings + 1)
	 *   enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j
	 */
	if (amdgpu_sriov_vf(adev)) {
		vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
		/* get DWORD offset */
		vcn_doorbell_index = vcn_doorbell_index << 1;
	}

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		volatile struct amdgpu_fw_shared *fw_shared;

		if (adev->vcn.harvest_config & (1 << i))
			continue;

		adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
		adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
		adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
		adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
		adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
		adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;

		adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
		adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
		adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
		adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
		adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
		adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
		adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
		adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
		adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
		adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);

		/* VCN DEC TRAP */
		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
				VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[i].irq);
		if (r)
			return r;

		atomic_set(&adev->vcn.inst[i].sched_score, 0);

		ring = &adev->vcn.inst[i].ring_dec;
		ring->use_doorbell = true;
		if (amdgpu_sriov_vf(adev)) {
			ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
		} else {
			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
		}
		sprintf(ring->name, "vcn_dec_%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
				     AMDGPU_RING_PRIO_DEFAULT,
				     &adev->vcn.inst[i].sched_score);
		if (r)
			return r;

		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
			/* VCN ENC TRAP */
			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
					j + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
			if (r)
				return r;

			ring = &adev->vcn.inst[i].ring_enc[j];
			ring->use_doorbell = true;
			if (amdgpu_sriov_vf(adev)) {
				ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;
			} else {
				ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
			}
			sprintf(ring->name, "vcn_enc_%d.%d", i, j);
			r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
					     AMDGPU_RING_PRIO_DEFAULT,
					     &adev->vcn.inst[i].sched_score);
			if (r)
				return r;
		}

		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
					     cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
					     cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
		fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
	}

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_alloc_mm_table(adev);
		if (r)
			return r;
	}
	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;

	return 0;
}

/**
 * vcn_v3_0_sw_fini - sw fini for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * VCN suspend and free up sw allocation
 */
static int vcn_v3_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, r, idx;

	if (drm_dev_enter(&adev->ddev, &idx)) {
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			volatile struct amdgpu_fw_shared *fw_shared;

			if (adev->vcn.harvest_config & (1 << i))
				continue;
			fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
			fw_shared->present_flag_0 = 0;
			fw_shared->sw_ring.is_enabled = false;
		}

		drm_dev_exit(idx);
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vcn_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vcn_sw_fini(adev);

	return r;
}

/**
 * vcn_v3_0_hw_init - start and test VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int vcn_v3_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r;

	if (amdgpu_sriov_vf(adev)) {
		r = vcn_v3_0_start_sriov(adev);
		if (r)
			goto done;

		/* initialize VCN dec and enc ring buffers */
		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
			if (adev->vcn.harvest_config & (1 << i))
				continue;

			ring = &adev->vcn.inst[i].ring_dec;
			ring->wptr = 0;
			ring->wptr_old = 0;
			vcn_v3_0_dec_ring_set_wptr(ring);
			ring->sched.ready = true;

			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
				ring = &adev->vcn.inst[i].ring_enc[j];
				ring->wptr = 0;
				ring->wptr_old = 0;
				vcn_v3_0_enc_ring_set_wptr(ring);
				ring->sched.ready = true;
			}
		}
	} else {
		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
			if (adev->vcn.harvest_config & (1 << i))
				continue;

			ring = &adev->vcn.inst[i].ring_dec;

			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
						ring->doorbell_index, i);

			r = amdgpu_ring_test_helper(ring);
			if (r)
				goto done;

			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
				ring = &adev->vcn.inst[i].ring_enc[j];
				r = amdgpu_ring_test_helper(ring);
				if (r)
					goto done;
			}
		}
	}

done:
	if (!r)
		DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? "DPG Mode" : "SPG Mode");

	return r;
}

/**
 * vcn_v3_0_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCN block, mark ring as not ready any more
 */
static int vcn_v3_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		if (!amdgpu_sriov_vf(adev)) {
			if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
					(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
					 RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
				vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
			}
		}
	}

	return 0;
}

/**
 * vcn_v3_0_suspend - suspend VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * HW fini and suspend VCN block
 */
static int vcn_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vcn_v3_0_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vcn_suspend(adev);

	return r;
}

/**
 * vcn_v3_0_resume - resume VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Resume firmware and hw init VCN block
 */
static int vcn_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vcn_resume(adev);
	if (r)
		return r;

	r = vcn_v3_0_hw_init(adev);

	return r;
}

/**
 * vcn_v3_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 * @inst: instance number
 *
 * Let the VCN memory controller know its offsets
 */
static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
{
	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
	uint32_t offset;

	/* cache window 0: fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
		WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
		WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0, 0);
		offset = 0;
	} else {
		WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
			lower_32_bits(adev->vcn.inst[inst].gpu_addr));
		WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
			upper_32_bits(adev->vcn.inst[inst].gpu_addr));
		offset = size;
		WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0,
			AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
	}
	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE0, size);

	/* cache window 1: stack */
	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
		lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET1, 0);
	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);

	/* cache window 2: context */
	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
		lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0);
	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);

	/* non-cache window */
	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
		lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr));
	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
		upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr));
	WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
	WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0,
		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
}

static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
	uint32_t offset;

	/* cache window 0: fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		if (!indirect) {
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
		} else {
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
		}
		offset = 0;
	} else {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
		offset = size;
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
	}

	if (!indirect)
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
	else
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);

	/* cache window 1: stack */
	if (!indirect) {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
	} else {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
	}
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);

	/* cache window 2: context */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
		lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
		upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);

	/* non-cache window */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0),
		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);

	/* VCN global tiling registers */
	WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
		UVD, 0, mmUVD_GFX10_ADDR_CONFIG),
		adev->gfx.config.gb_addr_config, 0, indirect);
}

static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
{
	uint32_t data = 0;

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);

		WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
		SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS,
			UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
	} else {
		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
			| 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
		WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
		SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, 0, 0x3F3FFFFF);
	}

	data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
	data &= ~0x103;
	if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
		data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
			UVD_POWER_STATUS__UVD_PG_EN_MASK;

	WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
}

static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
{
	uint32_t data;

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
		/* Before power off, this indicator has to be turned on */
		data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
		data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
		data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
		WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);

		data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
			| 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
		WREG32_SOC15(VCN, inst,
			mmUVD_PGFSM_CONFIG, data);

		data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDIRL_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDATD_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
			| 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
		SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
	}
}

/**
 * vcn_v3_0_disable_clock_gating - disable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 * @inst: instance number
 *
 * Disable clock gating for VCN block
 */
static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
{
	uint32_t data;

	/* VCN disable CGC */
	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
		data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	else
		data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);

	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_GATE);
	data &= ~(UVD_CGC_GATE__SYS_MASK
		| UVD_CGC_GATE__UDEC_MASK
		| UVD_CGC_GATE__MPEG2_MASK
		| UVD_CGC_GATE__REGS_MASK
		| UVD_CGC_GATE__RBC_MASK
		| UVD_CGC_GATE__LMI_MC_MASK
		| UVD_CGC_GATE__LMI_UMC_MASK
		| UVD_CGC_GATE__IDCT_MASK
		| UVD_CGC_GATE__MPRD_MASK
		| UVD_CGC_GATE__MPC_MASK
		| UVD_CGC_GATE__LBSI_MASK
		| UVD_CGC_GATE__LRBBM_MASK
		| UVD_CGC_GATE__UDEC_RE_MASK
		| UVD_CGC_GATE__UDEC_CM_MASK
		| UVD_CGC_GATE__UDEC_IT_MASK
		| UVD_CGC_GATE__UDEC_DB_MASK
		| UVD_CGC_GATE__UDEC_MP_MASK
		| UVD_CGC_GATE__WCB_MASK
		| UVD_CGC_GATE__VCPU_MASK
		| UVD_CGC_GATE__MMSCH_MASK);

	WREG32_SOC15(VCN, inst, mmUVD_CGC_GATE, data);

	SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);

	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
	data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
		| UVD_CGC_CTRL__SYS_MODE_MASK
		| UVD_CGC_CTRL__UDEC_MODE_MASK
		| UVD_CGC_CTRL__MPEG2_MODE_MASK
		| UVD_CGC_CTRL__REGS_MODE_MASK
		| UVD_CGC_CTRL__RBC_MODE_MASK
		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
		| UVD_CGC_CTRL__IDCT_MODE_MASK
		| UVD_CGC_CTRL__MPRD_MODE_MASK
		| UVD_CGC_CTRL__MPC_MODE_MASK
		| UVD_CGC_CTRL__LBSI_MODE_MASK
		| UVD_CGC_CTRL__LRBBM_MODE_MASK
		| UVD_CGC_CTRL__WCB_MODE_MASK
		| UVD_CGC_CTRL__VCPU_MODE_MASK
		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
	WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);

	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE);
	data |= (UVD_SUVD_CGC_GATE__SRE_MASK
		| UVD_SUVD_CGC_GATE__SIT_MASK
		| UVD_SUVD_CGC_GATE__SMP_MASK
		| UVD_SUVD_CGC_GATE__SCM_MASK
		| UVD_SUVD_CGC_GATE__SDB_MASK
		| UVD_SUVD_CGC_GATE__SRE_H264_MASK
		| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
		| UVD_SUVD_CGC_GATE__SIT_H264_MASK
		| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
		| UVD_SUVD_CGC_GATE__SCM_H264_MASK
		| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
		| UVD_SUVD_CGC_GATE__SDB_H264_MASK
		| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
		| UVD_SUVD_CGC_GATE__SCLR_MASK
		| UVD_SUVD_CGC_GATE__ENT_MASK
		| UVD_SUVD_CGC_GATE__IME_MASK
		| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
		| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
		| UVD_SUVD_CGC_GATE__SITE_MASK
		| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
		| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
		| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
		| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK
		| UVD_SUVD_CGC_GATE__EFC_MASK
		| UVD_SUVD_CGC_GATE__SAOE_MASK
		| UVD_SUVD_CGC_GATE__SRE_AV1_MASK
		| UVD_SUVD_CGC_GATE__FBC_PCLK_MASK
		| UVD_SUVD_CGC_GATE__FBC_CCLK_MASK
		| UVD_SUVD_CGC_GATE__SCM_AV1_MASK
		| UVD_SUVD_CGC_GATE__SMPA_MASK);
	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data);

	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2);
	data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK
		| UVD_SUVD_CGC_GATE2__MPBE1_MASK
		| UVD_SUVD_CGC_GATE2__SIT_AV1_MASK
		| UVD_SUVD_CGC_GATE2__SDB_AV1_MASK
		| UVD_SUVD_CGC_GATE2__MPC1_MASK);
	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data);

	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
	data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
		| UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
		| UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
		| UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
		| UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
		| UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
		| UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
}

static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
		uint8_t sram_sel, int inst_idx, uint8_t indirect)
{
	uint32_t reg_data = 0;

	/* enable sw clock gating control */
	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
		reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	else
		reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
		UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
		UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
		UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
		UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
		UVD_CGC_CTRL__SYS_MODE_MASK |
		UVD_CGC_CTRL__UDEC_MODE_MASK |
		UVD_CGC_CTRL__MPEG2_MODE_MASK |
		UVD_CGC_CTRL__REGS_MODE_MASK |
		UVD_CGC_CTRL__RBC_MODE_MASK |
		UVD_CGC_CTRL__LMI_MC_MODE_MASK |
		UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
		UVD_CGC_CTRL__IDCT_MODE_MASK |
		UVD_CGC_CTRL__MPRD_MODE_MASK |
		UVD_CGC_CTRL__MPC_MODE_MASK |
		UVD_CGC_CTRL__LBSI_MODE_MASK |
		UVD_CGC_CTRL__LRBBM_MODE_MASK |
		UVD_CGC_CTRL__WCB_MODE_MASK |
		UVD_CGC_CTRL__VCPU_MODE_MASK |
		UVD_CGC_CTRL__MMSCH_MODE_MASK);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);

	/* turn off clock gating */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);

	/* turn on SUVD clock gating */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);

	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}

/**
 * vcn_v3_0_enable_clock_gating - enable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 * @inst: instance number
 *
 * Enable clock gating for VCN block
 */
static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
{
	uint32_t data;

	/* enable VCN CGC */
	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
		data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	else
		data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);

	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
	data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
		| UVD_CGC_CTRL__SYS_MODE_MASK
		| UVD_CGC_CTRL__UDEC_MODE_MASK
		| UVD_CGC_CTRL__MPEG2_MODE_MASK
		| UVD_CGC_CTRL__REGS_MODE_MASK
		| UVD_CGC_CTRL__RBC_MODE_MASK
		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
		| UVD_CGC_CTRL__IDCT_MODE_MASK
		| UVD_CGC_CTRL__MPRD_MODE_MASK
		| UVD_CGC_CTRL__MPC_MODE_MASK
		| UVD_CGC_CTRL__LBSI_MODE_MASK
		| UVD_CGC_CTRL__LRBBM_MODE_MASK
		| UVD_CGC_CTRL__WCB_MODE_MASK
		| UVD_CGC_CTRL__VCPU_MODE_MASK
		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
	WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);

	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
	data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
		| UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
		| UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
		| UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
		| UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
		| UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
		| UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
		| UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
}

static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
	struct amdgpu_ring *ring;
	uint32_t rb_bufsz, tmp;

	/* disable register anti-hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
	/* enable dynamic power gating mode */
	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
	WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);

	if (indirect)
		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;

	/* enable clock gating */
	vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);

	/* enable VCPU clock */
	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);

	/* disable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);

	/* setup mmUVD_LMI_CTRL */
	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
		UVD_LMI_CTRL__REQ_MODE_MASK |
		UVD_LMI_CTRL__CRC_RESET_MASK |
		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
		0x00100000L);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_MPC_CNTL),
		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_MPC_SET_MUXA0),
		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_MPC_SET_MUXB0),
		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_MPC_SET_MUX),
		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);

	vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);

	/* enable LMI MC and UMC channels */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);

	/* unblock VCPU register access */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);

	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);

	/* enable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_MASTINT_EN),
		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);

	/* add nop to workaround PSP size check */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);

	if (indirect)
		psp_update_vcn_sram(adev, inst_idx,
			adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
			(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
				   (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));

	ring = &adev->vcn.inst[inst_idx].ring_dec;
	/* force RBC into idle state */
	rb_bufsz = order_base_2(ring->ring_size);
	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);

	/* Stall DPG before WPTR/RPTR reset */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
	fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);

	/* set the write pointer delay */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);

	/* set the wb address */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
		(upper_32_bits(ring->gpu_addr) >> 2));

	/* program the RB_BASE for ring buffer */
	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
		lower_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
		upper_32_bits(ring->gpu_addr));

	/* Initialize the ring buffer's read and write pointers */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);

	WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);

	ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
		lower_32_bits(ring->wptr));

	/* Reset FW shared memory RBC WPTR/RPTR */
	fw_shared->rb.rptr = 0;
	fw_shared->rb.wptr = lower_32_bits(ring->wptr);

	/* resetting done, fw can check RB ring */
	fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);

	/* Unstall DPG */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

	return 0;
}

static int vcn_v3_0_start(struct amdgpu_device *adev)
{
	volatile struct amdgpu_fw_shared *fw_shared;
	struct amdgpu_ring *ring;
	uint32_t rb_bufsz, tmp;
	int i, j, k, r;

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, true);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
			continue;
		}

		/* disable VCN power gating */
		vcn_v3_0_disable_static_power_gating(adev, i);

		/* set VCN status busy */
		tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
		WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);

		/* SW clock gating */
		vcn_v3_0_disable_clock_gating(adev, i);

		/* enable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
			UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);

		/* disable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
			~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* enable LMI MC and UMC channels */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

		tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
		tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
		tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
		WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);

		/* setup mmUVD_LMI_CTRL */
		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
			UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);

		/* setup mmUVD_MPC_CNTL */
		tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
		WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);

		/* setup UVD_MPC_SET_MUXA0 */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
			((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
			 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
			 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
			 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));

		/* setup UVD_MPC_SET_MUXB0 */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
			((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
			 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
			 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
			 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));

		/* setup mmUVD_MPC_SET_MUX */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
			((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
			 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
			 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));

		vcn_v3_0_mc_resume(adev, i);

		/* VCN global tiling registers */
		WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
			adev->gfx.config.gb_addr_config);

		/* unblock VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		/* release VCPU reset to boot */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
			~UVD_VCPU_CNTL__BLK_RST_MASK);

		for (j = 0; j < 10; ++j) {
			uint32_t status;

			for (k = 0; k < 100; ++k) {
				status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
				if (status & 2)
					break;
				mdelay(10);
			}
			r = 0;
			if (status & 2)
				break;

			DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
				UVD_VCPU_CNTL__BLK_RST_MASK,
				~UVD_VCPU_CNTL__BLK_RST_MASK);
			mdelay(10);
			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
				~UVD_VCPU_CNTL__BLK_RST_MASK);

			mdelay(10);
			r = -1;
		}

		if (r) {
			DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
			return r;
		}

		/* enable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
			UVD_MASTINT_EN__VCPU_EN_MASK,
			~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* clear the busy bit of VCN_STATUS */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));

		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);

		ring = &adev->vcn.inst[i].ring_dec;
		/* force RBC into idle state */
		rb_bufsz = order_base_2(ring->ring_size);
		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);

		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
		fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);

		/* program the RB_BASE for ring buffer */
		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
			lower_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
			upper_32_bits(ring->gpu_addr));

		/* Initialize the ring buffer's read and write pointers */
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);

		WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
		ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
			lower_32_bits(ring->wptr));
		fw_shared->rb.wptr = lower_32_bits(ring->wptr);
		fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);

		if (adev->asic_type != CHIP_BEIGE_GOBY) {
			fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
			ring = &adev->vcn.inst[i].ring_enc[0];
			WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
			WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
			WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
			WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
			fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);

			fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
			ring = &adev->vcn.inst[i].ring_enc[1];
			WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
			WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
			WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
			WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
			fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
		}
	}

	return 0;
}

static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
{
	int i, j;
	struct amdgpu_ring *ring;
	uint64_t cache_addr;
	uint64_t rb_addr;
	uint64_t ctx_addr;
	uint32_t param, resp, expected;
	uint32_t offset, cache_size;
	uint32_t tmp, timeout;
	uint32_t id;

	struct amdgpu_mm_table *table = &adev->virt.mm_table;
	uint32_t *table_loc;
	uint32_t table_size;
	uint32_t size, size_dw;

	struct mmsch_v3_0_cmd_direct_write
		direct_wt = { {0} };
	struct mmsch_v3_0_cmd_direct_read_modify_write
		direct_rd_mod_wt = { {0} };
	struct mmsch_v3_0_cmd_end end = { {0} };
	struct mmsch_v3_0_init_header header;

	direct_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	end.cmd_header.command_type =
		MMSCH_COMMAND__END;

	header.version = MMSCH_VERSION;
	header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
	for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
		header.inst[i].init_status = 0;
		header.inst[i].table_offset = 0;
		header.inst[i].table_size = 0;
	}

	table_loc = (uint32_t *)table->cpu_addr;
	table_loc += header.total_size;
	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		table_size = 0;

		MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_STATUS),
			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);

		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			id = amdgpu_ucode_id_vcns[i];
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				adev->firmware.ucode[id].tmr_mc_addr_lo);
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				adev->firmware.ucode[id].tmr_mc_addr_hi);
			offset = 0;
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_VCPU_CACHE_OFFSET0),
				0);
		} else {
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = cache_size;
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_VCPU_CACHE_OFFSET0),
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}

		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_VCPU_CACHE_SIZE0),
			cache_size);

		cache_addr = adev->vcn.inst[i].gpu_addr + offset;
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(cache_addr));
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(cache_addr));
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_VCPU_CACHE_OFFSET1),
			0);
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_VCPU_CACHE_SIZE1),
			AMDGPU_VCN_STACK_SIZE);

		cache_addr = adev->vcn.inst[i].gpu_addr + offset +
			AMDGPU_VCN_STACK_SIZE;
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
			lower_32_bits(cache_addr));
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
			upper_32_bits(cache_addr));
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_VCPU_CACHE_OFFSET2),
			0);
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_VCPU_CACHE_SIZE2),
			AMDGPU_VCN_CONTEXT_SIZE);

		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
			ring = &adev->vcn.inst[i].ring_enc[j];
			ring->wptr = 0;
			rb_addr = ring->gpu_addr;
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_RB_BASE_LO),
				lower_32_bits(rb_addr));
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_RB_BASE_HI),
				upper_32_bits(rb_addr));
			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				mmUVD_RB_SIZE),
				ring->ring_size / 4);
		}

		ring = &adev->vcn.inst[i].ring_dec;
		ring->wptr = 0;
		rb_addr = ring->gpu_addr;
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
			lower_32_bits(rb_addr));
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
			upper_32_bits(rb_addr));
		/* force RBC into idle state */
		tmp = order_base_2(ring->ring_size);
		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			mmUVD_RBC_RB_CNTL),
			tmp);

		/* add end packet */
		MMSCH_V3_0_INSERT_END();

		/* refine header */
		header.inst[i].init_status = 0;
		header.inst[i].table_offset = header.total_size;
		header.inst[i].table_size = table_size;
		header.total_size += table_size;
	}

	/* Update init table header in memory */
	size = sizeof(struct mmsch_v3_0_init_header);
	table_loc = (uint32_t *)table->cpu_addr;
	memcpy((void *)table_loc, &header, size);

	/* message MMSCH (in VCN[0]) to initialize this client
	 * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
	 * of memory descriptor location
	 */
	ctx_addr = table->gpu_addr;
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

	/* 2, update vmid of descriptor */
	tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
	tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	/* use domain0 for MM scheduler */
	tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp);

	/* 3, notify mmsch about the size of this descriptor */
	size = header.total_size;
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);

	/* 4, set resp to zero */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);

	/* 5, kick off the initialization and wait until
	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
	 */
	param = 0x10000001;
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param);
	tmp = 0;
	timeout = 1000;
	resp = 0;
	expected = param + 1;
	while (resp != expected) {
		resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
		if (resp == expected)
			break;

		udelay(10);
		tmp = tmp + 10;
		if (tmp >= timeout) {
			DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
				" waiting for mmMMSCH_VF_MAILBOX_RESP "\
				"(expected=0x%08x, readback=0x%08x)\n",
				tmp, expected, resp);
			return -EBUSY;
		}
	}

	return 0;
}

static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
	uint32_t tmp;

	/* Wait for power status to be 1 */
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

	/* wait for read ptr to be equal to write ptr */
	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);

	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);

	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);

	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

	/* disable dynamic power gating mode */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);

	return 0;
}

static int vcn_v3_0_stop(struct amdgpu_device *adev)
{
	uint32_t tmp;
	int i, r = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v3_0_stop_dpg_mode(adev, i);
			continue;
		}

		/* wait for vcn idle */
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
		if (r)
			return r;

		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
			UVD_LMI_STATUS__READ_CLEAN_MASK |
			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
		if (r)
			return r;

		/* disable LMI UMC channel */
		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
		if (r)
			return r;

		/* block VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
			UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		/* reset VCPU */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
			UVD_VCPU_CNTL__BLK_RST_MASK,
			~UVD_VCPU_CNTL__BLK_RST_MASK);

		/* disable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
			~(UVD_VCPU_CNTL__CLK_EN_MASK));

		/* apply soft reset */
		tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
		tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
		WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
		tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
		WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);

		/* clear status */
		WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);

		/* apply HW clock gating */
		vcn_v3_0_enable_clock_gating(adev, i);

		/* enable VCN power gating */
		vcn_v3_0_enable_static_power_gating(adev, i);
	}

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, false);

	return 0;
}

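/**
 * vcn_v3_0_pause_dpg_mode - pause/unpause the DPG state machine
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number
 * @new_state: requested DPG pause state
 *
 * Descriptive comment added here; the function below was undocumented.
 * Request or clear a DPG pause for the given VCN instance when the
 * requested state differs from the cached one; on pause, the encode
 * rings and the decode RBC read/write pointers are reprogrammed from
 * the FW shared memory area.
 */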
static int vcn_v3_0_stop(struct amdgpu_device *adev)
{
	uint32_t tmp;
	int i, r = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v3_0_stop_dpg_mode(adev, i);
			continue;
		}

		/* wait for vcn idle */
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
		if (r)
			return r;

		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
			UVD_LMI_STATUS__READ_CLEAN_MASK |
			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
		if (r)
			return r;

		/* disable LMI UMC channel */
		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
		if (r)
			return r;

		/* block VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
			UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		/* reset VCPU */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
			UVD_VCPU_CNTL__BLK_RST_MASK,
			~UVD_VCPU_CNTL__BLK_RST_MASK);

		/* disable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
			~(UVD_VCPU_CNTL__CLK_EN_MASK));

		/* apply soft reset */
		tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
		tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
		WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
		tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
		WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);

		/* clear status */
		WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);

		/* apply HW clock gating */
		vcn_v3_0_enable_clock_gating(adev, i);

		/* enable VCN power gating */
		vcn_v3_0_enable_static_power_gating(adev, i);
	}

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, false);

	return 0;
}
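
/**
 * vcn_v3_0_pause_dpg_mode - VCN pause/unpause under dynamic power gating
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: instance number
 * @new_state: requested pause state
 *
 * If the firmware-based pause state changed, request the pause (or unpause)
 * through UVD_DPG_PAUSE and, on pause, restore the encode ring registers and
 * the decode ring pointers saved in firmware shared memory.
 */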
static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
	int inst_idx, struct dpg_pause_state *new_state)
{
	volatile struct amdgpu_fw_shared *fw_shared;
	struct amdgpu_ring *ring;
	uint32_t reg_data = 0;
	int ret_code;

	/* pause/unpause if state is changed */
	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
		DRM_DEBUG("dpg pause state changed %d -> %d",
			adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
		reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);

		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
			ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

			if (!ret_code) {
				/* pause DPG */
				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
				WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);

				/* wait for ACK */
				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);

				/* Stall DPG before WPTR/RPTR reset */
				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
					UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
					~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

				if (adev->asic_type != CHIP_BEIGE_GOBY) {
					/* Restore */
					fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
					fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
					ring = &adev->vcn.inst[inst_idx].ring_enc[0];
					ring->wptr = 0;
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
					fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);

					fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
					ring = &adev->vcn.inst[inst_idx].ring_enc[1];
					ring->wptr = 0;
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
					fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);

					/* restore wptr/rptr with pointers saved in FW shared memory */
					WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr);
					WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr);
				}

				/* Unstall DPG */
				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
					0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
					UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
			}
		} else {
			/* unpause dpg, no need to wait */
			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
		}
		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
	}

	return 0;
}

/**
 * vcn_v3_0_dec_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vcn_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
}

/**
 * vcn_v3_0_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];
	else
		return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
}

/**
 * vcn_v3_0_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	volatile struct amdgpu_fw_shared *fw_shared;

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		/* whenever we update RBC_RB_WPTR, save the wptr in shared rb.wptr and scratch2 */
		fw_shared = adev->vcn.inst[ring->me].fw_shared_cpu_addr;
		fw_shared->rb.wptr = lower_32_bits(ring->wptr);
		WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
			lower_32_bits(ring->wptr));
	}

	if (ring->use_doorbell) {
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
	}
}
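
/*
 * Decode software ring helpers: each command is emitted as a
 * VCN_DEC_SW_CMD_* opcode followed by its operands. These are only used
 * when DEC_SW_RING_ENABLED selects the software-ring decode functions.
 */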
static void vcn_v3_0_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
	u64 seq, uint32_t flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP);
}

static void vcn_v3_0_dec_sw_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
}

static void vcn_v3_0_dec_sw_ring_emit_ib(struct amdgpu_ring *ring,
	struct amdgpu_job *job,
	struct amdgpu_ib *ib,
	uint32_t flags)
{
	uint32_t vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vcn_v3_0_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
	uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vcn_v3_0_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
	uint32_t vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t data0, data1, mask;

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for register write */
	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
	data1 = lower_32_bits(pd_addr);
	mask = 0xffffffff;
	vcn_v3_0_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask);
}

static void vcn_v3_0_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.align_mask = 0x3f,
	.nop = VCN_DEC_SW_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v3_0_dec_ring_get_rptr,
	.get_wptr = vcn_v3_0_dec_ring_get_wptr,
	.set_wptr = vcn_v3_0_dec_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vcn_v3_0_dec_sw_ring_emit_vm_flush */
		5 + 5 + /* vcn_v3_0_dec_sw_ring_emit_fence x2 vm fence */
		1, /* vcn_v3_0_dec_sw_ring_insert_end */
	.emit_ib_size = 5, /* vcn_v3_0_dec_sw_ring_emit_ib */
	.emit_ib = vcn_v3_0_dec_sw_ring_emit_ib,
	.emit_fence = vcn_v3_0_dec_sw_ring_emit_fence,
	.emit_vm_flush = vcn_v3_0_dec_sw_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
	.test_ib = NULL, /* amdgpu_vcn_dec_sw_ring_test_ib */
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v3_0_dec_sw_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v3_0_dec_sw_ring_emit_wreg,
	.emit_reg_wait = vcn_v3_0_dec_sw_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
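
/*
 * Pin the entity to a single VCN decode scheduler. This is only possible
 * while nothing has been submitted on the entity yet, i.e. the create
 * message must come in the first IB.
 */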
static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
{
	struct drm_gpu_scheduler **scheds;

	/* The create msg must be in the first IB submitted */
	if (atomic_read(&p->entity->fence_seq))
		return -EINVAL;

	scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
		[AMDGPU_RING_PRIO_DEFAULT].sched;
	drm_sched_entity_modify_sched(p->entity, scheds, 1);
	return 0;
}

static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va_mapping *map;
	uint32_t *msg, num_buffers;
	struct amdgpu_bo *bo;
	uint64_t start, end;
	unsigned int i;
	void *ptr;
	int r;

	addr &= AMDGPU_GMC_HOLE_MASK;
	r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
	if (r) {
		DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
		return r;
	}

	start = map->start * AMDGPU_GPU_PAGE_SIZE;
	end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
	if (addr & 0x7) {
		DRM_ERROR("VCN messages must be 8 byte aligned!\n");
		return -EINVAL;
	}

	bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r) {
		DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
		return r;
	}

	msg = ptr + addr - start;

	/* Check length */
	if (msg[1] > end - addr) {
		r = -EINVAL;
		goto out;
	}

	if (msg[3] != RDECODE_MSG_CREATE)
		goto out;

	num_buffers = msg[2];
	for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
		uint32_t offset, size, *create;

		if (msg[0] != RDECODE_MESSAGE_CREATE)
			continue;

		offset = msg[1];
		size = msg[2];

		if (offset + size > end) {
			r = -EINVAL;
			goto out;
		}

		create = ptr + addr + offset - start;

		/* H264, HEVC and VP9 can run on any instance */
		if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
			continue;

		r = vcn_v3_0_limit_sched(p);
		if (r)
			goto out;
	}

out:
	amdgpu_bo_kunmap(bo);
	return r;
}
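
/*
 * The first instance can decode any stream; for the other instances the IB
 * is scanned for decode messages and vcn_v3_0_dec_msg() decides whether the
 * session has to be pinned to a single decode scheduler.
 */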
static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
	uint32_t ib_idx)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	uint32_t msg_lo = 0, msg_hi = 0;
	unsigned i;
	int r;

	/* The first instance can decode anything */
	if (!ring->me)
		return 0;

	for (i = 0; i < ib->length_dw; i += 2) {
		uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
		uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1);

		if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
			msg_lo = val;
		} else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
			msg_hi = val;
		} else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
			val == 0) {
			r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
			if (r)
				return r;
		}
	}
	return 0;
}

static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.align_mask = 0xf,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v3_0_dec_ring_get_rptr,
	.get_wptr = vcn_v3_0_dec_ring_get_wptr,
	.set_wptr = vcn_v3_0_dec_ring_set_wptr,
	.patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
		6,
	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
	.test_ring = vcn_v2_0_dec_ring_test_ring,
	.test_ib = amdgpu_vcn_dec_ring_test_ib,
	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
	.insert_start = vcn_v2_0_dec_ring_insert_start,
	.insert_end = vcn_v2_0_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

/**
 * vcn_v3_0_enc_ring_get_rptr - get enc read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware enc read pointer
 */
static uint64_t vcn_v3_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
	else
		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
}

/**
 * vcn_v3_0_enc_ring_get_wptr - get enc write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware enc write pointer
 */
static uint64_t vcn_v3_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
		if (ring->use_doorbell)
			return adev->wb.wb[ring->wptr_offs];
		else
			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
	} else {
		if (ring->use_doorbell)
			return adev->wb.wb[ring->wptr_offs];
		else
			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
	}
}

/**
 * vcn_v3_0_enc_ring_set_wptr - set enc write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the enc write pointer to the hardware
 */
static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
		if (ring->use_doorbell) {
			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
		}
	} else {
		if (ring->use_doorbell) {
			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
		}
	}
}

static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_ENC,
	.align_mask = 0x3f,
	.nop = VCN_ENC_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v3_0_enc_ring_get_rptr,
	.get_wptr = vcn_v3_0_enc_ring_get_wptr,
	.set_wptr = vcn_v3_0_enc_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
		1, /* vcn_v2_0_enc_ring_insert_end */
	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_enc_ring_test_ring,
	.test_ib = amdgpu_vcn_enc_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v2_0_enc_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		if (!DEC_SW_RING_ENABLED)
			adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
		else
			adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
		adev->vcn.inst[i].ring_dec.me = i;
		DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i,
			DEC_SW_RING_ENABLED ? "(Software Ring)" : "");
	}
}

static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
			adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
			adev->vcn.inst[i].ring_enc[j].me = i;
		}
		if (adev->vcn.num_enc_rings > 0)
			DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
	}
}

static bool vcn_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, ret = 1;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
	}

	return ret;
}

static int vcn_v3_0_wait_for_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, ret = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
			UVD_STATUS__IDLE);
		if (ret)
			return ret;
	}

	return ret;
}

static int vcn_v3_0_set_clockgating_state(void *handle,
	enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		if (enable) {
			if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
				return -EBUSY;
			vcn_v3_0_enable_clock_gating(adev, i);
		} else {
			vcn_v3_0_disable_clock_gating(adev, i);
		}
	}

	return 0;
}

static int vcn_v3_0_set_powergating_state(void *handle,
	enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	/* for SRIOV, guest should not control VCN Power-gating
	 * MMSCH FW should control Power-gating and clock-gating
	 * guest should avoid touching CGC and PG
	 */
	if (amdgpu_sriov_vf(adev)) {
		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
		return 0;
	}

	if (state == adev->vcn.cur_state)
		return 0;

	if (state == AMD_PG_STATE_GATE)
		ret = vcn_v3_0_stop(adev);
	else
		ret = vcn_v3_0_start(adev);

	if (!ret)
		adev->vcn.cur_state = state;

	return ret;
}

static int vcn_v3_0_set_interrupt_state(struct amdgpu_device *adev,
	struct amdgpu_irq_src *source,
	unsigned type,
	enum amdgpu_interrupt_state state)
{
	return 0;
}
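
/**
 * vcn_v3_0_process_interrupt - process VCN block interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt sources
 * @entry: interrupt entry from processed interrupt
 *
 * Route the interrupt to fence processing on the decode or encode ring of
 * the VCN instance that raised it.
 */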
static int vcn_v3_0_process_interrupt(struct amdgpu_device *adev,
	struct amdgpu_irq_src *source,
	struct amdgpu_iv_entry *entry)
{
	uint32_t ip_instance;

	switch (entry->client_id) {
	case SOC15_IH_CLIENTID_VCN:
		ip_instance = 0;
		break;
	case SOC15_IH_CLIENTID_VCN1:
		ip_instance = 1;
		break;
	default:
		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
		return 0;
	}

	DRM_DEBUG("IH: VCN TRAP\n");

	switch (entry->src_id) {
	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
		break;
	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
		break;
	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static const struct amdgpu_irq_src_funcs vcn_v3_0_irq_funcs = {
	.set = vcn_v3_0_set_interrupt_state,
	.process = vcn_v3_0_process_interrupt,
};

static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
		adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
	}
}

static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
	.name = "vcn_v3_0",
	.early_init = vcn_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vcn_v3_0_sw_init,
	.sw_fini = vcn_v3_0_sw_fini,
	.hw_init = vcn_v3_0_hw_init,
	.hw_fini = vcn_v3_0_hw_fini,
	.suspend = vcn_v3_0_suspend,
	.resume = vcn_v3_0_resume,
	.is_idle = vcn_v3_0_is_idle,
	.wait_for_idle = vcn_v3_0_wait_for_idle,
	.check_soft_reset = NULL,
	.pre_soft_reset = NULL,
	.soft_reset = NULL,
	.post_soft_reset = NULL,
	.set_clockgating_state = vcn_v3_0_set_clockgating_state,
	.set_powergating_state = vcn_v3_0_set_powergating_state,
};

const struct amdgpu_ip_block_version vcn_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCN,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vcn_v3_0_ip_funcs,
};