/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS	"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR		"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE	"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10		"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14		"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12		"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID	"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER	"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH	"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN	"amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY	"amdgpu/beige_goby_vcn.bin"
#define FIRMWARE_YELLOW_CARP	"amdgpu/yellow_carp_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

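/*
 * amdgpu_vcn_sw_init - per-device VCN software initialization
 *
 * Selects the firmware image for the detected VCN IP version, requests and
 * validates it, logs the firmware version, and allocates the per-instance
 * VCPU buffer objects (plus the DPG SRAM BO when indirect SRAM is in use).
 */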
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
	mutex_init(&adev->vcn.vcn_pg_lock);
	mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
	atomic_set(&adev->vcn.total_submission_cnt, 0);
	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);

	switch (adev->ip_versions[UVD_HWIP][0]) {
	case IP_VERSION(1, 0, 0):
	case IP_VERSION(1, 0, 1):
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	case IP_VERSION(2, 5, 0):
		fw_name = FIRMWARE_ARCTURUS;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(2, 2, 0):
		if (adev->apu_flags & AMD_APU_IS_RENOIR)
			fw_name = FIRMWARE_RENOIR;
		else
			fw_name = FIRMWARE_GREEN_SARDINE;

		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(2, 6, 0):
		fw_name = FIRMWARE_ALDEBARAN;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(2, 0, 0):
		fw_name = FIRMWARE_NAVI10;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(2, 0, 2):
		if (adev->asic_type == CHIP_NAVI12)
			fw_name = FIRMWARE_NAVI12;
		else
			fw_name = FIRMWARE_NAVI14;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 0, 0):
	case IP_VERSION(3, 0, 64):
		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
			fw_name = FIRMWARE_SIENNA_CICHLID;
		else
			fw_name = FIRMWARE_NAVY_FLOUNDER;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 0, 2):
		fw_name = FIRMWARE_VANGOGH;
		break;
	case IP_VERSION(3, 0, 16):
		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 0, 33):
		fw_name = FIRMWARE_BEIGE_GOBY;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 1, 1):
		fw_name = FIRMWARE_YELLOW_CARP;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 hold the encode major version and are non-zero for the new
	 * naming convention. In the old naming convention this field is part of
	 * the version minor and DRM_DISABLED_FLAG. Since the latest version minor
	 * is 0x5B and DRM_DISABLED_FLAG is zero in the old convention, this field
	 * is always zero there so far. These four bits are therefore used to tell
	 * which naming convention is present.
	 */
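	/*
	 * Worked example with a purely hypothetical value: a ucode_version of
	 * 0x15203007 would decode as VEP 1, DEC 5, ENC major 2 (the fw_check
	 * field), ENC minor 0x03 and revision 0x007, i.e.
	 * "ENC: 2.3 DEC: 5 VEP: 1 Revision: 7".
	 */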
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			 enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			 version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
	bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
					    &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}

		adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
		adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

		if (adev->vcn.indirect_sram) {
			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
						    &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
			if (r) {
				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
				return r;
			}
		}
	}

	return 0;
}

int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (adev->vcn.indirect_sram) {
			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
					      &adev->vcn.inst[j].dpg_sram_gpu_addr,
					      (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
		}
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
				      &adev->vcn.inst[j].gpu_addr,
				      (void **)&adev->vcn.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
	}

	release_firmware(adev->vcn.fw);
	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
	mutex_destroy(&adev->vcn.vcn_pg_lock);

	return 0;
}

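/*
 * A VCN instance, or one of its ring types, can be fused off on a given
 * part. IP discovery encodes this in the revision field, which the helper
 * below checks against the per-ring-type disable masks; if no IP data is
 * found at all, the instance is treated as disabled.
 */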
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
	bool ret = false;

	int major;
	int minor;
	int revision;

	/* if cannot find IP data, then this VCN does not exist */
	if (amdgpu_discovery_get_vcn_version(adev, vcn_instance, &major, &minor, &revision) != 0)
		return true;

	if ((type == VCN_ENCODE_RING) && (revision & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
		ret = true;
	} else if ((type == VCN_DECODE_RING) && (revision & VCN_BLOCK_DECODE_DISABLE_MASK)) {
		ret = true;
	} else if ((type == VCN_UNIFIED_RING) && (revision & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
		ret = true;
	}

	return ret;
}

int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, idx;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;

		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
			memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
			drm_dev_exit(idx);
		}
	}
	return 0;
}

int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, idx;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		if (adev->vcn.inst[i].saved_bo != NULL) {
			if (drm_dev_enter(adev_to_drm(adev), &idx)) {
				memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
				drm_dev_exit(idx);
			}
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				if (drm_dev_enter(adev_to_drm(adev), &idx)) {
					memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
						    le32_to_cpu(hdr->ucode_size_bytes));
					drm_dev_exit(idx);
				}
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}
	}
	return 0;
}

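/*
 * Idle work handler: count the fences still outstanding on every VCN ring,
 * adjust the DPG pause state accordingly and, once nothing is pending and
 * no submission is in flight, gate the VCN power domain and drop the VIDEO
 * power profile; otherwise re-arm the delayed work.
 */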
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;
	int r = 0;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			struct dpg_pause_state new_state;

			if (fence[j] ||
			    unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

			adev->vcn.pause_dpg_mode(adev, j, &new_state);
		}

		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}

	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
						       AMD_PG_STATE_GATE);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    false);
		if (r)
			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	atomic_inc(&adev->vcn.total_submission_cnt);

	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
	}

	mutex_lock(&adev->vcn.vcn_pg_lock);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       AMD_PG_STATE_UNGATE);

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		} else {
			unsigned int fences = 0;
			unsigned int i;

			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);

			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
		}

		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
	}
	mutex_unlock(&adev->vcn.vcn_pg_lock);
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
	    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&ring->adev->vcn.total_submission_cnt);

	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* VCN in SRIOV does not support direct register read/write */
	if (amdgpu_sriov_vf(adev))
		return 0;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

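/*
 * Software-ring variant of the decode ring test: submit a single
 * VCN_DEC_SW_CMD_END packet and wait for the ring's read pointer to advance.
 */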
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib_msg,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_ib_free(adev, ib_msg, f);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);
err:
	amdgpu_ib_free(adev, ib_msg, f);
	return r;
}

static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_ib *ib)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
			  AMDGPU_IB_POOL_DIRECT,
			  ib);
	if (r)
		return r;

	msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_ib *ib)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
			  AMDGPU_IB_POOL_DIRECT,
			  ib);
	if (r)
		return r;

	msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

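/*
 * Decode IB test: send a create message followed by a destroy message for
 * the same session handle and wait, with a timeout, for the fence of the
 * destroy submission to signal.
 */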
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib_msg,
				      struct dma_fence **fence)
{
	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
	const unsigned int ib_size_dw = 64;
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	ib->length_dw = 0;

	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));

	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_ib_free(adev, ib_msg, f);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);
err:
	amdgpu_ib_free(adev, ib_msg, f);
	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

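/*
 * The encode create/destroy helpers below hand-build a minimal message
 * stream (session info, task info, then an initialize or close-session op)
 * around the supplied message buffer address.
 */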
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_ib *ib_msg,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_ib *ib_msg,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, (128 << 10) + AMDGPU_GPU_PAGE_SIZE,
			  AMDGPU_IB_POOL_DIRECT,
			  &ib);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	amdgpu_ib_free(adev, &ib, fence);
	dma_fence_put(fence);

	return r;
}

enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{
	switch (ring) {
	case 0:
		return AMDGPU_RING_PRIO_0;
	case 1:
		return AMDGPU_RING_PRIO_1;
	case 2:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}

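/*
 * When the PSP performs the firmware load, register each non-harvested VCN
 * instance's image in the ucode table so the PSP picks it up; only two
 * firmware instances are currently supported.
 */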
void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
{
	int i;
	unsigned int idx;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;

		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;

		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			if (adev->vcn.harvest_config & (1 << i))
				continue;
			/* currently only support 2 FW instances */
			if (i >= 2) {
				dev_info(adev->dev, "More than 2 VCN FW instances!\n");
				break;
			}
			idx = AMDGPU_UCODE_ID_VCN + i;
			adev->firmware.ucode[idx].ucode_id = idx;
			adev->firmware.ucode[idx].fw = adev->vcn.fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		}
		dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
	}
}