/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN			"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO		"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2			"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS		"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR			"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE		"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10			"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14			"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12			"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID		"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER		"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH		"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN		"amdgpu/aldebaran_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

/*
 * Select the VCN firmware image for the current ASIC, request and validate
 * it, then allocate the per-instance VCPU buffer objects (plus the indirect
 * DPG SRAM buffer when PSP firmware loading with DPG is used).
 */
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
	mutex_init(&adev->vcn.vcn_pg_lock);
	mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
	atomic_set(&adev->vcn.total_submission_cnt, 0);
	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	case CHIP_ARCTURUS:
		fw_name = FIRMWARE_ARCTURUS;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_RENOIR:
		if (adev->apu_flags & AMD_APU_IS_RENOIR)
			fw_name = FIRMWARE_RENOIR;
		else
			fw_name = FIRMWARE_GREEN_SARDINE;

		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_ALDEBARAN:
		fw_name = FIRMWARE_ALDEBARAN;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI10:
		fw_name = FIRMWARE_NAVI10;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI14:
		fw_name = FIRMWARE_NAVI14;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI12:
		fw_name = FIRMWARE_NAVI12;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_SIENNA_CICHLID:
		fw_name = FIRMWARE_SIENNA_CICHLID;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVY_FLOUNDER:
		fw_name = FIRMWARE_NAVY_FLOUNDER;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_VANGOGH:
		fw_name = FIRMWARE_VANGOGH;
		break;
	case CHIP_DIMGREY_CAVEFISH:
		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 hold the encode major version and are non-zero for the new
	 * naming convention. This field is part of version minor and
	 * DRM_DISABLED_FLAG in the old naming convention. Since the latest
	 * version minor is 0x5B and DRM_DISABLED_FLAG is zero in the old naming
	 * convention, this field is always zero so far. These four bits are
	 * used to tell which naming convention is present.
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
	bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
					    &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}

		adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
		adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

		if (adev->vcn.indirect_sram) {
			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
			if (r) {
				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
				return r;
			}
		}
	}

	return 0;
}

int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (adev->vcn.indirect_sram) {
			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
					      &adev->vcn.inst[j].dpg_sram_gpu_addr,
					      (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
		}
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
				      &adev->vcn.inst[j].gpu_addr,
				      (void **)&adev->vcn.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
	}

	release_firmware(adev->vcn.fw);
	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
	mutex_destroy(&adev->vcn.vcn_pg_lock);

	return 0;
}

int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;

		memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
	}
	return 0;
}

int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		if (adev->vcn.inst[i].saved_bo != NULL) {
			memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
					    le32_to_cpu(hdr->ucode_size_bytes));
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}
	}
	return 0;
}

/*
 * Idle worker: update the DPG pause state per instance and, once no fences
 * or submissions are pending on any VCN ring, re-enable GFXOFF, gate the VCN
 * block and release the VIDEO power profile; otherwise re-arm the delayed
 * work.
 */
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;
	int r = 0;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			struct dpg_pause_state new_state;

			if (fence[j] ||
				unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

			adev->vcn.pause_dpg_mode(adev, j, &new_state);
		}

		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}

	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
		amdgpu_gfx_off_ctrl(adev, true);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
		       AMD_PG_STATE_GATE);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
				false);
		if (r)
			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

/*
 * Called before command submission on a VCN ring: take a submission
 * reference, make sure VCN is ungated and the VIDEO power profile is
 * selected, and update the DPG pause state for this ring.
 */
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	atomic_inc(&adev->vcn.total_submission_cnt);

	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
		amdgpu_gfx_off_ctrl(adev, false);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
				true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
	}

	mutex_lock(&adev->vcn.vcn_pg_lock);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
	       AMD_PG_STATE_UNGATE);

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		} else {
			unsigned int fences = 0;
			unsigned int i;

			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);

			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
		}

		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
	}
	mutex_unlock(&adev->vcn.vcn_pg_lock);
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
		ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&ring->adev->vcn.total_submission_cnt);

	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* VCN in SRIOV does not support direct register read/write */
	if (amdgpu_sriov_vf(adev))
		return 0;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	void *msg = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	msg = amdgpu_bo_kptr(bo);
	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
	return r;
}

static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
				      struct amdgpu_bo *bo,
				      struct dma_fence **fence)
{
	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
	const unsigned int ib_size_dw = 64;
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->length_dw = 0;

	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));

	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo *bo,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo *bo,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, NULL);

	return r;
}