/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN			"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO		"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2			"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS		"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR			"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE		"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10			"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14			"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12			"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID		"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER		"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH		"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN		"amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY		"amdgpu/beige_goby_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
	mutex_init(&adev->vcn.vcn_pg_lock);
	mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
	atomic_set(&adev->vcn.total_submission_cnt, 0);
	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);

	switch (adev->asic_type) {
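	/*
	 * Pick the VCN firmware image for this ASIC. ASICs that load firmware
	 * through PSP and support VCN dynamic power gating (DPG) also enable
	 * indirect SRAM mode, for which a per-instance DPG buffer is allocated
	 * further below.
	 */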
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	case CHIP_ARCTURUS:
		fw_name = FIRMWARE_ARCTURUS;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_RENOIR:
		if (adev->apu_flags & AMD_APU_IS_RENOIR)
			fw_name = FIRMWARE_RENOIR;
		else
			fw_name = FIRMWARE_GREEN_SARDINE;

		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_ALDEBARAN:
		fw_name = FIRMWARE_ALDEBARAN;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI10:
		fw_name = FIRMWARE_NAVI10;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI14:
		fw_name = FIRMWARE_NAVI14;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI12:
		fw_name = FIRMWARE_NAVI12;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_SIENNA_CICHLID:
		fw_name = FIRMWARE_SIENNA_CICHLID;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVY_FLOUNDER:
		fw_name = FIRMWARE_NAVY_FLOUNDER;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_VANGOGH:
		fw_name = FIRMWARE_VANGOGH;
		break;
	case CHIP_DIMGREY_CAVEFISH:
		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_BEIGE_GOBY:
		fw_name = FIRMWARE_BEIGE_GOBY;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 hold the encode major version and are non-zero in the new
	 * naming convention. In the old naming convention this field is part of
	 * the version minor and DRM_DISABLED_FLAG. Since the latest version minor
	 * is 0x5B and DRM_DISABLED_FLAG is zero in the old convention, this field
	 * has always been zero so far.
	 * These four bits are therefore used to tell which naming convention is in use.
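	 * As a purely illustrative example (not a real firmware version), a
	 * version word of 0x0211800B would decode under the new convention as
	 * ENC 1.24 (enc_major 1, enc_minor 0x18), DEC 2, VEP 0, revision 11.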
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			 enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			 version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
	bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
					    &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}

		adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
		adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

		if (adev->vcn.indirect_sram) {
			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
			if (r) {
				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
				return r;
			}
		}
	}

	return 0;
}

int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (adev->vcn.indirect_sram) {
			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
					      &adev->vcn.inst[j].dpg_sram_gpu_addr,
					      (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
		}
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
				      &adev->vcn.inst[j].gpu_addr,
				      (void **)&adev->vcn.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
	}

	release_firmware(adev->vcn.fw);
	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
	mutex_destroy(&adev->vcn.vcn_pg_lock);

	return 0;
}

int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;
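
		/* Snapshot the VCPU BO contents into system memory so that
		 * amdgpu_vcn_resume() can restore them once the device comes
		 * back up.
		 */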
		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;

		memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
	}
	return 0;
}

int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		if (adev->vcn.inst[i].saved_bo != NULL) {
			memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
					    le32_to_cpu(hdr->ucode_size_bytes));
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}
	}
	return 0;
}

static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;
	int r = 0;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			struct dpg_pause_state new_state;

			if (fence[j] ||
			    unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

			adev->vcn.pause_dpg_mode(adev, j, &new_state);
		}

		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}

	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
		amdgpu_gfx_off_ctrl(adev, true);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
						       AMD_PG_STATE_GATE);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    false);
		if (r)
			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	atomic_inc(&adev->vcn.total_submission_cnt);

	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
		amdgpu_gfx_off_ctrl(adev, false);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
	}

	mutex_lock(&adev->vcn.vcn_pg_lock);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       AMD_PG_STATE_UNGATE);

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
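		/* Under DPG, encode submissions keep the firmware-based pause
		 * state asserted; for other rings the pause state depends on
		 * whether any encode work is still outstanding.
		 */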
		struct dpg_pause_state new_state;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		} else {
			unsigned int fences = 0;
			unsigned int i;

			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);

			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
		}

		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
	}
	mutex_unlock(&adev->vcn.vcn_pg_lock);
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
	    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&ring->adev->vcn.total_submission_cnt);

	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* VCN in SRIOV does not support direct register read/write */
	if (amdgpu_sriov_vf(adev))
		return 0;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	void *msg = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	msg = amdgpu_bo_kptr(bo);
	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
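	/* The submission fence is attached to the message BO before it is
	 * unreserved and released, so its memory is not reused while the
	 * engine may still be reading the message.
	 */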
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
	return r;
}

static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
				      struct amdgpu_bo *bo,
				      struct dma_fence **fence)
{
	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
	const unsigned int ib_size_dw = 64;
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->length_dw = 0;

	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
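	/* The decode buffer is embedded in the IB itself; clear it and point
	 * it at the message BO below.
	 */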
	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));

	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo *bo,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo *bo,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, NULL);

	return r;
}