/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN			"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO		"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2			"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS		"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR			"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE		"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10			"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14			"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12			"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID		"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER		"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH		"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN		"amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY		"amdgpu/beige_goby_vcn.bin"
#define FIRMWARE_YELLOW_CARP		"amdgpu/yellow_carp_vcn.bin"
#define FIRMWARE_VCN_3_1_2		"amdgpu/vcn_3_1_2_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
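
/*
 * amdgpu_vcn_sw_init - software init for the VCN block
 *
 * Select the firmware image for the detected VCN IP version, load and
 * validate it, and allocate one VCPU buffer object per (non-harvested)
 * VCN instance.  DPG indirect SRAM mode is enabled for the parts that
 * support it when the firmware is loaded by the PSP.
 */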
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	unsigned int fw_shared_size, log_offset;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
	mutex_init(&adev->vcn.vcn_pg_lock);
	mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
	atomic_set(&adev->vcn.total_submission_cnt, 0);
	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);

	switch (adev->ip_versions[UVD_HWIP][0]) {
	case IP_VERSION(1, 0, 0):
	case IP_VERSION(1, 0, 1):
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	case IP_VERSION(2, 5, 0):
		fw_name = FIRMWARE_ARCTURUS;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(2, 2, 0):
		if (adev->apu_flags & AMD_APU_IS_RENOIR)
			fw_name = FIRMWARE_RENOIR;
		else
			fw_name = FIRMWARE_GREEN_SARDINE;

		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(2, 6, 0):
		fw_name = FIRMWARE_ALDEBARAN;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(2, 0, 0):
		fw_name = FIRMWARE_NAVI10;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(2, 0, 2):
		if (adev->asic_type == CHIP_NAVI12)
			fw_name = FIRMWARE_NAVI12;
		else
			fw_name = FIRMWARE_NAVI14;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 0, 0):
	case IP_VERSION(3, 0, 64):
	case IP_VERSION(3, 0, 192):
		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
			fw_name = FIRMWARE_SIENNA_CICHLID;
		else
			fw_name = FIRMWARE_NAVY_FLOUNDER;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 0, 2):
		fw_name = FIRMWARE_VANGOGH;
		break;
	case IP_VERSION(3, 0, 16):
		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 0, 33):
		fw_name = FIRMWARE_BEIGE_GOBY;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 1, 1):
		fw_name = FIRMWARE_YELLOW_CARP;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case IP_VERSION(3, 1, 2):
		fw_name = FIRMWARE_VCN_3_1_2;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bit 20-23, it is encode major and non-zero for new naming convention.
	 * This field is part of version minor and DRM_DISABLED_FLAG in old naming
	 * convention. Since the latest version minor is 0x5B and DRM_DISABLED_FLAG
	 * is zero in old naming convention, this field is always zero so far.
	 * These four bits are used to tell which naming convention is present.
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			version_major, version_minor, family_id);
	}
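
	/*
	 * Size the per-instance VCPU BO.  From start to end it holds: the
	 * firmware image (only when the PSP does not load it), the stack and
	 * context save areas, the firmware shared memory region and, when
	 * amdgpu_vcnfw_log is set, the firmware log buffer.  The shared
	 * memory and log addresses are computed back from the end of the BO.
	 */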
	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
	fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
	log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
	bo_size += fw_shared_size;

	if (amdgpu_vcnfw_log)
		bo_size += AMDGPU_VCNFW_LOG_SIZE;

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
					    &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}

		adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
				bo_size - fw_shared_size;
		adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
				bo_size - fw_shared_size;

		adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;

		if (amdgpu_vcnfw_log) {
			adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
			adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
			adev->vcn.inst[i].fw_shared.log_offset = log_offset;
		}

		if (adev->vcn.indirect_sram) {
			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
			if (r) {
				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
				return r;
			}
		}
	}

	return 0;
}
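
/*
 * amdgpu_vcn_sw_fini - software teardown for the VCN block
 *
 * Release, per instance, the DPG indirect SRAM BO (if any), the suspend
 * save buffer, the VCPU BO and the decode/encode rings, then drop the
 * firmware reference and the VCN mutexes.
 */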
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (adev->vcn.indirect_sram) {
			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
					      &adev->vcn.inst[j].dpg_sram_gpu_addr,
					      (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
		}
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
				      &adev->vcn.inst[j].gpu_addr,
				      (void **)&adev->vcn.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
	}

	release_firmware(adev->vcn.fw);
	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
	mutex_destroy(&adev->vcn.vcn_pg_lock);

	return 0;
}

bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
	bool ret = false;
	int vcn_config = adev->vcn.vcn_config[vcn_instance];

	if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
		ret = true;
	} else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) {
		ret = true;
	} else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
		ret = true;
	}

	return ret;
}

int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, idx;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;

		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
			memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
			drm_dev_exit(idx);
		}
	}
	return 0;
}

int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, idx;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		if (adev->vcn.inst[i].saved_bo != NULL) {
			if (drm_dev_enter(adev_to_drm(adev), &idx)) {
				memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
				drm_dev_exit(idx);
			}
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				if (drm_dev_enter(adev_to_drm(adev), &idx)) {
					memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
						    le32_to_cpu(hdr->ucode_size_bytes));
					drm_dev_exit(idx);
				}
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}
	}
	return 0;
}
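
/*
 * amdgpu_vcn_idle_work_handler - delayed-work callback used to power down VCN
 *
 * Counts the fences still outstanding on every decode and encode ring.
 * While work remains (or submissions are in flight) the DPG pause state is
 * refreshed and the work item reschedules itself; once everything is idle
 * the block is power gated and the VIDEO power profile is released.
 */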
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;
	int r = 0;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			struct dpg_pause_state new_state;

			if (fence[j] ||
			    unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

			adev->vcn.pause_dpg_mode(adev, j, &new_state);
		}

		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}

	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
						       AMD_PG_STATE_GATE);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    false);
		if (r)
			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	atomic_inc(&adev->vcn.total_submission_cnt);

	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
	}

	mutex_lock(&adev->vcn.vcn_pg_lock);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       AMD_PG_STATE_UNGATE);

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		} else {
			unsigned int fences = 0;
			unsigned int i;

			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);

			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
		}

		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
	}
	mutex_unlock(&adev->vcn.vcn_pg_lock);
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
	    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&ring->adev->vcn.total_submission_cnt);

	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
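
/*
 * amdgpu_vcn_dec_ring_test_ring - basic liveness test for the decode ring
 *
 * Write a poison value to the instance's scratch9 register, then submit a
 * small packet that overwrites it with 0xDEADBEEF and poll (up to
 * adev->usec_timeout microseconds) until the new value reads back.
 */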
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* VCN in SRIOV does not support direct register read/write */
	if (amdgpu_sriov_vf(adev))
		return 0;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}
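
/*
 * amdgpu_vcn_dec_send_msg - submit a decoder message buffer directly
 *
 * Build a 16-dword IB that loads the GPU address of the message buffer
 * (ib_msg) into the DATA0/DATA1 registers, issues the decode command and
 * pads the remainder with NOP packets, then submits it on the direct IB
 * pool.  The message IB is freed once the submission fence exists (or on
 * error).
 */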
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib_msg,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_ib_free(adev, ib_msg, f);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);
err:
	amdgpu_ib_free(adev, ib_msg, f);
	return r;
}

static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_ib *ib)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
			  AMDGPU_IB_POOL_DIRECT,
			  ib);
	if (r)
		return r;

	msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_ib *ib)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
			  AMDGPU_IB_POOL_DIRECT,
			  ib);
	if (r)
		return r;

	msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib_msg,
				      struct dma_fence **fence)
{
	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
	const unsigned int ib_size_dw = 64;
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	ib->length_dw = 0;

	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));

	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_ib_free(adev, ib_msg, f);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);
err:
	amdgpu_ib_free(adev, ib_msg, f);
	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}
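
/*
 * amdgpu_vcn_enc_ring_test_ring - basic liveness test for an encode ring
 *
 * Record the current read pointer, submit a single VCN_ENC_CMD_END and
 * poll until the read pointer advances, which shows the engine consumed
 * the command.  Skipped under SR-IOV, as with the decode ring test.
 */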
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_ib *ib_msg,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_ib *ib_msg,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
				     AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
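
/*
 * amdgpu_vcn_enc_ring_test_ib - end-to-end test of the encode ring
 *
 * Allocate a scratch message buffer, submit a session-create IB followed
 * by a session-destroy IB, and wait (up to the given timeout) for the
 * destroy submission's fence to signal.
 */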
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, (128 << 10) + AMDGPU_GPU_PAGE_SIZE,
			  AMDGPU_IB_POOL_DIRECT,
			  &ib);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	amdgpu_ib_free(adev, &ib, fence);
	dma_fence_put(fence);

	return r;
}

enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{
	switch (ring) {
	case 0:
		return AMDGPU_RING_PRIO_0;
	case 1:
		return AMDGPU_RING_PRIO_1;
	case 2:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}

void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
{
	int i;
	unsigned int idx;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;

		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;

		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			if (adev->vcn.harvest_config & (1 << i))
				continue;
			/* currently only support 2 FW instances */
			if (i >= 2) {
				dev_info(adev->dev, "More than 2 VCN FW instances!\n");
				break;
			}
			idx = AMDGPU_UCODE_ID_VCN + i;
			adev->firmware.ucode[idx].ucode_id = idx;
			adev->firmware.ucode[idx].fw = adev->vcn.fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		}
		dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
	}
}

/*
 * debugfs for mapping vcn firmware log buffer.
 */
#if defined(CONFIG_DEBUG_FS)
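/*
 * The firmware log lives at the end of the VCPU BO, right after the shared
 * memory region.  It starts with a struct amdgpu_vcn_fwlog header followed
 * by a circular data area; rptr/wptr index into the whole buffer and wrap
 * back to header_size.  The read handler below copies out at most two
 * chunks per call (the tail of the buffer, then the wrapped head) and
 * advances rptr accordingly.
 */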
static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_vcn_inst *vcn;
	void *log_buf;
	volatile struct amdgpu_vcn_fwlog *plog;
	unsigned int read_pos, write_pos, available, i, read_bytes = 0;
	unsigned int read_num[2] = {0};

	vcn = file_inode(f)->i_private;
	if (!vcn)
		return -ENODEV;

	if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
		return -EFAULT;

	log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;

	plog = (volatile struct amdgpu_vcn_fwlog *)log_buf;
	read_pos = plog->rptr;
	write_pos = plog->wptr;

	if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
		return -EFAULT;

	if (!size || (read_pos == write_pos))
		return 0;

	if (write_pos > read_pos) {
		available = write_pos - read_pos;
		read_num[0] = min(size, (size_t)available);
	} else {
		read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
		available = read_num[0] + write_pos - plog->header_size;
		if (size > available)
			read_num[1] = write_pos - plog->header_size;
		else if (size > read_num[0])
			read_num[1] = size - read_num[0];
		else
			read_num[0] = size;
	}

	for (i = 0; i < 2; i++) {
		if (read_num[i]) {
			if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
				read_pos = plog->header_size;
			if (read_num[i] == copy_to_user((buf + read_bytes),
							(log_buf + read_pos), read_num[i]))
				return -EFAULT;

			read_bytes += read_num[i];
			read_pos += read_num[i];
		}
	}

	plog->rptr = read_pos;
	*pos += read_bytes;
	return read_bytes;
}

static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_vcn_fwlog_read,
	.llseek = default_llseek
};
#endif

void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
				   struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;
	char name[32];

	sprintf(name, "amdgpu_vcn_%d_fwlog", i);
	debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn,
				 &amdgpu_debugfs_vcnfwlog_fops,
				 AMDGPU_VCNFW_LOG_SIZE);
#endif
}

void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
	volatile uint32_t *flag = vcn->fw_shared.cpu_addr;
	void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
	uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
	volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
	volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
							      + vcn->fw_shared.log_offset;

	*flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
	fw_log->is_enabled = 1;
	fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
	fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32);
	fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE);

	log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog);
	log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE;
	log_buf->rptr = log_buf->header_size;
	log_buf->wptr = log_buf->header_size;
	log_buf->wrapped = 0;
#endif
}