/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"
#include "soc15_common.h"

#include "vcn/vcn_1_0_offset.h"

/* 1 second timeout */
#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned version_major, version_minor, family_id;
	int r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		fw_name = FIRMWARE_RAVEN;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
	DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
		 version_major, version_minor, family_id);

	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
		  + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
		  + AMDGPU_VCN_SESSION_SIZE * 40;
	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
		return r;
	}

	return 0;
}

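/*
 * Teardown mirrors amdgpu_vcn_sw_init(): drop any suspend-time copy of the
 * VCPU buffer, free the firmware BO, tear down the decode and encode rings
 * and release the firmware image.
 */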
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i;

	kfree(adev->vcn.saved_bo);

	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
			      &adev->vcn.gpu_addr,
			      (void **)&adev->vcn.cpu_addr);

	amdgpu_ring_fini(&adev->vcn.ring_dec);

	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
		amdgpu_ring_fini(&adev->vcn.ring_enc[i]);

	release_firmware(adev->vcn.fw);

	return 0;
}

int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return 0;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
	if (!adev->vcn.saved_bo)
		return -ENOMEM;

	memcpy_fromio(adev->vcn.saved_bo, ptr, size);

	return 0;
}

int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return -EINVAL;

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	if (adev->vcn.saved_bo != NULL) {
		memcpy_toio(ptr, adev->vcn.saved_bo, size);
		kfree(adev->vcn.saved_bo);
		adev->vcn.saved_bo = NULL;
	} else {
		const struct common_firmware_header *hdr;
		unsigned offset;

		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
			    le32_to_cpu(hdr->ucode_size_bytes));
		size -= le32_to_cpu(hdr->ucode_size_bytes);
		ptr += le32_to_cpu(hdr->ucode_size_bytes);
		memset_io(ptr, 0, size);
	}

	return 0;
}

static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
	unsigned i;

	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
	}

	if (fences == 0) {
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_uvd(adev, false);
		else
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
							       AMD_PG_STATE_GATE);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);

	if (set_clocks) {
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_uvd(adev, true);
		else
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
							       AMD_PG_STATE_UNGATE);
	}
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

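/*
 * Basic decode ring liveness test: seed UVD_CONTEXT_ID with 0xCAFEDEAD,
 * submit a single register write of 0xDEADBEEF through the ring and poll
 * the register until the new value shows up or adev->usec_timeout expires.
 */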
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err_free;

	amdgpu_job_free(job);

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}

static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}

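/*
 * Decode IB test: submit a create message followed by a destroy message for
 * the same session handle, then wait (up to 'timeout') on the fence of the
 * destroy submission; only the destroy fence is kept and waited on.
 */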
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}

	dma_fence_put(fence);

error:
	return r;
}

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	unsigned i;
	int r;

	r = amdgpu_ring_alloc(ring, 16);
	if (r) {
		DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed\n",
			  ring->idx);
		r = -ETIMEDOUT;
	}

	return r;
}

static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err;

	amdgpu_job_free(job);
	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

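/*
 * Counterpart to amdgpu_vcn_enc_get_create_msg(): same session info and
 * task info headers, but the final opcode is 0x08000002 (close session)
 * instead of 0x08000001 (initialize).
 */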
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err;

	amdgpu_job_free(job);
	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	long r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}
error:
	dma_fence_put(fence);
	return r;
}