/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"
#include "soc15_common.h"

#include "vcn/vcn_1_0_offset.h"

/* 1 second timeout */
#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

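/*
 * amdgpu_vcn_sw_init - one-time VCN software setup
 *
 * Loads and validates the Raven VCN firmware, allocates the VRAM buffer
 * backing the VCPU (firmware image, stack, heap and session data) and
 * creates the scheduler entities used by the decode and encode rings.
 */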
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct drm_sched_rq *rq;
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned version_major, version_minor, family_id;
	int r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		fw_name = FIRMWARE_RAVEN;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
	DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
		version_major, version_minor, family_id);

	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
		  + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
		  + AMDGPU_VCN_SESSION_SIZE * 40;
	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
		return r;
	}

	ring = &adev->vcn.ring_dec;
	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
	r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
				  rq, NULL);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCN dec run queue.\n");
		return r;
	}

	ring = &adev->vcn.ring_enc[0];
	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
	r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
				  rq, NULL);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCN enc run queue.\n");
		return r;
	}

	return 0;
}

int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i;

	kfree(adev->vcn.saved_bo);

	drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);

	drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);

	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
			      &adev->vcn.gpu_addr,
			      (void **)&adev->vcn.cpu_addr);

	amdgpu_ring_fini(&adev->vcn.ring_dec);

	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
		amdgpu_ring_fini(&adev->vcn.ring_enc[i]);

	release_firmware(adev->vcn.fw);

	return 0;
}

int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return 0;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
	if (!adev->vcn.saved_bo)
		return -ENOMEM;

	memcpy_fromio(adev->vcn.saved_bo, ptr, size);

	return 0;
}

int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return -EINVAL;

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	if (adev->vcn.saved_bo != NULL) {
		memcpy_toio(ptr, adev->vcn.saved_bo, size);
		kfree(adev->vcn.saved_bo);
		adev->vcn.saved_bo = NULL;
	} else {
		const struct common_firmware_header *hdr;
		unsigned offset;

		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
			    le32_to_cpu(hdr->ucode_size_bytes));
		size -= le32_to_cpu(hdr->ucode_size_bytes);
		ptr += le32_to_cpu(hdr->ucode_size_bytes);
		memset_io(ptr, 0, size);
	}

	return 0;
}

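/*
 * Idle handling: once no decode or encode fences remain outstanding, the
 * delayed work below either asks DPM to drop the UVD/VCN clocks or
 * power-gates the VCN IP block; ring begin_use/end_use undo the gating
 * and re-arm the idle timeout.
 */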
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
	unsigned i;

	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
	}

	if (fences == 0) {
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_uvd(adev, false);
		else
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
							       AMD_PG_STATE_GATE);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);

	/* The idle work was not pending, so the block may already be power
	 * gated or clocked down; bring it back up before using the ring. */
	if (set_clocks) {
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_uvd(adev, true);
		else
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
							       AMD_PG_STATE_UNGATE);
	}
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

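/*
 * amdgpu_vcn_dec_send_msg - submit a decoder message buffer
 *
 * Builds a small IB that programs UVD_GPCOM_VCPU_DATA0/1 with the GPU
 * address of @bo and submits it either directly on the ring or through
 * the decode scheduler entity.  The BO is fenced, unreserved and
 * unreferenced once the submission has been queued.
 */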
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo, bool direct,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	if (direct) {
		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
		job->fence = dma_fence_get(f);
		if (r)
			goto err_free;

		amdgpu_job_free(job);
	} else {
		r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
		if (r)
			goto err_free;
	}

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
}

static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  bool direct, struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
}

int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}

	dma_fence_put(fence);

error:
	return r;
}

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	unsigned i;
	int r;

	r = amdgpu_ring_alloc(ring, 16);
	if (r) {
		DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed\n",
			  ring->idx);
		r = -ETIMEDOUT;
	}

	return r;
}

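/*
 * The two helpers below mirror the decoder test path for the encoder:
 * they emit minimal session-initialize and close-session messages so
 * that amdgpu_vcn_enc_ring_test_ib() can exercise a complete submission
 * round trip.
 */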
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err;

	amdgpu_job_free(job);
	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err;

	amdgpu_job_free(job);
	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	long r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}
error:
	dma_fence_put(fence);
	return r;
}