/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <deathsimple@vodafone.de>
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_uvd.h"
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"

/* 1 second timeout */
#define UVD_IDLE_TIMEOUT_MS	1000

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin"
#define FIRMWARE_KABINI		"radeon/kabini_uvd.bin"
#define FIRMWARE_KAVERI		"radeon/kaveri_uvd.bin"
#define FIRMWARE_HAWAII		"radeon/hawaii_uvd.bin"
#define FIRMWARE_MULLINS	"radeon/mullins_uvd.bin"
#endif
#define FIRMWARE_TONGA		"amdgpu/tonga_uvd.bin"
#define FIRMWARE_CARRIZO	"amdgpu/carrizo_uvd.bin"
#define FIRMWARE_FIJI		"amdgpu/fiji_uvd.bin"
#define FIRMWARE_STONEY		"amdgpu/stoney_uvd.bin"

/**
 * amdgpu_uvd_cs_ctx - Command submission parser context
 *
 * Used for emulating virtual memory support on UVD 4.2.
 */
struct amdgpu_uvd_cs_ctx {
	struct amdgpu_cs_parser *parser;
	unsigned reg, count;
	unsigned data0, data1;
	unsigned idx;
	unsigned ib_idx;

	/* does the IB have a msg command */
	bool has_msg_cmd;

	/* minimum buffer sizes */
	unsigned *buf_sizes;
};

#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);

static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);

/**
 * amdgpu_uvd_sw_init - load firmware and allocate the UVD VCPU bo
 *
 * @adev: amdgpu_device pointer
 *
 * Request and validate the UVD firmware for this ASIC, allocate and pin
 * the VCPU buffer object and set up the UVD scheduler entity.
 */
int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct amd_sched_rq *rq;
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned version_major, version_minor, family_id;
	int i, r;

	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
		fw_name = FIRMWARE_BONAIRE;
		break;
	case CHIP_KABINI:
		fw_name = FIRMWARE_KABINI;
		break;
	case CHIP_KAVERI:
		fw_name = FIRMWARE_KAVERI;
		break;
	case CHIP_HAWAII:
		fw_name = FIRMWARE_HAWAII;
		break;
	case CHIP_MULLINS:
		fw_name = FIRMWARE_MULLINS;
		break;
#endif
	case CHIP_TONGA:
		fw_name = FIRMWARE_TONGA;
		break;
	case CHIP_FIJI:
		fw_name = FIRMWARE_FIJI;
		break;
	case CHIP_CARRIZO:
		fw_name = FIRMWARE_CARRIZO;
		break;
	case CHIP_STONEY:
		fw_name = FIRMWARE_STONEY;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->uvd.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->uvd.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->uvd.fw);
		adev->uvd.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
	DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
		 version_major, version_minor, family_id);

	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
		  + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE;
	r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &adev->uvd.vcpu_bo);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
	if (r) {
		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
		dev_err(adev->dev, "(%d) failed to reserve UVD bo\n", r);
		return r;
	}

	r = amdgpu_bo_pin(adev->uvd.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
			  &adev->uvd.gpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
		dev_err(adev->dev, "(%d) UVD bo pin failed\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->uvd.vcpu_bo, &adev->uvd.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) UVD map failed\n", r);
		return r;
	}

	amdgpu_bo_unreserve(adev->uvd.vcpu_bo);

	ring = &adev->uvd.ring;
	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
	r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
				  rq, amdgpu_sched_jobs);
	if (r != 0) {
		DRM_ERROR("Failed setting up UVD run queue.\n");
		return r;
	}

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
		atomic_set(&adev->uvd.handles[i], 0);
		adev->uvd.filp[i] = NULL;
	}

	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
	if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
		adev->uvd.address_64_bit = true;

	return 0;
}

int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->uvd.vcpu_bo == NULL)
		return 0;

	amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);

	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
	if (!r) {
		amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
		amdgpu_bo_unpin(adev->uvd.vcpu_bo);
		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
	}

	amdgpu_bo_unref(&adev->uvd.vcpu_bo);

	amdgpu_ring_fini(&adev->uvd.ring);

	release_firmware(adev->uvd.fw);

	return 0;
}

int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i;

	if (adev->uvd.vcpu_bo == NULL)
		return 0;

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
		if (atomic_read(&adev->uvd.handles[i]))
			break;

	if (i == AMDGPU_MAX_UVD_HANDLES)
		return 0;

	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
	ptr = adev->uvd.cpu_addr;

	adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
	if (!adev->uvd.saved_bo)
		return -ENOMEM;

	memcpy(adev->uvd.saved_bo, ptr, size);

	return 0;
}

int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->uvd.vcpu_bo == NULL)
		return -EINVAL;

	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
	ptr = adev->uvd.cpu_addr;

	if (adev->uvd.saved_bo != NULL) {
		memcpy(ptr, adev->uvd.saved_bo, size);
		kfree(adev->uvd.saved_bo);
		adev->uvd.saved_bo = NULL;
	} else {
		const struct common_firmware_header *hdr;
		unsigned offset;

		hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
		memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
		       (adev->uvd.fw->size) - offset);
		size -= le32_to_cpu(hdr->ucode_size_bytes);
		ptr += le32_to_cpu(hdr->ucode_size_bytes);
		memset(ptr, 0, size);
	}

	return 0;
}

void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->uvd.ring;
	int i, r;

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
		if (handle != 0 && adev->uvd.filp[i] == filp) {
			struct fence *fence;

			amdgpu_uvd_note_usage(adev);

			r = amdgpu_uvd_get_destroy_msg(ring, handle,
						       false, &fence);
			if (r) {
				DRM_ERROR("Error destroying UVD (%d)!\n", r);
				continue;
			}

			fence_wait(fence, false);
			fence_put(fence);

			adev->uvd.filp[i] = NULL;
			atomic_set(&adev->uvd.handles[i], 0);
		}
	}
}

static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *rbo)
{
	int i;
	for (i = 0; i < rbo->placement.num_placement; ++i) {
		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
	}
}

/**
 * amdgpu_uvd_cs_pass1 - first parsing round
 *
 * @ctx: UVD parser context
 *
 * Make sure UVD message and feedback buffers are in VRAM and
 * nobody is violating a 256MB boundary.
 */
static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint32_t cmd, lo, hi;
	uint64_t addr;
	int r = 0;

	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);

	mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo);
	if (mapping == NULL) {
		DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
		return -EINVAL;
	}

	if (!ctx->parser->adev->uvd.address_64_bit) {
		/* check if it's a message or feedback command */
		cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
		if (cmd == 0x0 || cmd == 0x3) {
			/* yes, force it into VRAM */
			uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
			amdgpu_ttm_placement_from_domain(bo, domain);
		}
		amdgpu_uvd_force_into_uvd_segment(bo);

		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	}

	return r;
}

/**
 * amdgpu_uvd_cs_msg_decode - handle UVD decode message
 *
 * @msg: pointer to message structure
 * @buf_sizes: returned buffer sizes
 *
 * Peek into the decode message and calculate the necessary buffer sizes.
 */
static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
{
	unsigned stream_type = msg[4];
	unsigned width = msg[6];
	unsigned height = msg[7];
	unsigned dpb_size = msg[9];
	unsigned pitch = msg[28];
	unsigned level = msg[57];

	unsigned width_in_mb = width / 16;
	unsigned height_in_mb = ALIGN(height / 16, 2);
	unsigned fs_in_mb = width_in_mb * height_in_mb;

	unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
	unsigned min_ctx_size = 0;

	image_size = width * height;
	image_size += image_size / 2;
	image_size = ALIGN(image_size, 1024);

	switch (stream_type) {
	case 0: /* H264 */
	case 7: /* H264 Perf */
		switch(level) {
		case 30:
			num_dpb_buffer = 8100 / fs_in_mb;
			break;
		case 31:
			num_dpb_buffer = 18000 / fs_in_mb;
			break;
		case 32:
			num_dpb_buffer = 20480 / fs_in_mb;
			break;
		case 41:
			num_dpb_buffer = 32768 / fs_in_mb;
			break;
		case 42:
			num_dpb_buffer = 34816 / fs_in_mb;
			break;
		case 50:
			num_dpb_buffer = 110400 / fs_in_mb;
			break;
		case 51:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		default:
			num_dpb_buffer = 184320 / fs_in_mb;
			break;
		}
		num_dpb_buffer++;
		if (num_dpb_buffer > 17)
			num_dpb_buffer = 17;

		/* reference picture buffer */
		min_dpb_size = image_size * num_dpb_buffer;

		/* macroblock context buffer */
		min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * height_in_mb * 32;
		break;

	case 1: /* VC1 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CONTEXT_BUFFER */
		min_dpb_size += width_in_mb * height_in_mb * 128;

		/* IT surface buffer */
		min_dpb_size += width_in_mb * 64;

		/* DB surface buffer */
		min_dpb_size += width_in_mb * 128;

		/* BP */
		tmp = max(width_in_mb, height_in_mb);
		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
		break;

	case 3: /* MPEG2 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;
		break;

	case 4: /* MPEG4 */

		/* reference picture buffer */
		min_dpb_size = image_size * 3;

		/* CM */
		min_dpb_size += width_in_mb * height_in_mb * 64;

		/* IT surface buffer */
		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
		break;

	case 16: /* H265 */
		image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
		image_size = ALIGN(image_size, 256);

		num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
		min_dpb_size = image_size * num_dpb_buffer;
		min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
			       * 16 * num_dpb_buffer + 52 * 1024;
		break;

	default:
		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
		return -EINVAL;
	}

	if (width > pitch) {
		DRM_ERROR("Invalid UVD decoding target pitch!\n");
		return -EINVAL;
	}

	if (dpb_size < min_dpb_size) {
		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
			  dpb_size, min_dpb_size);
		return -EINVAL;
	}

	buf_sizes[0x1] = dpb_size;
	buf_sizes[0x2] = image_size;
	buf_sizes[0x4] = min_ctx_size;
	return 0;
}

/**
 * amdgpu_uvd_cs_msg - handle UVD message
 *
 * @ctx: UVD parser context
 * @bo: buffer object containing the message
 * @offset: offset into the buffer object
 *
 * Peek into the UVD message and extract the session id.
 * Make sure that we don't open up too many sessions.
 */
static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
			     struct amdgpu_bo *bo, unsigned offset)
{
	struct amdgpu_device *adev = ctx->parser->adev;
	int32_t *msg, msg_type, handle;
	void *ptr;
	long r;
	int i;

	if (offset & 0x3F) {
		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
		return -EINVAL;
	}

	r = amdgpu_bo_kmap(bo, &ptr);
	if (r) {
		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
		return r;
	}

	msg = ptr + offset;

	msg_type = msg[1];
	handle = msg[2];

	if (handle == 0) {
		DRM_ERROR("Invalid UVD handle!\n");
		return -EINVAL;
	}

	switch (msg_type) {
	case 0:
		/* it's a create msg, calc image size (width * height) */
		amdgpu_bo_kunmap(bo);

		/* try to alloc a new handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
				adev->uvd.filp[i] = ctx->parser->filp;
				return 0;
			}
		}

		DRM_ERROR("No more free UVD handles!\n");
		return -EINVAL;

	case 1:
		/* it's a decode msg, calc buffer sizes */
		r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes);
		amdgpu_bo_kunmap(bo);
		if (r)
			return r;

		/* validate the handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				if (adev->uvd.filp[i] != ctx->parser->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
				return 0;
			}
		}

		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
		return -ENOENT;

	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
		amdgpu_bo_kunmap(bo);
		return 0;

	default:
		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
		return -EINVAL;
	}
	BUG();
	return -EINVAL;
}

/**
 * amdgpu_uvd_cs_pass2 - second parsing round
 *
 * @ctx: UVD parser context
 *
 * Patch buffer addresses, make sure buffer sizes are correct.
 */
static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	uint32_t cmd, lo, hi;
	uint64_t start, end;
	uint64_t addr;
	int r;

	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);

	mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo);
	if (mapping == NULL)
		return -EINVAL;

	start = amdgpu_bo_gpu_offset(bo);

	end = (mapping->it.last + 1 - mapping->it.start);
	end = end * AMDGPU_GPU_PAGE_SIZE + start;

	addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
	start += addr;

	amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
			    lower_32_bits(start));
	amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
			    upper_32_bits(start));

	cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
	if (cmd < 0x4) {
		if ((end - start) < ctx->buf_sizes[cmd]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start),
				  ctx->buf_sizes[cmd]);
			return -EINVAL;
		}

	} else if (cmd == 0x206) {
		if ((end - start) < ctx->buf_sizes[4]) {
			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
				  (unsigned)(end - start),
				  ctx->buf_sizes[4]);
			return -EINVAL;
		}
	} else if ((cmd != 0x100) && (cmd != 0x204)) {
		DRM_ERROR("invalid UVD command %X!\n", cmd);
		return -EINVAL;
	}

	if (!ctx->parser->adev->uvd.address_64_bit) {
		if ((start >> 28) != ((end - 1) >> 28)) {
			DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
				  start, end);
			return -EINVAL;
		}

		if ((cmd == 0 || cmd == 0x3) &&
		    (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
			DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
				  start, end);
			return -EINVAL;
		}
	}

	if (cmd == 0) {
		ctx->has_msg_cmd = true;
		r = amdgpu_uvd_cs_msg(ctx, bo, addr);
		if (r)
			return r;
	} else if (!ctx->has_msg_cmd) {
		DRM_ERROR("Message needed before other commands are sent!\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * amdgpu_uvd_cs_reg - parse register writes
 *
 * @ctx: UVD parser context
 * @cb: callback function
 *
 * Parse the register writes, call cb on each complete command.
 */
static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
			     int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
	struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
	int i, r;

	ctx->idx++;
	for (i = 0; i <= ctx->count; ++i) {
		unsigned reg = ctx->reg + i;

		if (ctx->idx >= ib->length_dw) {
			DRM_ERROR("Register command after end of CS!\n");
			return -EINVAL;
		}

		switch (reg) {
		case mmUVD_GPCOM_VCPU_DATA0:
			ctx->data0 = ctx->idx;
			break;
		case mmUVD_GPCOM_VCPU_DATA1:
			ctx->data1 = ctx->idx;
			break;
		case mmUVD_GPCOM_VCPU_CMD:
			r = cb(ctx);
			if (r)
				return r;
			break;
		case mmUVD_ENGINE_CNTL:
			break;
		default:
			DRM_ERROR("Invalid reg 0x%X!\n", reg);
			return -EINVAL;
		}
		ctx->idx++;
	}
	return 0;
}

/**
 * amdgpu_uvd_cs_packets - parse UVD packets
 *
 * @ctx: UVD parser context
 * @cb: callback function
 *
 * Parse the command stream packets.
 */
static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
				 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
	struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
	int r;

	for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
		uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx);
		unsigned type = CP_PACKET_GET_TYPE(cmd);
		switch (type) {
		case PACKET_TYPE0:
			ctx->reg = CP_PACKET0_GET_REG(cmd);
			ctx->count = CP_PACKET_GET_COUNT(cmd);
			r = amdgpu_uvd_cs_reg(ctx, cb);
			if (r)
				return r;
			break;
		case PACKET_TYPE2:
			++ctx->idx;
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", type);
			return -EINVAL;
		}
	}
	return 0;
}

/**
 * amdgpu_uvd_ring_parse_cs - UVD command submission parser
 *
 * @parser: Command submission parser context
 * @ib_idx: index of the IB to parse
 *
 * Parse the command stream, patch in addresses as necessary.
 */
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
{
	struct amdgpu_uvd_cs_ctx ctx = {};
	unsigned buf_sizes[] = {
		[0x00000000] = 2048,
		[0x00000001] = 0xFFFFFFFF,
		[0x00000002] = 0xFFFFFFFF,
		[0x00000003] = 2048,
		[0x00000004] = 0xFFFFFFFF,
	};
	struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
	int r;

	if (ib->length_dw % 16) {
		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
			  ib->length_dw);
		return -EINVAL;
	}

	ctx.parser = parser;
	ctx.buf_sizes = buf_sizes;
	ctx.ib_idx = ib_idx;

	/* first round, make sure the buffers are actually in the UVD segment */
	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
	if (r)
		return r;

	/* second round, patch buffer addresses into the command stream */
	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
	if (r)
		return r;

	if (!ctx.has_msg_cmd) {
		DRM_ERROR("UVD-IBs need a msg command!\n");
		return -EINVAL;
	}

	amdgpu_uvd_note_usage(ctx.parser->adev);

	return 0;
}

/**
 * amdgpu_uvd_send_msg - submit a UVD message buffer to the ring
 *
 * @ring: UVD ring to submit on
 * @bo: buffer object containing the message
 * @direct: submit directly to the ring instead of the scheduler entity
 * @fence: optional returned fence
 */
static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
			       bool direct, struct fence **fence)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct fence *f = NULL;
	struct amdgpu_device *adev = ring->adev;
	uint64_t addr;
	int i, r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
	if (r)
		return r;

	if (!bo->adev->uvd.address_64_bit) {
		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
		amdgpu_uvd_force_into_uvd_segment(bo);
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto err;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; ++i)
		ib->ptr[i] = PACKET2(0);
	ib->length_dw = 16;

	if (direct) {
		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
		job->fence = f;
		if (r)
			goto err_free;

		amdgpu_job_free(job);
	} else {
		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
		if (r)
			goto err_free;
	}

	ttm_eu_fence_buffer_objects(&ticket, &head, f);

	if (fence)
		*fence = fence_get(f);
	amdgpu_bo_unref(&bo);
	fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/* multiple fence commands without any stream commands in between can
   crash the vcpu so just try to emit a dummy create/destroy msg to
   avoid this */
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
			      struct fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&msg);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}

	/* stitch together a UVD create msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000000);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(0x00000000);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000000);
	msg[7] = cpu_to_le32(0x00000780);
	msg[8] = cpu_to_le32(0x00000440);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x01b37000);
	for (i = 11; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);

	return amdgpu_uvd_send_msg(ring, bo, true, fence);
}

int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
			       bool direct, struct fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
			     NULL, NULL, &bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&msg);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}

	/* stitch together a UVD destroy msg */
	msg[0] = cpu_to_le32(0x00000de4);
	msg[1] = cpu_to_le32(0x00000002);
	msg[2] = cpu_to_le32(handle);
	msg[3] = cpu_to_le32(0x00000000);
	for (i = 4; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);

	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
}

static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, uvd.idle_work.work);
	unsigned i, fences, handles = 0;

	fences = amdgpu_fence_count_emitted(&adev->uvd.ring);

	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
		if (atomic_read(&adev->uvd.handles[i]))
			++handles;

	if (fences == 0 && handles == 0) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, false);
		} else {
			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
		}
	} else {
		schedule_delayed_work(&adev->uvd.idle_work,
				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
	}
}

static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
{
	bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
	set_clocks &= schedule_delayed_work(&adev->uvd.idle_work,
					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, true);
		} else {
			amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
		}
	}
}