1 /* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <deathsimple@vodafone.de> 29 */ 30 31 #include <linux/firmware.h> 32 #include <linux/module.h> 33 34 #include <drm/drm.h> 35 #include <drm/drm_drv.h> 36 37 #include "amdgpu.h" 38 #include "amdgpu_pm.h" 39 #include "amdgpu_uvd.h" 40 #include "amdgpu_cs.h" 41 #include "cikd.h" 42 #include "uvd/uvd_4_2_d.h" 43 44 #include "amdgpu_ras.h" 45 46 /* 1 second timeout */ 47 #define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000) 48 49 /* Firmware versions for VI */ 50 #define FW_1_65_10 ((1 << 24) | (65 << 16) | (10 << 8)) 51 #define FW_1_87_11 ((1 << 24) | (87 << 16) | (11 << 8)) 52 #define FW_1_87_12 ((1 << 24) | (87 << 16) | (12 << 8)) 53 #define FW_1_37_15 ((1 << 24) | (37 << 16) | (15 << 8)) 54 55 /* Polaris10/11 firmware version */ 56 #define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8)) 57 58 /* Firmware Names */ 59 #ifdef CONFIG_DRM_AMDGPU_SI 60 #define FIRMWARE_TAHITI "amdgpu/tahiti_uvd.bin" 61 #define FIRMWARE_VERDE "amdgpu/verde_uvd.bin" 62 #define FIRMWARE_PITCAIRN "amdgpu/pitcairn_uvd.bin" 63 #define FIRMWARE_OLAND "amdgpu/oland_uvd.bin" 64 #endif 65 #ifdef CONFIG_DRM_AMDGPU_CIK 66 #define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin" 67 #define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin" 68 #define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin" 69 #define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin" 70 #define FIRMWARE_MULLINS "amdgpu/mullins_uvd.bin" 71 #endif 72 #define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin" 73 #define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin" 74 #define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin" 75 #define FIRMWARE_STONEY "amdgpu/stoney_uvd.bin" 76 #define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin" 77 #define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" 78 #define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" 79 #define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin" 80 81 #define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" 82 #define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin" 83 #define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin" 84 85 /* These are common relative offsets for all asics, from uvd_7_0_offset.h, */ 86 #define UVD_GPCOM_VCPU_CMD 0x03c3 87 #define UVD_GPCOM_VCPU_DATA0 0x03c4 88 #define UVD_GPCOM_VCPU_DATA1 0x03c5 89 #define UVD_NO_OP 0x03ff 90 #define UVD_BASE_SI 0x3800 91 92 /* 93 * amdgpu_uvd_cs_ctx - Command submission parser context 94 * 95 * Used for emulating virtual memory support on UVD 4.2. 96 */ 97 struct amdgpu_uvd_cs_ctx { 98 struct amdgpu_cs_parser *parser; 99 unsigned reg, count; 100 unsigned data0, data1; 101 unsigned idx; 102 struct amdgpu_ib *ib; 103 104 /* does the IB has a msg command */ 105 bool has_msg_cmd; 106 107 /* minimum buffer sizes */ 108 unsigned *buf_sizes; 109 }; 110 111 #ifdef CONFIG_DRM_AMDGPU_SI 112 MODULE_FIRMWARE(FIRMWARE_TAHITI); 113 MODULE_FIRMWARE(FIRMWARE_VERDE); 114 MODULE_FIRMWARE(FIRMWARE_PITCAIRN); 115 MODULE_FIRMWARE(FIRMWARE_OLAND); 116 #endif 117 #ifdef CONFIG_DRM_AMDGPU_CIK 118 MODULE_FIRMWARE(FIRMWARE_BONAIRE); 119 MODULE_FIRMWARE(FIRMWARE_KABINI); 120 MODULE_FIRMWARE(FIRMWARE_KAVERI); 121 MODULE_FIRMWARE(FIRMWARE_HAWAII); 122 MODULE_FIRMWARE(FIRMWARE_MULLINS); 123 #endif 124 MODULE_FIRMWARE(FIRMWARE_TONGA); 125 MODULE_FIRMWARE(FIRMWARE_CARRIZO); 126 MODULE_FIRMWARE(FIRMWARE_FIJI); 127 MODULE_FIRMWARE(FIRMWARE_STONEY); 128 MODULE_FIRMWARE(FIRMWARE_POLARIS10); 129 MODULE_FIRMWARE(FIRMWARE_POLARIS11); 130 MODULE_FIRMWARE(FIRMWARE_POLARIS12); 131 MODULE_FIRMWARE(FIRMWARE_VEGAM); 132 133 MODULE_FIRMWARE(FIRMWARE_VEGA10); 134 MODULE_FIRMWARE(FIRMWARE_VEGA12); 135 MODULE_FIRMWARE(FIRMWARE_VEGA20); 136 137 static void amdgpu_uvd_idle_work_handler(struct work_struct *work); 138 static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo); 139 140 static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev, 141 uint32_t size, 142 struct amdgpu_bo **bo_ptr) 143 { 144 struct ttm_operation_ctx ctx = { true, false }; 145 struct amdgpu_bo *bo = NULL; 146 void *addr; 147 int r; 148 149 r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, 150 AMDGPU_GEM_DOMAIN_GTT, 151 &bo, NULL, &addr); 152 if (r) 153 return r; 154 155 if (adev->uvd.address_64_bit) 156 goto succ; 157 158 amdgpu_bo_kunmap(bo); 159 amdgpu_bo_unpin(bo); 160 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); 161 amdgpu_uvd_force_into_uvd_segment(bo); 162 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 163 if (r) 164 goto err; 165 r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM); 166 if (r) 167 goto err_pin; 168 r = amdgpu_bo_kmap(bo, &addr); 169 if (r) 170 goto err_kmap; 171 succ: 172 amdgpu_bo_unreserve(bo); 173 *bo_ptr = bo; 174 return 0; 175 err_kmap: 176 amdgpu_bo_unpin(bo); 177 err_pin: 178 err: 179 amdgpu_bo_unreserve(bo); 180 amdgpu_bo_unref(&bo); 181 return r; 182 } 183 184 int amdgpu_uvd_sw_init(struct amdgpu_device *adev) 185 { 186 unsigned long bo_size; 187 const char *fw_name; 188 const struct common_firmware_header *hdr; 189 unsigned family_id; 190 int i, j, r; 191 192 INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); 193 194 switch (adev->asic_type) { 195 #ifdef CONFIG_DRM_AMDGPU_SI 196 case CHIP_TAHITI: 197 fw_name = FIRMWARE_TAHITI; 198 break; 199 case CHIP_VERDE: 200 fw_name = FIRMWARE_VERDE; 201 break; 202 case CHIP_PITCAIRN: 203 fw_name = FIRMWARE_PITCAIRN; 204 break; 205 case CHIP_OLAND: 206 fw_name = FIRMWARE_OLAND; 207 break; 208 #endif 209 #ifdef CONFIG_DRM_AMDGPU_CIK 210 case CHIP_BONAIRE: 211 fw_name = FIRMWARE_BONAIRE; 212 break; 213 case CHIP_KABINI: 214 fw_name = FIRMWARE_KABINI; 215 break; 216 case CHIP_KAVERI: 217 fw_name = FIRMWARE_KAVERI; 218 break; 219 case CHIP_HAWAII: 220 fw_name = FIRMWARE_HAWAII; 221 break; 222 case CHIP_MULLINS: 223 fw_name = FIRMWARE_MULLINS; 224 break; 225 #endif 226 case CHIP_TONGA: 227 fw_name = FIRMWARE_TONGA; 228 break; 229 case CHIP_FIJI: 230 fw_name = FIRMWARE_FIJI; 231 break; 232 case CHIP_CARRIZO: 233 fw_name = FIRMWARE_CARRIZO; 234 break; 235 case CHIP_STONEY: 236 fw_name = FIRMWARE_STONEY; 237 break; 238 case CHIP_POLARIS10: 239 fw_name = FIRMWARE_POLARIS10; 240 break; 241 case CHIP_POLARIS11: 242 fw_name = FIRMWARE_POLARIS11; 243 break; 244 case CHIP_POLARIS12: 245 fw_name = FIRMWARE_POLARIS12; 246 break; 247 case CHIP_VEGA10: 248 fw_name = FIRMWARE_VEGA10; 249 break; 250 case CHIP_VEGA12: 251 fw_name = FIRMWARE_VEGA12; 252 break; 253 case CHIP_VEGAM: 254 fw_name = FIRMWARE_VEGAM; 255 break; 256 case CHIP_VEGA20: 257 fw_name = FIRMWARE_VEGA20; 258 break; 259 default: 260 return -EINVAL; 261 } 262 263 r = request_firmware(&adev->uvd.fw, fw_name, adev->dev); 264 if (r) { 265 dev_err(adev->dev, "amdgpu_uvd: Can't load firmware \"%s\"\n", 266 fw_name); 267 return r; 268 } 269 270 r = amdgpu_ucode_validate(adev->uvd.fw); 271 if (r) { 272 dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n", 273 fw_name); 274 release_firmware(adev->uvd.fw); 275 adev->uvd.fw = NULL; 276 return r; 277 } 278 279 /* Set the default UVD handles that the firmware can handle */ 280 adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES; 281 282 hdr = (const struct common_firmware_header *)adev->uvd.fw->data; 283 family_id = le32_to_cpu(hdr->ucode_version) & 0xff; 284 285 if (adev->asic_type < CHIP_VEGA20) { 286 unsigned version_major, version_minor; 287 288 version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; 289 version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; 290 DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n", 291 version_major, version_minor, family_id); 292 293 /* 294 * Limit the number of UVD handles depending on microcode major 295 * and minor versions. The firmware version which has 40 UVD 296 * instances support is 1.80. So all subsequent versions should 297 * also have the same support. 298 */ 299 if ((version_major > 0x01) || 300 ((version_major == 0x01) && (version_minor >= 0x50))) 301 adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; 302 303 adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | 304 (family_id << 8)); 305 306 if ((adev->asic_type == CHIP_POLARIS10 || 307 adev->asic_type == CHIP_POLARIS11) && 308 (adev->uvd.fw_version < FW_1_66_16)) 309 DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too old.\n", 310 version_major, version_minor); 311 } else { 312 unsigned int enc_major, enc_minor, dec_minor; 313 314 dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; 315 enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f; 316 enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3; 317 DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n", 318 enc_major, enc_minor, dec_minor, family_id); 319 320 adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; 321 322 adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version); 323 } 324 325 bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE 326 + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles; 327 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 328 bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); 329 330 for (j = 0; j < adev->uvd.num_uvd_inst; j++) { 331 if (adev->uvd.harvest_config & (1 << j)) 332 continue; 333 r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, 334 AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo, 335 &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr); 336 if (r) { 337 dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); 338 return r; 339 } 340 } 341 342 for (i = 0; i < adev->uvd.max_handles; ++i) { 343 atomic_set(&adev->uvd.handles[i], 0); 344 adev->uvd.filp[i] = NULL; 345 } 346 347 /* from uvd v5.0 HW addressing capacity increased to 64 bits */ 348 if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) 349 adev->uvd.address_64_bit = true; 350 351 r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo); 352 if (r) 353 return r; 354 355 switch (adev->asic_type) { 356 case CHIP_TONGA: 357 adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10; 358 break; 359 case CHIP_CARRIZO: 360 adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11; 361 break; 362 case CHIP_FIJI: 363 adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12; 364 break; 365 case CHIP_STONEY: 366 adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15; 367 break; 368 default: 369 adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10; 370 } 371 372 return 0; 373 } 374 375 int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) 376 { 377 void *addr = amdgpu_bo_kptr(adev->uvd.ib_bo); 378 int i, j; 379 380 drm_sched_entity_destroy(&adev->uvd.entity); 381 382 for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { 383 if (adev->uvd.harvest_config & (1 << j)) 384 continue; 385 kvfree(adev->uvd.inst[j].saved_bo); 386 387 amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo, 388 &adev->uvd.inst[j].gpu_addr, 389 (void **)&adev->uvd.inst[j].cpu_addr); 390 391 amdgpu_ring_fini(&adev->uvd.inst[j].ring); 392 393 for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) 394 amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); 395 } 396 amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, &addr); 397 release_firmware(adev->uvd.fw); 398 399 return 0; 400 } 401 402 /** 403 * amdgpu_uvd_entity_init - init entity 404 * 405 * @adev: amdgpu_device pointer 406 * 407 */ 408 int amdgpu_uvd_entity_init(struct amdgpu_device *adev) 409 { 410 struct amdgpu_ring *ring; 411 struct drm_gpu_scheduler *sched; 412 int r; 413 414 ring = &adev->uvd.inst[0].ring; 415 sched = &ring->sched; 416 r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, 417 &sched, 1, NULL); 418 if (r) { 419 DRM_ERROR("Failed setting up UVD kernel entity.\n"); 420 return r; 421 } 422 423 return 0; 424 } 425 426 int amdgpu_uvd_suspend(struct amdgpu_device *adev) 427 { 428 unsigned size; 429 void *ptr; 430 int i, j, idx; 431 bool in_ras_intr = amdgpu_ras_intr_triggered(); 432 433 cancel_delayed_work_sync(&adev->uvd.idle_work); 434 435 /* only valid for physical mode */ 436 if (adev->asic_type < CHIP_POLARIS10) { 437 for (i = 0; i < adev->uvd.max_handles; ++i) 438 if (atomic_read(&adev->uvd.handles[i])) 439 break; 440 441 if (i == adev->uvd.max_handles) 442 return 0; 443 } 444 445 for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { 446 if (adev->uvd.harvest_config & (1 << j)) 447 continue; 448 if (adev->uvd.inst[j].vcpu_bo == NULL) 449 continue; 450 451 size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo); 452 ptr = adev->uvd.inst[j].cpu_addr; 453 454 adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL); 455 if (!adev->uvd.inst[j].saved_bo) 456 return -ENOMEM; 457 458 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 459 /* re-write 0 since err_event_athub will corrupt VCPU buffer */ 460 if (in_ras_intr) 461 memset(adev->uvd.inst[j].saved_bo, 0, size); 462 else 463 memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); 464 465 drm_dev_exit(idx); 466 } 467 } 468 469 if (in_ras_intr) 470 DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); 471 472 return 0; 473 } 474 475 int amdgpu_uvd_resume(struct amdgpu_device *adev) 476 { 477 unsigned size; 478 void *ptr; 479 int i, idx; 480 481 for (i = 0; i < adev->uvd.num_uvd_inst; i++) { 482 if (adev->uvd.harvest_config & (1 << i)) 483 continue; 484 if (adev->uvd.inst[i].vcpu_bo == NULL) 485 return -EINVAL; 486 487 size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo); 488 ptr = adev->uvd.inst[i].cpu_addr; 489 490 if (adev->uvd.inst[i].saved_bo != NULL) { 491 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 492 memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); 493 drm_dev_exit(idx); 494 } 495 kvfree(adev->uvd.inst[i].saved_bo); 496 adev->uvd.inst[i].saved_bo = NULL; 497 } else { 498 const struct common_firmware_header *hdr; 499 unsigned offset; 500 501 hdr = (const struct common_firmware_header *)adev->uvd.fw->data; 502 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 503 offset = le32_to_cpu(hdr->ucode_array_offset_bytes); 504 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 505 memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, 506 le32_to_cpu(hdr->ucode_size_bytes)); 507 drm_dev_exit(idx); 508 } 509 size -= le32_to_cpu(hdr->ucode_size_bytes); 510 ptr += le32_to_cpu(hdr->ucode_size_bytes); 511 } 512 memset_io(ptr, 0, size); 513 /* to restore uvd fence seq */ 514 amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring); 515 } 516 } 517 return 0; 518 } 519 520 void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) 521 { 522 struct amdgpu_ring *ring = &adev->uvd.inst[0].ring; 523 int i, r; 524 525 for (i = 0; i < adev->uvd.max_handles; ++i) { 526 uint32_t handle = atomic_read(&adev->uvd.handles[i]); 527 528 if (handle != 0 && adev->uvd.filp[i] == filp) { 529 struct dma_fence *fence; 530 531 r = amdgpu_uvd_get_destroy_msg(ring, handle, false, 532 &fence); 533 if (r) { 534 DRM_ERROR("Error destroying UVD %d!\n", r); 535 continue; 536 } 537 538 dma_fence_wait(fence, false); 539 dma_fence_put(fence); 540 541 adev->uvd.filp[i] = NULL; 542 atomic_set(&adev->uvd.handles[i], 0); 543 } 544 } 545 } 546 547 static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo) 548 { 549 int i; 550 for (i = 0; i < abo->placement.num_placement; ++i) { 551 abo->placements[i].fpfn = 0 >> PAGE_SHIFT; 552 abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; 553 } 554 } 555 556 static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx) 557 { 558 uint32_t lo, hi; 559 uint64_t addr; 560 561 lo = amdgpu_ib_get_value(ctx->ib, ctx->data0); 562 hi = amdgpu_ib_get_value(ctx->ib, ctx->data1); 563 addr = ((uint64_t)lo) | (((uint64_t)hi) << 32); 564 565 return addr; 566 } 567 568 /** 569 * amdgpu_uvd_cs_pass1 - first parsing round 570 * 571 * @ctx: UVD parser context 572 * 573 * Make sure UVD message and feedback buffers are in VRAM and 574 * nobody is violating an 256MB boundary. 575 */ 576 static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) 577 { 578 struct ttm_operation_ctx tctx = { false, false }; 579 struct amdgpu_bo_va_mapping *mapping; 580 struct amdgpu_bo *bo; 581 uint32_t cmd; 582 uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); 583 int r = 0; 584 585 r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); 586 if (r) { 587 DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); 588 return r; 589 } 590 591 if (!ctx->parser->adev->uvd.address_64_bit) { 592 /* check if it's a message or feedback command */ 593 cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1; 594 if (cmd == 0x0 || cmd == 0x3) { 595 /* yes, force it into VRAM */ 596 uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; 597 amdgpu_bo_placement_from_domain(bo, domain); 598 } 599 amdgpu_uvd_force_into_uvd_segment(bo); 600 601 r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx); 602 } 603 604 return r; 605 } 606 607 /** 608 * amdgpu_uvd_cs_msg_decode - handle UVD decode message 609 * 610 * @adev: amdgpu_device pointer 611 * @msg: pointer to message structure 612 * @buf_sizes: placeholder to put the different buffer lengths 613 * 614 * Peek into the decode message and calculate the necessary buffer sizes. 615 */ 616 static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, 617 unsigned buf_sizes[]) 618 { 619 unsigned stream_type = msg[4]; 620 unsigned width = msg[6]; 621 unsigned height = msg[7]; 622 unsigned dpb_size = msg[9]; 623 unsigned pitch = msg[28]; 624 unsigned level = msg[57]; 625 626 unsigned width_in_mb = width / 16; 627 unsigned height_in_mb = ALIGN(height / 16, 2); 628 unsigned fs_in_mb = width_in_mb * height_in_mb; 629 630 unsigned image_size, tmp, min_dpb_size, num_dpb_buffer; 631 unsigned min_ctx_size = ~0; 632 633 image_size = width * height; 634 image_size += image_size / 2; 635 image_size = ALIGN(image_size, 1024); 636 637 switch (stream_type) { 638 case 0: /* H264 */ 639 switch(level) { 640 case 30: 641 num_dpb_buffer = 8100 / fs_in_mb; 642 break; 643 case 31: 644 num_dpb_buffer = 18000 / fs_in_mb; 645 break; 646 case 32: 647 num_dpb_buffer = 20480 / fs_in_mb; 648 break; 649 case 41: 650 num_dpb_buffer = 32768 / fs_in_mb; 651 break; 652 case 42: 653 num_dpb_buffer = 34816 / fs_in_mb; 654 break; 655 case 50: 656 num_dpb_buffer = 110400 / fs_in_mb; 657 break; 658 case 51: 659 num_dpb_buffer = 184320 / fs_in_mb; 660 break; 661 default: 662 num_dpb_buffer = 184320 / fs_in_mb; 663 break; 664 } 665 num_dpb_buffer++; 666 if (num_dpb_buffer > 17) 667 num_dpb_buffer = 17; 668 669 /* reference picture buffer */ 670 min_dpb_size = image_size * num_dpb_buffer; 671 672 /* macroblock context buffer */ 673 min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192; 674 675 /* IT surface buffer */ 676 min_dpb_size += width_in_mb * height_in_mb * 32; 677 break; 678 679 case 1: /* VC1 */ 680 681 /* reference picture buffer */ 682 min_dpb_size = image_size * 3; 683 684 /* CONTEXT_BUFFER */ 685 min_dpb_size += width_in_mb * height_in_mb * 128; 686 687 /* IT surface buffer */ 688 min_dpb_size += width_in_mb * 64; 689 690 /* DB surface buffer */ 691 min_dpb_size += width_in_mb * 128; 692 693 /* BP */ 694 tmp = max(width_in_mb, height_in_mb); 695 min_dpb_size += ALIGN(tmp * 7 * 16, 64); 696 break; 697 698 case 3: /* MPEG2 */ 699 700 /* reference picture buffer */ 701 min_dpb_size = image_size * 3; 702 break; 703 704 case 4: /* MPEG4 */ 705 706 /* reference picture buffer */ 707 min_dpb_size = image_size * 3; 708 709 /* CM */ 710 min_dpb_size += width_in_mb * height_in_mb * 64; 711 712 /* IT surface buffer */ 713 min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); 714 break; 715 716 case 7: /* H264 Perf */ 717 switch(level) { 718 case 30: 719 num_dpb_buffer = 8100 / fs_in_mb; 720 break; 721 case 31: 722 num_dpb_buffer = 18000 / fs_in_mb; 723 break; 724 case 32: 725 num_dpb_buffer = 20480 / fs_in_mb; 726 break; 727 case 41: 728 num_dpb_buffer = 32768 / fs_in_mb; 729 break; 730 case 42: 731 num_dpb_buffer = 34816 / fs_in_mb; 732 break; 733 case 50: 734 num_dpb_buffer = 110400 / fs_in_mb; 735 break; 736 case 51: 737 num_dpb_buffer = 184320 / fs_in_mb; 738 break; 739 default: 740 num_dpb_buffer = 184320 / fs_in_mb; 741 break; 742 } 743 num_dpb_buffer++; 744 if (num_dpb_buffer > 17) 745 num_dpb_buffer = 17; 746 747 /* reference picture buffer */ 748 min_dpb_size = image_size * num_dpb_buffer; 749 750 if (!adev->uvd.use_ctx_buf){ 751 /* macroblock context buffer */ 752 min_dpb_size += 753 width_in_mb * height_in_mb * num_dpb_buffer * 192; 754 755 /* IT surface buffer */ 756 min_dpb_size += width_in_mb * height_in_mb * 32; 757 } else { 758 /* macroblock context buffer */ 759 min_ctx_size = 760 width_in_mb * height_in_mb * num_dpb_buffer * 192; 761 } 762 break; 763 764 case 8: /* MJPEG */ 765 min_dpb_size = 0; 766 break; 767 768 case 16: /* H265 */ 769 image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2; 770 image_size = ALIGN(image_size, 256); 771 772 num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2; 773 min_dpb_size = image_size * num_dpb_buffer; 774 min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16) 775 * 16 * num_dpb_buffer + 52 * 1024; 776 break; 777 778 default: 779 DRM_ERROR("UVD codec not handled %d!\n", stream_type); 780 return -EINVAL; 781 } 782 783 if (width > pitch) { 784 DRM_ERROR("Invalid UVD decoding target pitch!\n"); 785 return -EINVAL; 786 } 787 788 if (dpb_size < min_dpb_size) { 789 DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n", 790 dpb_size, min_dpb_size); 791 return -EINVAL; 792 } 793 794 buf_sizes[0x1] = dpb_size; 795 buf_sizes[0x2] = image_size; 796 buf_sizes[0x4] = min_ctx_size; 797 /* store image width to adjust nb memory pstate */ 798 adev->uvd.decode_image_width = width; 799 return 0; 800 } 801 802 /** 803 * amdgpu_uvd_cs_msg - handle UVD message 804 * 805 * @ctx: UVD parser context 806 * @bo: buffer object containing the message 807 * @offset: offset into the buffer object 808 * 809 * Peek into the UVD message and extract the session id. 810 * Make sure that we don't open up to many sessions. 811 */ 812 static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, 813 struct amdgpu_bo *bo, unsigned offset) 814 { 815 struct amdgpu_device *adev = ctx->parser->adev; 816 int32_t *msg, msg_type, handle; 817 void *ptr; 818 long r; 819 int i; 820 821 if (offset & 0x3F) { 822 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 823 return -EINVAL; 824 } 825 826 r = amdgpu_bo_kmap(bo, &ptr); 827 if (r) { 828 DRM_ERROR("Failed mapping the UVD) message (%ld)!\n", r); 829 return r; 830 } 831 832 msg = ptr + offset; 833 834 msg_type = msg[1]; 835 handle = msg[2]; 836 837 if (handle == 0) { 838 amdgpu_bo_kunmap(bo); 839 DRM_ERROR("Invalid UVD handle!\n"); 840 return -EINVAL; 841 } 842 843 switch (msg_type) { 844 case 0: 845 /* it's a create msg, calc image size (width * height) */ 846 amdgpu_bo_kunmap(bo); 847 848 /* try to alloc a new handle */ 849 for (i = 0; i < adev->uvd.max_handles; ++i) { 850 if (atomic_read(&adev->uvd.handles[i]) == handle) { 851 DRM_ERROR(")Handle 0x%x already in use!\n", 852 handle); 853 return -EINVAL; 854 } 855 856 if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) { 857 adev->uvd.filp[i] = ctx->parser->filp; 858 return 0; 859 } 860 } 861 862 DRM_ERROR("No more free UVD handles!\n"); 863 return -ENOSPC; 864 865 case 1: 866 /* it's a decode msg, calc buffer sizes */ 867 r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes); 868 amdgpu_bo_kunmap(bo); 869 if (r) 870 return r; 871 872 /* validate the handle */ 873 for (i = 0; i < adev->uvd.max_handles; ++i) { 874 if (atomic_read(&adev->uvd.handles[i]) == handle) { 875 if (adev->uvd.filp[i] != ctx->parser->filp) { 876 DRM_ERROR("UVD handle collision detected!\n"); 877 return -EINVAL; 878 } 879 return 0; 880 } 881 } 882 883 DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); 884 return -ENOENT; 885 886 case 2: 887 /* it's a destroy msg, free the handle */ 888 for (i = 0; i < adev->uvd.max_handles; ++i) 889 atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); 890 amdgpu_bo_kunmap(bo); 891 return 0; 892 893 default: 894 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 895 } 896 897 amdgpu_bo_kunmap(bo); 898 return -EINVAL; 899 } 900 901 /** 902 * amdgpu_uvd_cs_pass2 - second parsing round 903 * 904 * @ctx: UVD parser context 905 * 906 * Patch buffer addresses, make sure buffer sizes are correct. 907 */ 908 static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) 909 { 910 struct amdgpu_bo_va_mapping *mapping; 911 struct amdgpu_bo *bo; 912 uint32_t cmd; 913 uint64_t start, end; 914 uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); 915 int r; 916 917 r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); 918 if (r) { 919 DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); 920 return r; 921 } 922 923 start = amdgpu_bo_gpu_offset(bo); 924 925 end = (mapping->last + 1 - mapping->start); 926 end = end * AMDGPU_GPU_PAGE_SIZE + start; 927 928 addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE; 929 start += addr; 930 931 amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start)); 932 amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start)); 933 934 cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1; 935 if (cmd < 0x4) { 936 if ((end - start) < ctx->buf_sizes[cmd]) { 937 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 938 (unsigned)(end - start), 939 ctx->buf_sizes[cmd]); 940 return -EINVAL; 941 } 942 943 } else if (cmd == 0x206) { 944 if ((end - start) < ctx->buf_sizes[4]) { 945 DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, 946 (unsigned)(end - start), 947 ctx->buf_sizes[4]); 948 return -EINVAL; 949 } 950 } else if ((cmd != 0x100) && (cmd != 0x204)) { 951 DRM_ERROR("invalid UVD command %X!\n", cmd); 952 return -EINVAL; 953 } 954 955 if (!ctx->parser->adev->uvd.address_64_bit) { 956 if ((start >> 28) != ((end - 1) >> 28)) { 957 DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", 958 start, end); 959 return -EINVAL; 960 } 961 962 if ((cmd == 0 || cmd == 0x3) && 963 (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) { 964 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 965 start, end); 966 return -EINVAL; 967 } 968 } 969 970 if (cmd == 0) { 971 ctx->has_msg_cmd = true; 972 r = amdgpu_uvd_cs_msg(ctx, bo, addr); 973 if (r) 974 return r; 975 } else if (!ctx->has_msg_cmd) { 976 DRM_ERROR("Message needed before other commands are send!\n"); 977 return -EINVAL; 978 } 979 980 return 0; 981 } 982 983 /** 984 * amdgpu_uvd_cs_reg - parse register writes 985 * 986 * @ctx: UVD parser context 987 * @cb: callback function 988 * 989 * Parse the register writes, call cb on each complete command. 990 */ 991 static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, 992 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) 993 { 994 int i, r; 995 996 ctx->idx++; 997 for (i = 0; i <= ctx->count; ++i) { 998 unsigned reg = ctx->reg + i; 999 1000 if (ctx->idx >= ctx->ib->length_dw) { 1001 DRM_ERROR("Register command after end of CS!\n"); 1002 return -EINVAL; 1003 } 1004 1005 switch (reg) { 1006 case mmUVD_GPCOM_VCPU_DATA0: 1007 ctx->data0 = ctx->idx; 1008 break; 1009 case mmUVD_GPCOM_VCPU_DATA1: 1010 ctx->data1 = ctx->idx; 1011 break; 1012 case mmUVD_GPCOM_VCPU_CMD: 1013 r = cb(ctx); 1014 if (r) 1015 return r; 1016 break; 1017 case mmUVD_ENGINE_CNTL: 1018 case mmUVD_NO_OP: 1019 break; 1020 default: 1021 DRM_ERROR("Invalid reg 0x%X!\n", reg); 1022 return -EINVAL; 1023 } 1024 ctx->idx++; 1025 } 1026 return 0; 1027 } 1028 1029 /** 1030 * amdgpu_uvd_cs_packets - parse UVD packets 1031 * 1032 * @ctx: UVD parser context 1033 * @cb: callback function 1034 * 1035 * Parse the command stream packets. 1036 */ 1037 static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, 1038 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) 1039 { 1040 int r; 1041 1042 for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) { 1043 uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx); 1044 unsigned type = CP_PACKET_GET_TYPE(cmd); 1045 switch (type) { 1046 case PACKET_TYPE0: 1047 ctx->reg = CP_PACKET0_GET_REG(cmd); 1048 ctx->count = CP_PACKET_GET_COUNT(cmd); 1049 r = amdgpu_uvd_cs_reg(ctx, cb); 1050 if (r) 1051 return r; 1052 break; 1053 case PACKET_TYPE2: 1054 ++ctx->idx; 1055 break; 1056 default: 1057 DRM_ERROR("Unknown packet type %d !\n", type); 1058 return -EINVAL; 1059 } 1060 } 1061 return 0; 1062 } 1063 1064 /** 1065 * amdgpu_uvd_ring_parse_cs - UVD command submission parser 1066 * 1067 * @parser: Command submission parser context 1068 * @job: the job to parse 1069 * @ib: the IB to patch 1070 * 1071 * Parse the command stream, patch in addresses as necessary. 1072 */ 1073 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, 1074 struct amdgpu_job *job, 1075 struct amdgpu_ib *ib) 1076 { 1077 struct amdgpu_uvd_cs_ctx ctx = {}; 1078 unsigned buf_sizes[] = { 1079 [0x00000000] = 2048, 1080 [0x00000001] = 0xFFFFFFFF, 1081 [0x00000002] = 0xFFFFFFFF, 1082 [0x00000003] = 2048, 1083 [0x00000004] = 0xFFFFFFFF, 1084 }; 1085 int r; 1086 1087 job->vm = NULL; 1088 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); 1089 1090 if (ib->length_dw % 16) { 1091 DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", 1092 ib->length_dw); 1093 return -EINVAL; 1094 } 1095 1096 ctx.parser = parser; 1097 ctx.buf_sizes = buf_sizes; 1098 ctx.ib = ib; 1099 1100 /* first round only required on chips without UVD 64 bit address support */ 1101 if (!parser->adev->uvd.address_64_bit) { 1102 /* first round, make sure the buffers are actually in the UVD segment */ 1103 r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1); 1104 if (r) 1105 return r; 1106 } 1107 1108 /* second round, patch buffer addresses into the command stream */ 1109 r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2); 1110 if (r) 1111 return r; 1112 1113 if (!ctx.has_msg_cmd) { 1114 DRM_ERROR("UVD-IBs need a msg command!\n"); 1115 return -EINVAL; 1116 } 1117 1118 return 0; 1119 } 1120 1121 static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, 1122 bool direct, struct dma_fence **fence) 1123 { 1124 struct amdgpu_device *adev = ring->adev; 1125 struct dma_fence *f = NULL; 1126 struct amdgpu_job *job; 1127 struct amdgpu_ib *ib; 1128 uint32_t data[4]; 1129 uint64_t addr; 1130 long r; 1131 int i; 1132 unsigned offset_idx = 0; 1133 unsigned offset[3] = { UVD_BASE_SI, 0, 0 }; 1134 1135 r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity, 1136 AMDGPU_FENCE_OWNER_UNDEFINED, 1137 64, direct ? AMDGPU_IB_POOL_DIRECT : 1138 AMDGPU_IB_POOL_DELAYED, &job); 1139 if (r) 1140 return r; 1141 1142 if (adev->asic_type >= CHIP_VEGA10) { 1143 offset_idx = 1 + ring->me; 1144 offset[1] = adev->reg_offset[UVD_HWIP][0][1]; 1145 offset[2] = adev->reg_offset[UVD_HWIP][1][1]; 1146 } 1147 1148 data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0); 1149 data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0); 1150 data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0); 1151 data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0); 1152 1153 ib = &job->ibs[0]; 1154 addr = amdgpu_bo_gpu_offset(bo); 1155 ib->ptr[0] = data[0]; 1156 ib->ptr[1] = addr; 1157 ib->ptr[2] = data[1]; 1158 ib->ptr[3] = addr >> 32; 1159 ib->ptr[4] = data[2]; 1160 ib->ptr[5] = 0; 1161 for (i = 6; i < 16; i += 2) { 1162 ib->ptr[i] = data[3]; 1163 ib->ptr[i+1] = 0; 1164 } 1165 ib->length_dw = 16; 1166 1167 if (direct) { 1168 r = dma_resv_wait_timeout(bo->tbo.base.resv, 1169 DMA_RESV_USAGE_KERNEL, false, 1170 msecs_to_jiffies(10)); 1171 if (r == 0) 1172 r = -ETIMEDOUT; 1173 if (r < 0) 1174 goto err_free; 1175 1176 r = amdgpu_job_submit_direct(job, ring, &f); 1177 if (r) 1178 goto err_free; 1179 } else { 1180 r = drm_sched_job_add_resv_dependencies(&job->base, 1181 bo->tbo.base.resv, 1182 DMA_RESV_USAGE_KERNEL); 1183 if (r) 1184 goto err_free; 1185 1186 f = amdgpu_job_submit(job); 1187 } 1188 1189 amdgpu_bo_reserve(bo, true); 1190 amdgpu_bo_fence(bo, f, false); 1191 amdgpu_bo_unreserve(bo); 1192 1193 if (fence) 1194 *fence = dma_fence_get(f); 1195 dma_fence_put(f); 1196 1197 return 0; 1198 1199 err_free: 1200 amdgpu_job_free(job); 1201 return r; 1202 } 1203 1204 /* multiple fence commands without any stream commands in between can 1205 crash the vcpu so just try to emmit a dummy create/destroy msg to 1206 avoid this */ 1207 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, 1208 struct dma_fence **fence) 1209 { 1210 struct amdgpu_device *adev = ring->adev; 1211 struct amdgpu_bo *bo = adev->uvd.ib_bo; 1212 uint32_t *msg; 1213 int i; 1214 1215 msg = amdgpu_bo_kptr(bo); 1216 /* stitch together an UVD create msg */ 1217 msg[0] = cpu_to_le32(0x00000de4); 1218 msg[1] = cpu_to_le32(0x00000000); 1219 msg[2] = cpu_to_le32(handle); 1220 msg[3] = cpu_to_le32(0x00000000); 1221 msg[4] = cpu_to_le32(0x00000000); 1222 msg[5] = cpu_to_le32(0x00000000); 1223 msg[6] = cpu_to_le32(0x00000000); 1224 msg[7] = cpu_to_le32(0x00000780); 1225 msg[8] = cpu_to_le32(0x00000440); 1226 msg[9] = cpu_to_le32(0x00000000); 1227 msg[10] = cpu_to_le32(0x01b37000); 1228 for (i = 11; i < 1024; ++i) 1229 msg[i] = cpu_to_le32(0x0); 1230 1231 return amdgpu_uvd_send_msg(ring, bo, true, fence); 1232 1233 } 1234 1235 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, 1236 bool direct, struct dma_fence **fence) 1237 { 1238 struct amdgpu_device *adev = ring->adev; 1239 struct amdgpu_bo *bo = NULL; 1240 uint32_t *msg; 1241 int r, i; 1242 1243 if (direct) { 1244 bo = adev->uvd.ib_bo; 1245 } else { 1246 r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo); 1247 if (r) 1248 return r; 1249 } 1250 1251 msg = amdgpu_bo_kptr(bo); 1252 /* stitch together an UVD destroy msg */ 1253 msg[0] = cpu_to_le32(0x00000de4); 1254 msg[1] = cpu_to_le32(0x00000002); 1255 msg[2] = cpu_to_le32(handle); 1256 msg[3] = cpu_to_le32(0x00000000); 1257 for (i = 4; i < 1024; ++i) 1258 msg[i] = cpu_to_le32(0x0); 1259 1260 r = amdgpu_uvd_send_msg(ring, bo, direct, fence); 1261 1262 if (!direct) 1263 amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg); 1264 1265 return r; 1266 } 1267 1268 static void amdgpu_uvd_idle_work_handler(struct work_struct *work) 1269 { 1270 struct amdgpu_device *adev = 1271 container_of(work, struct amdgpu_device, uvd.idle_work.work); 1272 unsigned fences = 0, i, j; 1273 1274 for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { 1275 if (adev->uvd.harvest_config & (1 << i)) 1276 continue; 1277 fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring); 1278 for (j = 0; j < adev->uvd.num_enc_rings; ++j) { 1279 fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]); 1280 } 1281 } 1282 1283 if (fences == 0) { 1284 if (adev->pm.dpm_enabled) { 1285 amdgpu_dpm_enable_uvd(adev, false); 1286 } else { 1287 amdgpu_asic_set_uvd_clocks(adev, 0, 0); 1288 /* shutdown the UVD block */ 1289 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, 1290 AMD_PG_STATE_GATE); 1291 amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, 1292 AMD_CG_STATE_GATE); 1293 } 1294 } else { 1295 schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT); 1296 } 1297 } 1298 1299 void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) 1300 { 1301 struct amdgpu_device *adev = ring->adev; 1302 bool set_clocks; 1303 1304 if (amdgpu_sriov_vf(adev)) 1305 return; 1306 1307 set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); 1308 if (set_clocks) { 1309 if (adev->pm.dpm_enabled) { 1310 amdgpu_dpm_enable_uvd(adev, true); 1311 } else { 1312 amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); 1313 amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, 1314 AMD_CG_STATE_UNGATE); 1315 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, 1316 AMD_PG_STATE_UNGATE); 1317 } 1318 } 1319 } 1320 1321 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) 1322 { 1323 if (!amdgpu_sriov_vf(ring->adev)) 1324 schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); 1325 } 1326 1327 /** 1328 * amdgpu_uvd_ring_test_ib - test ib execution 1329 * 1330 * @ring: amdgpu_ring pointer 1331 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT 1332 * 1333 * Test if we can successfully execute an IB 1334 */ 1335 int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) 1336 { 1337 struct dma_fence *fence; 1338 long r; 1339 1340 r = amdgpu_uvd_get_create_msg(ring, 1, &fence); 1341 if (r) 1342 goto error; 1343 1344 r = dma_fence_wait_timeout(fence, false, timeout); 1345 dma_fence_put(fence); 1346 if (r == 0) 1347 r = -ETIMEDOUT; 1348 if (r < 0) 1349 goto error; 1350 1351 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); 1352 if (r) 1353 goto error; 1354 1355 r = dma_fence_wait_timeout(fence, false, timeout); 1356 if (r == 0) 1357 r = -ETIMEDOUT; 1358 else if (r > 0) 1359 r = 0; 1360 1361 dma_fence_put(fence); 1362 1363 error: 1364 return r; 1365 } 1366 1367 /** 1368 * amdgpu_uvd_used_handles - returns used UVD handles 1369 * 1370 * @adev: amdgpu_device pointer 1371 * 1372 * Returns the number of UVD handles in use 1373 */ 1374 uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev) 1375 { 1376 unsigned i; 1377 uint32_t used_handles = 0; 1378 1379 for (i = 0; i < adev->uvd.max_handles; ++i) { 1380 /* 1381 * Handles can be freed in any order, and not 1382 * necessarily linear. So we need to count 1383 * all non-zero handles. 1384 */ 1385 if (atomic_read(&adev->uvd.handles[i])) 1386 used_handles++; 1387 } 1388 1389 return used_handles; 1390 } 1391